{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 15517, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.444544692917445e-05, "grad_norm": 0.15129432154940006, "learning_rate": 0.0, "loss": 0.0012, "step": 1 }, { "epoch": 0.0001288908938583489, "grad_norm": 0.26991460323889616, "learning_rate": 9.43403983493194e-07, "loss": 0.0003, "step": 2 }, { "epoch": 0.00019333634078752337, "grad_norm": 1.9954004446079063, "learning_rate": 1.4952599368676733e-06, "loss": 0.0091, "step": 3 }, { "epoch": 0.0002577817877166978, "grad_norm": 0.22931631126106178, "learning_rate": 1.886807966986388e-06, "loss": 0.0003, "step": 4 }, { "epoch": 0.00032222723464587225, "grad_norm": 0.25371039245875826, "learning_rate": 2.1905162141015006e-06, "loss": 0.0005, "step": 5 }, { "epoch": 0.00038667268157504674, "grad_norm": 1.1948746521302072, "learning_rate": 2.4386639203608674e-06, "loss": 0.006, "step": 6 }, { "epoch": 0.0004511181285042212, "grad_norm": 0.30320543126617766, "learning_rate": 2.648469816548369e-06, "loss": 0.0027, "step": 7 }, { "epoch": 0.0005155635754333956, "grad_norm": 0.10152956997270147, "learning_rate": 2.8302119504795815e-06, "loss": 0.0009, "step": 8 }, { "epoch": 0.0005800090223625701, "grad_norm": 1.4987747433411953, "learning_rate": 2.9905198737353466e-06, "loss": 0.0129, "step": 9 }, { "epoch": 0.0006444544692917445, "grad_norm": 0.02707402421982954, "learning_rate": 3.1339201975946947e-06, "loss": 0.0001, "step": 10 }, { "epoch": 0.000708899916220919, "grad_norm": 0.018766550431254077, "learning_rate": 3.2636415696447343e-06, "loss": 0.0001, "step": 11 }, { "epoch": 0.0007733453631500935, "grad_norm": 0.20907100771370063, "learning_rate": 3.382067903854061e-06, "loss": 0.0004, "step": 12 }, { "epoch": 0.0008377908100792679, "grad_norm": 0.5817576027518055, "learning_rate": 3.4910095707707385e-06, "loss": 0.0026, "step": 13 }, { "epoch": 0.0009022362570084424, "grad_norm": 0.01711225147567763, "learning_rate": 3.5918738000415628e-06, "loss": 0.0, "step": 14 }, { "epoch": 0.0009666817039376168, "grad_norm": 0.07462783118349445, "learning_rate": 3.6857761509691734e-06, "loss": 0.0003, "step": 15 }, { "epoch": 0.0010311271508667912, "grad_norm": 0.10816803494037058, "learning_rate": 3.773615933972776e-06, "loss": 0.0002, "step": 16 }, { "epoch": 0.0010955725977959657, "grad_norm": 1.9217337579697105, "learning_rate": 3.8561287268159795e-06, "loss": 0.0167, "step": 17 }, { "epoch": 0.0011600180447251401, "grad_norm": 0.1417044693166633, "learning_rate": 3.93392385722854e-06, "loss": 0.0002, "step": 18 }, { "epoch": 0.0012244634916543146, "grad_norm": 0.023784749719864124, "learning_rate": 4.0075117377730165e-06, "loss": 0.0, "step": 19 }, { "epoch": 0.001288908938583489, "grad_norm": 0.07828858355950565, "learning_rate": 4.077324181087888e-06, "loss": 0.0002, "step": 20 }, { "epoch": 0.0013533543855126634, "grad_norm": 0.04722417233368723, "learning_rate": 4.143729753416042e-06, "loss": 0.0002, "step": 21 }, { "epoch": 0.001417799832441838, "grad_norm": 0.08640805563755993, "learning_rate": 4.207045553137928e-06, "loss": 0.0002, "step": 22 }, { "epoch": 0.0014822452793710125, "grad_norm": 1.7851848307852938, "learning_rate": 4.267546368922451e-06, "loss": 0.012, "step": 23 }, { "epoch": 0.001546690726300187, "grad_norm": 0.18091976525222841, "learning_rate": 4.325471887347255e-06, "loss": 0.0004, "step": 24 }, { "epoch": 0.0016111361732293614, "grad_norm": 0.03394466558933047, "learning_rate": 4.381032428203001e-06, "loss": 0.0004, "step": 25 }, { "epoch": 0.0016755816201585358, "grad_norm": 0.041340723123166515, "learning_rate": 4.434413554263933e-06, "loss": 0.0002, "step": 26 }, { "epoch": 0.0017400270670877103, "grad_norm": 0.20461789214303566, "learning_rate": 4.485779810603019e-06, "loss": 0.0022, "step": 27 }, { "epoch": 0.0018044725140168847, "grad_norm": 0.12106531716915613, "learning_rate": 4.5352777835347565e-06, "loss": 0.0002, "step": 28 }, { "epoch": 0.0018689179609460592, "grad_norm": 0.03492363492746817, "learning_rate": 4.583038622537583e-06, "loss": 0.0001, "step": 29 }, { "epoch": 0.0019333634078752336, "grad_norm": 0.03259851462272291, "learning_rate": 4.629180134462368e-06, "loss": 0.0001, "step": 30 }, { "epoch": 0.001997808854804408, "grad_norm": 0.03013889637859078, "learning_rate": 4.673808534226263e-06, "loss": 0.0002, "step": 31 }, { "epoch": 0.0020622543017335825, "grad_norm": 0.027037052171979114, "learning_rate": 4.717019917465971e-06, "loss": 0.0003, "step": 32 }, { "epoch": 0.002126699748662757, "grad_norm": 0.005106793730490385, "learning_rate": 4.758901506512407e-06, "loss": 0.0, "step": 33 }, { "epoch": 0.0021911451955919314, "grad_norm": 0.3133135457884165, "learning_rate": 4.799532710309174e-06, "loss": 0.0008, "step": 34 }, { "epoch": 0.002255590642521106, "grad_norm": 0.06576723596208979, "learning_rate": 4.838986030649869e-06, "loss": 0.0004, "step": 35 }, { "epoch": 0.0023200360894502802, "grad_norm": 0.44200181870496286, "learning_rate": 4.877327840721735e-06, "loss": 0.0026, "step": 36 }, { "epoch": 0.0023844815363794547, "grad_norm": 0.005113691502045184, "learning_rate": 4.914619056956378e-06, "loss": 0.0001, "step": 37 }, { "epoch": 0.002448926983308629, "grad_norm": 0.011710030818769188, "learning_rate": 4.9509157212662115e-06, "loss": 0.0001, "step": 38 }, { "epoch": 0.0025133724302378035, "grad_norm": 0.10730184004136714, "learning_rate": 4.986269507638412e-06, "loss": 0.0013, "step": 39 }, { "epoch": 0.002577817877166978, "grad_norm": 0.06514135570120874, "learning_rate": 5.0207281645810825e-06, "loss": 0.0002, "step": 40 }, { "epoch": 0.0026422633240961524, "grad_norm": 0.00949610941862856, "learning_rate": 5.054335902928648e-06, "loss": 0.0001, "step": 41 }, { "epoch": 0.002706708771025327, "grad_norm": 0.011550266470039141, "learning_rate": 5.087133736909237e-06, "loss": 0.0001, "step": 42 }, { "epoch": 0.0027711542179545013, "grad_norm": 0.007752723479024223, "learning_rate": 5.119159785074679e-06, "loss": 0.0, "step": 43 }, { "epoch": 0.002835599664883676, "grad_norm": 0.5378045317232993, "learning_rate": 5.1504495366311225e-06, "loss": 0.0032, "step": 44 }, { "epoch": 0.0029000451118128506, "grad_norm": 0.2128986742090359, "learning_rate": 5.181036087836847e-06, "loss": 0.0006, "step": 45 }, { "epoch": 0.002964490558742025, "grad_norm": 0.000640446091847838, "learning_rate": 5.210950352415644e-06, "loss": 0.0, "step": 46 }, { "epoch": 0.0030289360056711995, "grad_norm": 0.030881680917973543, "learning_rate": 5.2402212493395684e-06, "loss": 0.0001, "step": 47 }, { "epoch": 0.003093381452600374, "grad_norm": 0.05658321101545766, "learning_rate": 5.268875870840449e-06, "loss": 0.0001, "step": 48 }, { "epoch": 0.0031578268995295484, "grad_norm": 0.5703326443811637, "learning_rate": 5.296939633096738e-06, "loss": 0.0052, "step": 49 }, { "epoch": 0.003222272346458723, "grad_norm": 2.8203730272893908, "learning_rate": 5.324436411696195e-06, "loss": 0.0161, "step": 50 }, { "epoch": 0.0032867177933878972, "grad_norm": 0.001724963280434341, "learning_rate": 5.351388663683652e-06, "loss": 0.0, "step": 51 }, { "epoch": 0.0033511632403170717, "grad_norm": 0.01416510359891742, "learning_rate": 5.377817537757127e-06, "loss": 0.0001, "step": 52 }, { "epoch": 0.003415608687246246, "grad_norm": 0.0049279435077748575, "learning_rate": 5.403742973967069e-06, "loss": 0.0, "step": 53 }, { "epoch": 0.0034800541341754206, "grad_norm": 0.7568150671265557, "learning_rate": 5.4291837940962135e-06, "loss": 0.0023, "step": 54 }, { "epoch": 0.003544499581104595, "grad_norm": 0.18716012429739934, "learning_rate": 5.454157783746235e-06, "loss": 0.0006, "step": 55 }, { "epoch": 0.0036089450280337694, "grad_norm": 0.03473747532236853, "learning_rate": 5.4786817670279514e-06, "loss": 0.0001, "step": 56 }, { "epoch": 0.003673390474962944, "grad_norm": 0.08810403927552686, "learning_rate": 5.50277167464069e-06, "loss": 0.0003, "step": 57 }, { "epoch": 0.0037378359218921183, "grad_norm": 0.017372241080586315, "learning_rate": 5.526442606030776e-06, "loss": 0.0001, "step": 58 }, { "epoch": 0.0038022813688212928, "grad_norm": 0.21547829813614516, "learning_rate": 5.549708886236511e-06, "loss": 0.0017, "step": 59 }, { "epoch": 0.003866726815750467, "grad_norm": 0.5537295329959373, "learning_rate": 5.572584117955562e-06, "loss": 0.002, "step": 60 }, { "epoch": 0.003931172262679642, "grad_norm": 0.14335554933770492, "learning_rate": 5.5950812293086465e-06, "loss": 0.0004, "step": 61 }, { "epoch": 0.003995617709608816, "grad_norm": 0.011859054511193099, "learning_rate": 5.617212517719457e-06, "loss": 0.0001, "step": 62 }, { "epoch": 0.004060063156537991, "grad_norm": 0.004499399040304683, "learning_rate": 5.638989690283715e-06, "loss": 0.0, "step": 63 }, { "epoch": 0.004124508603467165, "grad_norm": 0.9716421455530285, "learning_rate": 5.660423900959163e-06, "loss": 0.0033, "step": 64 }, { "epoch": 0.00418895405039634, "grad_norm": 0.2602800996635666, "learning_rate": 5.681525784872238e-06, "loss": 0.004, "step": 65 }, { "epoch": 0.004253399497325514, "grad_norm": 0.505029623796257, "learning_rate": 5.7023054900056e-06, "loss": 0.0048, "step": 66 }, { "epoch": 0.004317844944254689, "grad_norm": 0.06494347848669335, "learning_rate": 5.722772706502867e-06, "loss": 0.0002, "step": 67 }, { "epoch": 0.004382290391183863, "grad_norm": 0.05793100298650254, "learning_rate": 5.742936693802368e-06, "loss": 0.0004, "step": 68 }, { "epoch": 0.004446735838113038, "grad_norm": 0.3872734676672814, "learning_rate": 5.762806305790124e-06, "loss": 0.0031, "step": 69 }, { "epoch": 0.004511181285042212, "grad_norm": 0.032758724262618666, "learning_rate": 5.782390014143064e-06, "loss": 0.0001, "step": 70 }, { "epoch": 0.0045756267319713865, "grad_norm": 0.510062996956511, "learning_rate": 5.801695930016513e-06, "loss": 0.0053, "step": 71 }, { "epoch": 0.0046400721789005605, "grad_norm": 0.0020728367399568642, "learning_rate": 5.820731824214928e-06, "loss": 0.0, "step": 72 }, { "epoch": 0.004704517625829735, "grad_norm": 0.022118583202706633, "learning_rate": 5.8395051459714105e-06, "loss": 0.0001, "step": 73 }, { "epoch": 0.004768963072758909, "grad_norm": 0.023627041277547944, "learning_rate": 5.858023040449572e-06, "loss": 0.0, "step": 74 }, { "epoch": 0.004833408519688084, "grad_norm": 0.046421888470687886, "learning_rate": 5.876292365070674e-06, "loss": 0.0004, "step": 75 }, { "epoch": 0.004897853966617258, "grad_norm": 0.02261795227128185, "learning_rate": 5.894319704759405e-06, "loss": 0.0, "step": 76 }, { "epoch": 0.004962299413546433, "grad_norm": 0.035546317724952904, "learning_rate": 5.912111386193103e-06, "loss": 0.0001, "step": 77 }, { "epoch": 0.005026744860475607, "grad_norm": 0.01905267113714707, "learning_rate": 5.929673491131606e-06, "loss": 0.0, "step": 78 }, { "epoch": 0.005091190307404782, "grad_norm": 0.015782799957426378, "learning_rate": 5.9470118688979865e-06, "loss": 0.0001, "step": 79 }, { "epoch": 0.005155635754333956, "grad_norm": 0.10050158065115125, "learning_rate": 5.964132148074277e-06, "loss": 0.0003, "step": 80 }, { "epoch": 0.005220081201263131, "grad_norm": 0.02889224712849086, "learning_rate": 5.981039747470693e-06, "loss": 0.0002, "step": 81 }, { "epoch": 0.005284526648192305, "grad_norm": 0.04525759150992565, "learning_rate": 5.997739886421841e-06, "loss": 0.0003, "step": 82 }, { "epoch": 0.00534897209512148, "grad_norm": 0.10955070109304078, "learning_rate": 6.014237594458876e-06, "loss": 0.0002, "step": 83 }, { "epoch": 0.005413417542050654, "grad_norm": 0.005650621324755179, "learning_rate": 6.030537720402429e-06, "loss": 0.0, "step": 84 }, { "epoch": 0.005477862988979829, "grad_norm": 0.22230525543403104, "learning_rate": 6.04664494091748e-06, "loss": 0.001, "step": 85 }, { "epoch": 0.005542308435909003, "grad_norm": 0.1757163404784244, "learning_rate": 6.062563768567872e-06, "loss": 0.0021, "step": 86 }, { "epoch": 0.0056067538828381775, "grad_norm": 0.10250988499729953, "learning_rate": 6.0782985594052556e-06, "loss": 0.0003, "step": 87 }, { "epoch": 0.005671199329767352, "grad_norm": 0.07404739274091283, "learning_rate": 6.093853520124317e-06, "loss": 0.0006, "step": 88 }, { "epoch": 0.005735644776696526, "grad_norm": 0.01653300445057603, "learning_rate": 6.109232714813748e-06, "loss": 0.0, "step": 89 }, { "epoch": 0.005800090223625701, "grad_norm": 0.009526052586484979, "learning_rate": 6.124440071330041e-06, "loss": 0.0001, "step": 90 }, { "epoch": 0.005864535670554875, "grad_norm": 0.010664951094544989, "learning_rate": 6.139479387319107e-06, "loss": 0.0, "step": 91 }, { "epoch": 0.00592898111748405, "grad_norm": 0.020226888276105217, "learning_rate": 6.154354335908838e-06, "loss": 0.0001, "step": 92 }, { "epoch": 0.005993426564413224, "grad_norm": 0.4540252129143341, "learning_rate": 6.169068471093936e-06, "loss": 0.0036, "step": 93 }, { "epoch": 0.006057872011342399, "grad_norm": 0.0843852312896068, "learning_rate": 6.183625232832763e-06, "loss": 0.0008, "step": 94 }, { "epoch": 0.006122317458271573, "grad_norm": 0.7314261216216805, "learning_rate": 6.1980279518745175e-06, "loss": 0.0058, "step": 95 }, { "epoch": 0.006186762905200748, "grad_norm": 0.0012243406202469007, "learning_rate": 6.212279854333643e-06, "loss": 0.0, "step": 96 }, { "epoch": 0.006251208352129922, "grad_norm": 0.028267818956160366, "learning_rate": 6.226384066027224e-06, "loss": 0.0001, "step": 97 }, { "epoch": 0.006315653799059097, "grad_norm": 0.018740492262862375, "learning_rate": 6.240343616589932e-06, "loss": 0.0001, "step": 98 }, { "epoch": 0.006380099245988271, "grad_norm": 0.051675413777484226, "learning_rate": 6.25416144338008e-06, "loss": 0.0001, "step": 99 }, { "epoch": 0.006444544692917446, "grad_norm": 0.3241670981018575, "learning_rate": 6.267840395189389e-06, "loss": 0.0019, "step": 100 }, { "epoch": 0.00650899013984662, "grad_norm": 4.658525973154353, "learning_rate": 6.281383235768169e-06, "loss": 0.0324, "step": 101 }, { "epoch": 0.0065734355867757945, "grad_norm": 0.13965773384789484, "learning_rate": 6.2947926471768465e-06, "loss": 0.0004, "step": 102 }, { "epoch": 0.0066378810337049685, "grad_norm": 0.027297791319953163, "learning_rate": 6.30807123297399e-06, "loss": 0.0001, "step": 103 }, { "epoch": 0.006702326480634143, "grad_norm": 0.014509134729985261, "learning_rate": 6.3212215212503204e-06, "loss": 0.0, "step": 104 }, { "epoch": 0.006766771927563317, "grad_norm": 0.011064436533537543, "learning_rate": 6.334245967517543e-06, "loss": 0.0, "step": 105 }, { "epoch": 0.006831217374492492, "grad_norm": 0.33541193592494806, "learning_rate": 6.347146957460263e-06, "loss": 0.0002, "step": 106 }, { "epoch": 0.006895662821421666, "grad_norm": 0.13366590525564065, "learning_rate": 6.3599268095586996e-06, "loss": 0.0002, "step": 107 }, { "epoch": 0.006960108268350841, "grad_norm": 0.3804038820837792, "learning_rate": 6.372587777589408e-06, "loss": 0.0033, "step": 108 }, { "epoch": 0.007024553715280015, "grad_norm": 0.004748762629093711, "learning_rate": 6.385132053010746e-06, "loss": 0.0, "step": 109 }, { "epoch": 0.00708899916220919, "grad_norm": 0.18445047393599892, "learning_rate": 6.397561767239429e-06, "loss": 0.002, "step": 110 }, { "epoch": 0.007153444609138364, "grad_norm": 0.10146856090062799, "learning_rate": 6.40987899382405e-06, "loss": 0.0003, "step": 111 }, { "epoch": 0.007217890056067539, "grad_norm": 0.011157311312939988, "learning_rate": 6.422085750521144e-06, "loss": 0.0, "step": 112 }, { "epoch": 0.007282335502996713, "grad_norm": 0.607664348129492, "learning_rate": 6.434184001278967e-06, "loss": 0.0028, "step": 113 }, { "epoch": 0.007346780949925888, "grad_norm": 1.1590725239402295, "learning_rate": 6.446175658133884e-06, "loss": 0.0069, "step": 114 }, { "epoch": 0.007411226396855062, "grad_norm": 0.0064697996621230795, "learning_rate": 6.458062583023952e-06, "loss": 0.0, "step": 115 }, { "epoch": 0.007475671843784237, "grad_norm": 0.09498369574237263, "learning_rate": 6.46984658952397e-06, "loss": 0.0002, "step": 116 }, { "epoch": 0.0075401172907134115, "grad_norm": 0.0215383638363165, "learning_rate": 6.481529444506085e-06, "loss": 0.0001, "step": 117 }, { "epoch": 0.0076045627376425855, "grad_norm": 0.1171485120767354, "learning_rate": 6.493112869729705e-06, "loss": 0.0002, "step": 118 }, { "epoch": 0.00766900818457176, "grad_norm": 0.06883933984294645, "learning_rate": 6.5045985433643485e-06, "loss": 0.0003, "step": 119 }, { "epoch": 0.007733453631500934, "grad_norm": 0.015494437689087105, "learning_rate": 6.515988101448755e-06, "loss": 0.0, "step": 120 }, { "epoch": 0.007797899078430109, "grad_norm": 1.084890231104155, "learning_rate": 6.5272831392894685e-06, "loss": 0.0045, "step": 121 }, { "epoch": 0.007862344525359284, "grad_norm": 0.0041539375579682835, "learning_rate": 6.538485212801841e-06, "loss": 0.0, "step": 122 }, { "epoch": 0.007926789972288458, "grad_norm": 0.021719405589740653, "learning_rate": 6.54959583979632e-06, "loss": 0.0001, "step": 123 }, { "epoch": 0.007991235419217632, "grad_norm": 0.207106756440556, "learning_rate": 6.560616501212651e-06, "loss": 0.0015, "step": 124 }, { "epoch": 0.008055680866146806, "grad_norm": 0.12389089981738528, "learning_rate": 6.571548642304503e-06, "loss": 0.0015, "step": 125 }, { "epoch": 0.008120126313075982, "grad_norm": 0.08421375587621023, "learning_rate": 6.582393673776909e-06, "loss": 0.0017, "step": 126 }, { "epoch": 0.008184571760005156, "grad_norm": 0.7895623314177207, "learning_rate": 6.593152972878746e-06, "loss": 0.0034, "step": 127 }, { "epoch": 0.00824901720693433, "grad_norm": 0.009141865289784029, "learning_rate": 6.603827884452357e-06, "loss": 0.0001, "step": 128 }, { "epoch": 0.008313462653863504, "grad_norm": 0.06799812572737098, "learning_rate": 6.614419721942351e-06, "loss": 0.0002, "step": 129 }, { "epoch": 0.00837790810079268, "grad_norm": 0.010198638620555195, "learning_rate": 6.624929768365432e-06, "loss": 0.0001, "step": 130 }, { "epoch": 0.008442353547721854, "grad_norm": 0.14960549750420293, "learning_rate": 6.635359277243088e-06, "loss": 0.0003, "step": 131 }, { "epoch": 0.008506798994651028, "grad_norm": 0.03669934654162836, "learning_rate": 6.6457094734987945e-06, "loss": 0.0001, "step": 132 }, { "epoch": 0.008571244441580202, "grad_norm": 0.0583445395178466, "learning_rate": 6.6559815543213856e-06, "loss": 0.0002, "step": 133 }, { "epoch": 0.008635689888509377, "grad_norm": 0.21443872420801796, "learning_rate": 6.666176689996061e-06, "loss": 0.002, "step": 134 }, { "epoch": 0.008700135335438551, "grad_norm": 0.5406641455353228, "learning_rate": 6.67629602470452e-06, "loss": 0.0055, "step": 135 }, { "epoch": 0.008764580782367725, "grad_norm": 0.0021834450013692394, "learning_rate": 6.686340677295562e-06, "loss": 0.0, "step": 136 }, { "epoch": 0.0088290262292969, "grad_norm": 0.008434527707541637, "learning_rate": 6.696311742027455e-06, "loss": 0.0, "step": 137 }, { "epoch": 0.008893471676226075, "grad_norm": 0.04693062531604418, "learning_rate": 6.706210289283318e-06, "loss": 0.0002, "step": 138 }, { "epoch": 0.00895791712315525, "grad_norm": 0.05186746336439399, "learning_rate": 6.7160373662606684e-06, "loss": 0.0002, "step": 139 }, { "epoch": 0.009022362570084423, "grad_norm": 0.015129704926972309, "learning_rate": 6.7257939976362575e-06, "loss": 0.0, "step": 140 }, { "epoch": 0.009086808017013597, "grad_norm": 0.021042850048326282, "learning_rate": 6.735481186207243e-06, "loss": 0.0001, "step": 141 }, { "epoch": 0.009151253463942773, "grad_norm": 0.02537888704924219, "learning_rate": 6.745099913509707e-06, "loss": 0.0001, "step": 142 }, { "epoch": 0.009215698910871947, "grad_norm": 0.02671439549274601, "learning_rate": 6.754651140415472e-06, "loss": 0.0, "step": 143 }, { "epoch": 0.009280144357801121, "grad_norm": 0.008137524957251669, "learning_rate": 6.764135807708122e-06, "loss": 0.0, "step": 144 }, { "epoch": 0.009344589804730297, "grad_norm": 0.003042221966208203, "learning_rate": 6.773554836639082e-06, "loss": 0.0, "step": 145 }, { "epoch": 0.00940903525165947, "grad_norm": 0.06033152692120294, "learning_rate": 6.782909129464605e-06, "loss": 0.0002, "step": 146 }, { "epoch": 0.009473480698588645, "grad_norm": 0.027788121319448158, "learning_rate": 6.792199569964411e-06, "loss": 0.0, "step": 147 }, { "epoch": 0.009537926145517819, "grad_norm": 0.09830052518820728, "learning_rate": 6.801427023942765e-06, "loss": 0.0009, "step": 148 }, { "epoch": 0.009602371592446994, "grad_norm": 0.013099850923853006, "learning_rate": 6.810592339712671e-06, "loss": 0.0001, "step": 149 }, { "epoch": 0.009666817039376168, "grad_norm": 0.3610734402843211, "learning_rate": 6.819696348563868e-06, "loss": 0.005, "step": 150 }, { "epoch": 0.009731262486305342, "grad_norm": 0.22420221826039138, "learning_rate": 6.828739865215293e-06, "loss": 0.0025, "step": 151 }, { "epoch": 0.009795707933234516, "grad_norm": 0.026359216933519877, "learning_rate": 6.837723688252599e-06, "loss": 0.0001, "step": 152 }, { "epoch": 0.009860153380163692, "grad_norm": 0.37353960421039445, "learning_rate": 6.846648600551325e-06, "loss": 0.0013, "step": 153 }, { "epoch": 0.009924598827092866, "grad_norm": 0.37925933825010816, "learning_rate": 6.8555153696862974e-06, "loss": 0.0014, "step": 154 }, { "epoch": 0.00998904427402204, "grad_norm": 0.015689512713385756, "learning_rate": 6.864324748327763e-06, "loss": 0.0001, "step": 155 }, { "epoch": 0.010053489720951214, "grad_norm": 0.42026716718154256, "learning_rate": 6.8730774746248e-06, "loss": 0.0027, "step": 156 }, { "epoch": 0.01011793516788039, "grad_norm": 0.03761871169419944, "learning_rate": 6.881774272576467e-06, "loss": 0.0001, "step": 157 }, { "epoch": 0.010182380614809564, "grad_norm": 0.030528250735410357, "learning_rate": 6.89041585239118e-06, "loss": 0.0002, "step": 158 }, { "epoch": 0.010246826061738738, "grad_norm": 0.20077781600093578, "learning_rate": 6.899002910834743e-06, "loss": 0.0009, "step": 159 }, { "epoch": 0.010311271508667912, "grad_norm": 1.613127029104335, "learning_rate": 6.907536131567471e-06, "loss": 0.0053, "step": 160 }, { "epoch": 0.010375716955597088, "grad_norm": 0.39481368200606176, "learning_rate": 6.91601618547082e-06, "loss": 0.0103, "step": 161 }, { "epoch": 0.010440162402526262, "grad_norm": 0.03645218519897308, "learning_rate": 6.9244437309638855e-06, "loss": 0.0001, "step": 162 }, { "epoch": 0.010504607849455436, "grad_norm": 0.026445971997361595, "learning_rate": 6.932819414310185e-06, "loss": 0.0001, "step": 163 }, { "epoch": 0.01056905329638461, "grad_norm": 0.012293269592344393, "learning_rate": 6.941143869915035e-06, "loss": 0.0, "step": 164 }, { "epoch": 0.010633498743313785, "grad_norm": 0.006382313439032722, "learning_rate": 6.949417720613908e-06, "loss": 0.0, "step": 165 }, { "epoch": 0.01069794419024296, "grad_norm": 0.18303938626882663, "learning_rate": 6.95764157795207e-06, "loss": 0.0007, "step": 166 }, { "epoch": 0.010762389637172133, "grad_norm": 0.08027807718869757, "learning_rate": 6.965816042455817e-06, "loss": 0.0002, "step": 167 }, { "epoch": 0.010826835084101307, "grad_norm": 0.009543528622426802, "learning_rate": 6.9739417038956234e-06, "loss": 0.0, "step": 168 }, { "epoch": 0.010891280531030483, "grad_norm": 0.005663156424081436, "learning_rate": 6.982019141541477e-06, "loss": 0.0, "step": 169 }, { "epoch": 0.010955725977959657, "grad_norm": 0.06478321556126382, "learning_rate": 6.990048924410674e-06, "loss": 0.0001, "step": 170 }, { "epoch": 0.011020171424888831, "grad_norm": 0.11265850722264495, "learning_rate": 6.998031611508364e-06, "loss": 0.0002, "step": 171 }, { "epoch": 0.011084616871818005, "grad_norm": 0.13747799410071634, "learning_rate": 7.005967752061066e-06, "loss": 0.0017, "step": 172 }, { "epoch": 0.011149062318747181, "grad_norm": 0.8610413781168746, "learning_rate": 7.013857885743432e-06, "loss": 0.0035, "step": 173 }, { "epoch": 0.011213507765676355, "grad_norm": 2.1479235225041444, "learning_rate": 7.02170254289845e-06, "loss": 0.0081, "step": 174 }, { "epoch": 0.011277953212605529, "grad_norm": 0.19717496733882242, "learning_rate": 7.02950224475137e-06, "loss": 0.0009, "step": 175 }, { "epoch": 0.011342398659534705, "grad_norm": 0.12631252498003578, "learning_rate": 7.037257503617509e-06, "loss": 0.0007, "step": 176 }, { "epoch": 0.011406844106463879, "grad_norm": 0.002589981572960044, "learning_rate": 7.044968823104184e-06, "loss": 0.0, "step": 177 }, { "epoch": 0.011471289553393053, "grad_norm": 0.048464294586520046, "learning_rate": 7.052636698306942e-06, "loss": 0.0001, "step": 178 }, { "epoch": 0.011535735000322227, "grad_norm": 0.008706107825396808, "learning_rate": 7.060261616000313e-06, "loss": 0.0, "step": 179 }, { "epoch": 0.011600180447251402, "grad_norm": 0.016572784451954176, "learning_rate": 7.067844054823235e-06, "loss": 0.0002, "step": 180 }, { "epoch": 0.011664625894180576, "grad_norm": 0.011129301311151215, "learning_rate": 7.075384485459343e-06, "loss": 0.0001, "step": 181 }, { "epoch": 0.01172907134110975, "grad_norm": 0.039764761465697955, "learning_rate": 7.082883370812301e-06, "loss": 0.0001, "step": 182 }, { "epoch": 0.011793516788038924, "grad_norm": 0.024318506875899433, "learning_rate": 7.09034116617632e-06, "loss": 0.0001, "step": 183 }, { "epoch": 0.0118579622349681, "grad_norm": 2.2273904967445186, "learning_rate": 7.0977583194020325e-06, "loss": 0.012, "step": 184 }, { "epoch": 0.011922407681897274, "grad_norm": 0.006233002881990179, "learning_rate": 7.105135271057878e-06, "loss": 0.0, "step": 185 }, { "epoch": 0.011986853128826448, "grad_norm": 0.09233918702362297, "learning_rate": 7.1124724545871305e-06, "loss": 0.0004, "step": 186 }, { "epoch": 0.012051298575755622, "grad_norm": 0.006889842787298746, "learning_rate": 7.119770296460714e-06, "loss": 0.0, "step": 187 }, { "epoch": 0.012115744022684798, "grad_norm": 0.03291308421119183, "learning_rate": 7.127029216325958e-06, "loss": 0.0002, "step": 188 }, { "epoch": 0.012180189469613972, "grad_norm": 0.4183982353790482, "learning_rate": 7.1342496271513885e-06, "loss": 0.0006, "step": 189 }, { "epoch": 0.012244634916543146, "grad_norm": 0.0010894861958931816, "learning_rate": 7.141431935367712e-06, "loss": 0.0, "step": 190 }, { "epoch": 0.01230908036347232, "grad_norm": 0.1424472482504031, "learning_rate": 7.14857654100509e-06, "loss": 0.0005, "step": 191 }, { "epoch": 0.012373525810401496, "grad_norm": 0.0006208977366605882, "learning_rate": 7.155683837826838e-06, "loss": 0.0, "step": 192 }, { "epoch": 0.01243797125733067, "grad_norm": 0.006579198044893609, "learning_rate": 7.162754213459641e-06, "loss": 0.0001, "step": 193 }, { "epoch": 0.012502416704259844, "grad_norm": 0.0037932328500453966, "learning_rate": 7.1697880495204184e-06, "loss": 0.0, "step": 194 }, { "epoch": 0.012566862151189018, "grad_norm": 3.7558033860150153, "learning_rate": 7.176785721739913e-06, "loss": 0.0326, "step": 195 }, { "epoch": 0.012631307598118193, "grad_norm": 0.048965416061562524, "learning_rate": 7.1837476000831255e-06, "loss": 0.0001, "step": 196 }, { "epoch": 0.012695753045047367, "grad_norm": 0.004427047393266682, "learning_rate": 7.190674048866693e-06, "loss": 0.0, "step": 197 }, { "epoch": 0.012760198491976541, "grad_norm": 0.8024053711522192, "learning_rate": 7.197565426873274e-06, "loss": 0.0031, "step": 198 }, { "epoch": 0.012824643938905715, "grad_norm": 0.16278141967045448, "learning_rate": 7.20442208746308e-06, "loss": 0.0005, "step": 199 }, { "epoch": 0.012889089385834891, "grad_norm": 0.009971334443131889, "learning_rate": 7.211244378682583e-06, "loss": 0.0001, "step": 200 }, { "epoch": 0.012953534832764065, "grad_norm": 0.0020690471456571028, "learning_rate": 7.218032643370541e-06, "loss": 0.0, "step": 201 }, { "epoch": 0.01301798027969324, "grad_norm": 0.03277078691989995, "learning_rate": 7.224787219261363e-06, "loss": 0.0001, "step": 202 }, { "epoch": 0.013082425726622415, "grad_norm": 0.01937230899628366, "learning_rate": 7.231508439085952e-06, "loss": 0.0001, "step": 203 }, { "epoch": 0.013146871173551589, "grad_norm": 0.05567845986542249, "learning_rate": 7.238196630670041e-06, "loss": 0.0005, "step": 204 }, { "epoch": 0.013211316620480763, "grad_norm": 0.004371745422922564, "learning_rate": 7.244852117030148e-06, "loss": 0.0, "step": 205 }, { "epoch": 0.013275762067409937, "grad_norm": 0.0070734951743021, "learning_rate": 7.251475216467184e-06, "loss": 0.0001, "step": 206 }, { "epoch": 0.013340207514339113, "grad_norm": 0.02153720307664906, "learning_rate": 7.258066242657797e-06, "loss": 0.0001, "step": 207 }, { "epoch": 0.013404652961268287, "grad_norm": 0.12040073376471393, "learning_rate": 7.2646255047435146e-06, "loss": 0.0004, "step": 208 }, { "epoch": 0.01346909840819746, "grad_norm": 0.0004398452218530347, "learning_rate": 7.271153307417751e-06, "loss": 0.0, "step": 209 }, { "epoch": 0.013533543855126635, "grad_norm": 0.04042410483723827, "learning_rate": 7.277649951010737e-06, "loss": 0.0003, "step": 210 }, { "epoch": 0.01359798930205581, "grad_norm": 0.06771473747313296, "learning_rate": 7.284115731572427e-06, "loss": 0.0001, "step": 211 }, { "epoch": 0.013662434748984984, "grad_norm": 0.01151591523695472, "learning_rate": 7.290550940953457e-06, "loss": 0.0, "step": 212 }, { "epoch": 0.013726880195914158, "grad_norm": 0.4189345173901024, "learning_rate": 7.296955866884187e-06, "loss": 0.003, "step": 213 }, { "epoch": 0.013791325642843332, "grad_norm": 0.04164191329700527, "learning_rate": 7.303330793051894e-06, "loss": 0.0001, "step": 214 }, { "epoch": 0.013855771089772508, "grad_norm": 0.45272719251647475, "learning_rate": 7.309675999176179e-06, "loss": 0.0042, "step": 215 }, { "epoch": 0.013920216536701682, "grad_norm": 0.10562175672393527, "learning_rate": 7.315991761082603e-06, "loss": 0.0001, "step": 216 }, { "epoch": 0.013984661983630856, "grad_norm": 0.004886530786615905, "learning_rate": 7.322278350774632e-06, "loss": 0.0, "step": 217 }, { "epoch": 0.01404910743056003, "grad_norm": 0.0305665453017975, "learning_rate": 7.328536036503941e-06, "loss": 0.0001, "step": 218 }, { "epoch": 0.014113552877489206, "grad_norm": 0.03056080236649342, "learning_rate": 7.334765082839083e-06, "loss": 0.0002, "step": 219 }, { "epoch": 0.01417799832441838, "grad_norm": 0.01914917310382882, "learning_rate": 7.3409657507326235e-06, "loss": 0.0001, "step": 220 }, { "epoch": 0.014242443771347554, "grad_norm": 0.03466315135259134, "learning_rate": 7.347138297586717e-06, "loss": 0.0, "step": 221 }, { "epoch": 0.014306889218276728, "grad_norm": 0.005841761742250744, "learning_rate": 7.353282977317245e-06, "loss": 0.0001, "step": 222 }, { "epoch": 0.014371334665205904, "grad_norm": 0.014125468048465728, "learning_rate": 7.359400040416475e-06, "loss": 0.0001, "step": 223 }, { "epoch": 0.014435780112135078, "grad_norm": 0.1470687716683251, "learning_rate": 7.365489734014338e-06, "loss": 0.0013, "step": 224 }, { "epoch": 0.014500225559064252, "grad_norm": 0.1316313746000964, "learning_rate": 7.371552301938347e-06, "loss": 0.0011, "step": 225 }, { "epoch": 0.014564671005993426, "grad_norm": 1.216626395686913, "learning_rate": 7.377587984772161e-06, "loss": 0.003, "step": 226 }, { "epoch": 0.014629116452922602, "grad_norm": 0.030215975039620327, "learning_rate": 7.3835970199128815e-06, "loss": 0.0001, "step": 227 }, { "epoch": 0.014693561899851776, "grad_norm": 0.3925818447866758, "learning_rate": 7.3895796416270785e-06, "loss": 0.0014, "step": 228 }, { "epoch": 0.01475800734678095, "grad_norm": 0.011962584082899389, "learning_rate": 7.395536081105594e-06, "loss": 0.0, "step": 229 }, { "epoch": 0.014822452793710124, "grad_norm": 0.31894698647727304, "learning_rate": 7.401466566517146e-06, "loss": 0.0012, "step": 230 }, { "epoch": 0.0148868982406393, "grad_norm": 0.003858933499673059, "learning_rate": 7.407371323060776e-06, "loss": 0.0, "step": 231 }, { "epoch": 0.014951343687568473, "grad_norm": 0.002184925615570465, "learning_rate": 7.413250573017164e-06, "loss": 0.0, "step": 232 }, { "epoch": 0.015015789134497647, "grad_norm": 0.0575590729604499, "learning_rate": 7.4191045357988316e-06, "loss": 0.0002, "step": 233 }, { "epoch": 0.015080234581426823, "grad_norm": 0.0851483114943205, "learning_rate": 7.424933427999279e-06, "loss": 0.0001, "step": 234 }, { "epoch": 0.015144680028355997, "grad_norm": 0.023247510036130875, "learning_rate": 7.43073746344107e-06, "loss": 0.0002, "step": 235 }, { "epoch": 0.015209125475285171, "grad_norm": 0.005134519637574446, "learning_rate": 7.436516853222899e-06, "loss": 0.0, "step": 236 }, { "epoch": 0.015273570922214345, "grad_norm": 0.012576069907377694, "learning_rate": 7.442271805765659e-06, "loss": 0.0, "step": 237 }, { "epoch": 0.01533801636914352, "grad_norm": 0.0041381520945973395, "learning_rate": 7.448002526857543e-06, "loss": 0.0, "step": 238 }, { "epoch": 0.015402461816072695, "grad_norm": 0.1408810734446625, "learning_rate": 7.453709219698196e-06, "loss": 0.0014, "step": 239 }, { "epoch": 0.015466907263001869, "grad_norm": 0.02099080559759998, "learning_rate": 7.4593920849419495e-06, "loss": 0.0001, "step": 240 }, { "epoch": 0.015531352709931043, "grad_norm": 0.34798627897458273, "learning_rate": 7.465051320740162e-06, "loss": 0.0014, "step": 241 }, { "epoch": 0.015595798156860219, "grad_norm": 0.03753130495216673, "learning_rate": 7.470687122782663e-06, "loss": 0.0003, "step": 242 }, { "epoch": 0.01566024360378939, "grad_norm": 0.26285752177303706, "learning_rate": 7.476299684338365e-06, "loss": 0.0014, "step": 243 }, { "epoch": 0.015724689050718568, "grad_norm": 0.011810539302378758, "learning_rate": 7.481889196295035e-06, "loss": 0.0, "step": 244 }, { "epoch": 0.015789134497647742, "grad_norm": 0.2277299056097155, "learning_rate": 7.487455847198239e-06, "loss": 0.0012, "step": 245 }, { "epoch": 0.015853579944576916, "grad_norm": 0.004420655223345774, "learning_rate": 7.492999823289514e-06, "loss": 0.0, "step": 246 }, { "epoch": 0.01591802539150609, "grad_norm": 0.09830975739963607, "learning_rate": 7.498521308543755e-06, "loss": 0.0005, "step": 247 }, { "epoch": 0.015982470838435264, "grad_norm": 0.045239574227459146, "learning_rate": 7.504020484705845e-06, "loss": 0.0001, "step": 248 }, { "epoch": 0.01604691628536444, "grad_norm": 0.004480344683799468, "learning_rate": 7.5094975313265485e-06, "loss": 0.0, "step": 249 }, { "epoch": 0.016111361732293612, "grad_norm": 0.04672057822806287, "learning_rate": 7.5149526257976954e-06, "loss": 0.0001, "step": 250 }, { "epoch": 0.016175807179222786, "grad_norm": 0.27883213565129883, "learning_rate": 7.5203859433866514e-06, "loss": 0.001, "step": 251 }, { "epoch": 0.016240252626151964, "grad_norm": 0.06406857182641057, "learning_rate": 7.525797657270103e-06, "loss": 0.0001, "step": 252 }, { "epoch": 0.016304698073081138, "grad_norm": 0.1080837493277941, "learning_rate": 7.531187938567185e-06, "loss": 0.0004, "step": 253 }, { "epoch": 0.016369143520010312, "grad_norm": 0.03588783821123804, "learning_rate": 7.53655695637194e-06, "loss": 0.0003, "step": 254 }, { "epoch": 0.016433588966939486, "grad_norm": 0.16026049803218403, "learning_rate": 7.5419048777851525e-06, "loss": 0.0018, "step": 255 }, { "epoch": 0.01649803441386866, "grad_norm": 1.9165503701573037, "learning_rate": 7.547231867945552e-06, "loss": 0.0132, "step": 256 }, { "epoch": 0.016562479860797834, "grad_norm": 0.015958733921597468, "learning_rate": 7.55253809006041e-06, "loss": 0.0, "step": 257 }, { "epoch": 0.016626925307727008, "grad_norm": 0.0019534432535608308, "learning_rate": 7.557823705435545e-06, "loss": 0.0, "step": 258 }, { "epoch": 0.016691370754656182, "grad_norm": 0.02074439107330492, "learning_rate": 7.5630888735047455e-06, "loss": 0.0, "step": 259 }, { "epoch": 0.01675581620158536, "grad_norm": 1.1553521149673944, "learning_rate": 7.568333751858627e-06, "loss": 0.007, "step": 260 }, { "epoch": 0.016820261648514533, "grad_norm": 0.3606061372170696, "learning_rate": 7.573558496272929e-06, "loss": 0.0028, "step": 261 }, { "epoch": 0.016884707095443707, "grad_norm": 0.00275996815502147, "learning_rate": 7.578763260736281e-06, "loss": 0.0, "step": 262 }, { "epoch": 0.01694915254237288, "grad_norm": 0.004268297014106991, "learning_rate": 7.58394819747744e-06, "loss": 0.0, "step": 263 }, { "epoch": 0.017013597989302055, "grad_norm": 0.0367751156855776, "learning_rate": 7.589113456991989e-06, "loss": 0.0, "step": 264 }, { "epoch": 0.01707804343623123, "grad_norm": 0.03272693147392111, "learning_rate": 7.594259188068569e-06, "loss": 0.0004, "step": 265 }, { "epoch": 0.017142488883160403, "grad_norm": 0.2226594205262504, "learning_rate": 7.59938553781458e-06, "loss": 0.0008, "step": 266 }, { "epoch": 0.01720693433008958, "grad_norm": 4.17899426567084, "learning_rate": 7.604492651681422e-06, "loss": 0.0028, "step": 267 }, { "epoch": 0.017271379777018755, "grad_norm": 0.314194863453364, "learning_rate": 7.609580673489255e-06, "loss": 0.0017, "step": 268 }, { "epoch": 0.01733582522394793, "grad_norm": 0.009341551827583042, "learning_rate": 7.614649745451305e-06, "loss": 0.0, "step": 269 }, { "epoch": 0.017400270670877103, "grad_norm": 0.006294262625942665, "learning_rate": 7.6197000081977145e-06, "loss": 0.0, "step": 270 }, { "epoch": 0.017464716117806277, "grad_norm": 0.042152326765031375, "learning_rate": 7.624731600798942e-06, "loss": 0.0003, "step": 271 }, { "epoch": 0.01752916156473545, "grad_norm": 0.005840402112676206, "learning_rate": 7.629744660788755e-06, "loss": 0.0, "step": 272 }, { "epoch": 0.017593607011664625, "grad_norm": 0.003961071319983515, "learning_rate": 7.63473932418678e-06, "loss": 0.0, "step": 273 }, { "epoch": 0.0176580524585938, "grad_norm": 0.03416186617415247, "learning_rate": 7.639715725520648e-06, "loss": 0.0003, "step": 274 }, { "epoch": 0.017722497905522976, "grad_norm": 0.011693800006046062, "learning_rate": 7.644673997847735e-06, "loss": 0.0, "step": 275 }, { "epoch": 0.01778694335245215, "grad_norm": 0.013642164927569226, "learning_rate": 7.64961427277651e-06, "loss": 0.0001, "step": 276 }, { "epoch": 0.017851388799381324, "grad_norm": 0.005290876124771607, "learning_rate": 7.6545366804875e-06, "loss": 0.0, "step": 277 }, { "epoch": 0.0179158342463105, "grad_norm": 0.3685656717237363, "learning_rate": 7.659441349753862e-06, "loss": 0.0047, "step": 278 }, { "epoch": 0.017980279693239672, "grad_norm": 0.162786871985292, "learning_rate": 7.664328407961608e-06, "loss": 0.0003, "step": 279 }, { "epoch": 0.018044725140168846, "grad_norm": 0.0038340519806686416, "learning_rate": 7.669197981129452e-06, "loss": 0.0, "step": 280 }, { "epoch": 0.01810917058709802, "grad_norm": 1.1800076890145348, "learning_rate": 7.674050193928305e-06, "loss": 0.0107, "step": 281 }, { "epoch": 0.018173616034027194, "grad_norm": 0.4022661152723877, "learning_rate": 7.678885169700438e-06, "loss": 0.0011, "step": 282 }, { "epoch": 0.018238061480956372, "grad_norm": 0.0010258398165785125, "learning_rate": 7.683703030478275e-06, "loss": 0.0, "step": 283 }, { "epoch": 0.018302506927885546, "grad_norm": 0.03959304117747763, "learning_rate": 7.688503897002901e-06, "loss": 0.0002, "step": 284 }, { "epoch": 0.01836695237481472, "grad_norm": 0.003900586596470166, "learning_rate": 7.693287888742192e-06, "loss": 0.0, "step": 285 }, { "epoch": 0.018431397821743894, "grad_norm": 0.0016122389663264988, "learning_rate": 7.698055123908667e-06, "loss": 0.0, "step": 286 }, { "epoch": 0.018495843268673068, "grad_norm": 0.04600308794351545, "learning_rate": 7.702805719477016e-06, "loss": 0.0001, "step": 287 }, { "epoch": 0.018560288715602242, "grad_norm": 0.7386376867749295, "learning_rate": 7.707539791201318e-06, "loss": 0.0042, "step": 288 }, { "epoch": 0.018624734162531416, "grad_norm": 0.07433124536949527, "learning_rate": 7.712257453631959e-06, "loss": 0.0001, "step": 289 }, { "epoch": 0.018689179609460593, "grad_norm": 0.013048752330046877, "learning_rate": 7.716958820132276e-06, "loss": 0.0001, "step": 290 }, { "epoch": 0.018753625056389767, "grad_norm": 0.007703258584517692, "learning_rate": 7.721644002894899e-06, "loss": 0.0001, "step": 291 }, { "epoch": 0.01881807050331894, "grad_norm": 0.09900497813725398, "learning_rate": 7.726313112957798e-06, "loss": 0.0004, "step": 292 }, { "epoch": 0.018882515950248115, "grad_norm": 0.7346640890359516, "learning_rate": 7.730966260220105e-06, "loss": 0.0083, "step": 293 }, { "epoch": 0.01894696139717729, "grad_norm": 0.09893905196656488, "learning_rate": 7.735603553457605e-06, "loss": 0.0004, "step": 294 }, { "epoch": 0.019011406844106463, "grad_norm": 0.24004125974315293, "learning_rate": 7.740225100338013e-06, "loss": 0.0021, "step": 295 }, { "epoch": 0.019075852291035637, "grad_norm": 0.2778235435865419, "learning_rate": 7.74483100743596e-06, "loss": 0.0005, "step": 296 }, { "epoch": 0.01914029773796481, "grad_norm": 0.011061506543972114, "learning_rate": 7.749421380247754e-06, "loss": 0.0, "step": 297 }, { "epoch": 0.01920474318489399, "grad_norm": 0.0099751102256593, "learning_rate": 7.753996323205866e-06, "loss": 0.0, "step": 298 }, { "epoch": 0.019269188631823163, "grad_norm": 0.04293211545664707, "learning_rate": 7.758555939693189e-06, "loss": 0.0001, "step": 299 }, { "epoch": 0.019333634078752337, "grad_norm": 0.14890663148925204, "learning_rate": 7.763100332057061e-06, "loss": 0.0017, "step": 300 }, { "epoch": 0.01939807952568151, "grad_norm": 0.051856462016533725, "learning_rate": 7.767629601623046e-06, "loss": 0.0003, "step": 301 }, { "epoch": 0.019462524972610685, "grad_norm": 0.20118795719491775, "learning_rate": 7.77214384870849e-06, "loss": 0.0008, "step": 302 }, { "epoch": 0.01952697041953986, "grad_norm": 0.01699588268895743, "learning_rate": 7.776643172635842e-06, "loss": 0.0001, "step": 303 }, { "epoch": 0.019591415866469033, "grad_norm": 0.28016719067002793, "learning_rate": 7.781127671745794e-06, "loss": 0.0005, "step": 304 }, { "epoch": 0.019655861313398207, "grad_norm": 0.13740708235192936, "learning_rate": 7.785597443410148e-06, "loss": 0.0002, "step": 305 }, { "epoch": 0.019720306760327384, "grad_norm": 0.13476762217271904, "learning_rate": 7.79005258404452e-06, "loss": 0.0009, "step": 306 }, { "epoch": 0.01978475220725656, "grad_norm": 0.014737783093005689, "learning_rate": 7.794493189120813e-06, "loss": 0.0001, "step": 307 }, { "epoch": 0.019849197654185732, "grad_norm": 1.1847140609432278, "learning_rate": 7.79891935317949e-06, "loss": 0.0063, "step": 308 }, { "epoch": 0.019913643101114906, "grad_norm": 0.004352597997975502, "learning_rate": 7.803331169841663e-06, "loss": 0.0, "step": 309 }, { "epoch": 0.01997808854804408, "grad_norm": 0.025632327962959205, "learning_rate": 7.807728731820957e-06, "loss": 0.0002, "step": 310 }, { "epoch": 0.020042533994973254, "grad_norm": 0.054951094255651874, "learning_rate": 7.812112130935214e-06, "loss": 0.0003, "step": 311 }, { "epoch": 0.02010697944190243, "grad_norm": 0.10254521578536714, "learning_rate": 7.816481458117993e-06, "loss": 0.0002, "step": 312 }, { "epoch": 0.020171424888831602, "grad_norm": 0.14173059175345795, "learning_rate": 7.820836803429887e-06, "loss": 0.0004, "step": 313 }, { "epoch": 0.02023587033576078, "grad_norm": 0.010335749948364018, "learning_rate": 7.825178256069662e-06, "loss": 0.0, "step": 314 }, { "epoch": 0.020300315782689954, "grad_norm": 0.009398006668248197, "learning_rate": 7.829505904385217e-06, "loss": 0.0, "step": 315 }, { "epoch": 0.020364761229619128, "grad_norm": 0.0008854282801088228, "learning_rate": 7.833819835884374e-06, "loss": 0.0, "step": 316 }, { "epoch": 0.020429206676548302, "grad_norm": 0.01692642260677151, "learning_rate": 7.838120137245496e-06, "loss": 0.0001, "step": 317 }, { "epoch": 0.020493652123477476, "grad_norm": 0.012734900473579882, "learning_rate": 7.842406894327936e-06, "loss": 0.0001, "step": 318 }, { "epoch": 0.02055809757040665, "grad_norm": 0.04965355559724808, "learning_rate": 7.846680192182317e-06, "loss": 0.0002, "step": 319 }, { "epoch": 0.020622543017335824, "grad_norm": 0.027329077999220016, "learning_rate": 7.850940115060665e-06, "loss": 0.0002, "step": 320 }, { "epoch": 0.020686988464265, "grad_norm": 0.012715309583353273, "learning_rate": 7.855186746426372e-06, "loss": 0.0, "step": 321 }, { "epoch": 0.020751433911194175, "grad_norm": 0.022217729569863346, "learning_rate": 7.859420168964014e-06, "loss": 0.0001, "step": 322 }, { "epoch": 0.02081587935812335, "grad_norm": 0.009745268168007587, "learning_rate": 7.863640464588996e-06, "loss": 0.0, "step": 323 }, { "epoch": 0.020880324805052523, "grad_norm": 0.3033755741514754, "learning_rate": 7.86784771445708e-06, "loss": 0.001, "step": 324 }, { "epoch": 0.020944770251981697, "grad_norm": 0.012576282814093155, "learning_rate": 7.87204199897374e-06, "loss": 0.0, "step": 325 }, { "epoch": 0.02100921569891087, "grad_norm": 0.0025267722044404915, "learning_rate": 7.87622339780338e-06, "loss": 0.0, "step": 326 }, { "epoch": 0.021073661145840045, "grad_norm": 0.06525056075694663, "learning_rate": 7.88039198987842e-06, "loss": 0.0001, "step": 327 }, { "epoch": 0.02113810659276922, "grad_norm": 0.010063583865905463, "learning_rate": 7.88454785340823e-06, "loss": 0.0, "step": 328 }, { "epoch": 0.021202552039698397, "grad_norm": 1.3302956737899603, "learning_rate": 7.888691065887938e-06, "loss": 0.0081, "step": 329 }, { "epoch": 0.02126699748662757, "grad_norm": 0.17322090259007986, "learning_rate": 7.892821704107102e-06, "loss": 0.0023, "step": 330 }, { "epoch": 0.021331442933556745, "grad_norm": 0.011463568326494302, "learning_rate": 7.896939844158243e-06, "loss": 0.0, "step": 331 }, { "epoch": 0.02139588838048592, "grad_norm": 0.04771365515172803, "learning_rate": 7.901045561445263e-06, "loss": 0.0001, "step": 332 }, { "epoch": 0.021460333827415093, "grad_norm": 0.007986473007665173, "learning_rate": 7.905138930691726e-06, "loss": 0.0, "step": 333 }, { "epoch": 0.021524779274344267, "grad_norm": 0.13398699493738292, "learning_rate": 7.909220025949012e-06, "loss": 0.0001, "step": 334 }, { "epoch": 0.02158922472127344, "grad_norm": 0.17768507053513713, "learning_rate": 7.913288920604367e-06, "loss": 0.0024, "step": 335 }, { "epoch": 0.021653670168202615, "grad_norm": 1.1020287142645597, "learning_rate": 7.917345687388818e-06, "loss": 0.0027, "step": 336 }, { "epoch": 0.021718115615131792, "grad_norm": 0.005485560200914321, "learning_rate": 7.921390398384963e-06, "loss": 0.0, "step": 337 }, { "epoch": 0.021782561062060966, "grad_norm": 0.39644821828960675, "learning_rate": 7.925423125034672e-06, "loss": 0.0025, "step": 338 }, { "epoch": 0.02184700650899014, "grad_norm": 0.1476993275933789, "learning_rate": 7.92944393814664e-06, "loss": 0.0006, "step": 339 }, { "epoch": 0.021911451955919314, "grad_norm": 0.003948748132867952, "learning_rate": 7.933452907903869e-06, "loss": 0.0, "step": 340 }, { "epoch": 0.02197589740284849, "grad_norm": 0.1684808299627317, "learning_rate": 7.937450103870998e-06, "loss": 0.0007, "step": 341 }, { "epoch": 0.022040342849777662, "grad_norm": 0.03526063990809892, "learning_rate": 7.941435595001557e-06, "loss": 0.0001, "step": 342 }, { "epoch": 0.022104788296706836, "grad_norm": 0.00300194528628094, "learning_rate": 7.945409449645105e-06, "loss": 0.0, "step": 343 }, { "epoch": 0.02216923374363601, "grad_norm": 0.0015934897607704204, "learning_rate": 7.94937173555426e-06, "loss": 0.0, "step": 344 }, { "epoch": 0.022233679190565188, "grad_norm": 0.0058229976179194655, "learning_rate": 7.953322519891626e-06, "loss": 0.0, "step": 345 }, { "epoch": 0.022298124637494362, "grad_norm": 0.04014894770212523, "learning_rate": 7.957261869236626e-06, "loss": 0.0, "step": 346 }, { "epoch": 0.022362570084423536, "grad_norm": 0.3194126538626638, "learning_rate": 7.961189849592243e-06, "loss": 0.0056, "step": 347 }, { "epoch": 0.02242701553135271, "grad_norm": 0.0064705525401818125, "learning_rate": 7.965106526391645e-06, "loss": 0.0001, "step": 348 }, { "epoch": 0.022491460978281884, "grad_norm": 3.5872106349028567, "learning_rate": 7.969011964504728e-06, "loss": 0.0476, "step": 349 }, { "epoch": 0.022555906425211058, "grad_norm": 0.21691632631488653, "learning_rate": 7.972906228244563e-06, "loss": 0.0006, "step": 350 }, { "epoch": 0.022620351872140232, "grad_norm": 0.013143918484912772, "learning_rate": 7.976789381373757e-06, "loss": 0.0, "step": 351 }, { "epoch": 0.02268479731906941, "grad_norm": 0.02501359449813824, "learning_rate": 7.980661487110704e-06, "loss": 0.0001, "step": 352 }, { "epoch": 0.022749242765998583, "grad_norm": 0.0017974908439855096, "learning_rate": 7.98452260813577e-06, "loss": 0.0, "step": 353 }, { "epoch": 0.022813688212927757, "grad_norm": 0.023813737078225865, "learning_rate": 7.988372806597378e-06, "loss": 0.0, "step": 354 }, { "epoch": 0.02287813365985693, "grad_norm": 0.36353022749573466, "learning_rate": 7.992212144118013e-06, "loss": 0.0034, "step": 355 }, { "epoch": 0.022942579106786105, "grad_norm": 0.03440102538916525, "learning_rate": 7.996040681800137e-06, "loss": 0.0001, "step": 356 }, { "epoch": 0.02300702455371528, "grad_norm": 0.39249523673234826, "learning_rate": 7.99985848023202e-06, "loss": 0.0033, "step": 357 }, { "epoch": 0.023071470000644453, "grad_norm": 0.015484064838049798, "learning_rate": 8.003665599493508e-06, "loss": 0.0002, "step": 358 }, { "epoch": 0.023135915447573627, "grad_norm": 0.05831849892882333, "learning_rate": 8.007462099161673e-06, "loss": 0.0002, "step": 359 }, { "epoch": 0.023200360894502805, "grad_norm": 0.030189192169840014, "learning_rate": 8.01124803831643e-06, "loss": 0.0, "step": 360 }, { "epoch": 0.02326480634143198, "grad_norm": 0.2871898356727702, "learning_rate": 8.015023475546033e-06, "loss": 0.0009, "step": 361 }, { "epoch": 0.023329251788361153, "grad_norm": 0.0028194829163485577, "learning_rate": 8.018788468952538e-06, "loss": 0.0, "step": 362 }, { "epoch": 0.023393697235290327, "grad_norm": 0.13778555057274436, "learning_rate": 8.022543076157141e-06, "loss": 0.0039, "step": 363 }, { "epoch": 0.0234581426822195, "grad_norm": 0.021112274440135134, "learning_rate": 8.026287354305495e-06, "loss": 0.0001, "step": 364 }, { "epoch": 0.023522588129148675, "grad_norm": 0.004636861815034096, "learning_rate": 8.030021360072912e-06, "loss": 0.0, "step": 365 }, { "epoch": 0.02358703357607785, "grad_norm": 0.0028864734672826285, "learning_rate": 8.033745149669513e-06, "loss": 0.0, "step": 366 }, { "epoch": 0.023651479023007023, "grad_norm": 0.22539105605376755, "learning_rate": 8.037458778845317e-06, "loss": 0.0007, "step": 367 }, { "epoch": 0.0237159244699362, "grad_norm": 0.7308905296678736, "learning_rate": 8.041162302895227e-06, "loss": 0.0024, "step": 368 }, { "epoch": 0.023780369916865374, "grad_norm": 0.0055914223650806285, "learning_rate": 8.044855776663993e-06, "loss": 0.0, "step": 369 }, { "epoch": 0.02384481536379455, "grad_norm": 0.04858513212525571, "learning_rate": 8.048539254551072e-06, "loss": 0.0, "step": 370 }, { "epoch": 0.023909260810723722, "grad_norm": 0.0032946095109740066, "learning_rate": 8.052212790515438e-06, "loss": 0.0, "step": 371 }, { "epoch": 0.023973706257652896, "grad_norm": 0.00796063799442098, "learning_rate": 8.055876438080324e-06, "loss": 0.0, "step": 372 }, { "epoch": 0.02403815170458207, "grad_norm": 0.07456883882132498, "learning_rate": 8.059530250337904e-06, "loss": 0.0015, "step": 373 }, { "epoch": 0.024102597151511244, "grad_norm": 0.024193773638429922, "learning_rate": 8.063174279953908e-06, "loss": 0.0, "step": 374 }, { "epoch": 0.024167042598440422, "grad_norm": 0.08240639532182745, "learning_rate": 8.066808579172175e-06, "loss": 0.0014, "step": 375 }, { "epoch": 0.024231488045369596, "grad_norm": 0.06964492787404344, "learning_rate": 8.070433199819152e-06, "loss": 0.0001, "step": 376 }, { "epoch": 0.02429593349229877, "grad_norm": 0.13324233929377452, "learning_rate": 8.074048193308322e-06, "loss": 0.0014, "step": 377 }, { "epoch": 0.024360378939227944, "grad_norm": 0.31884756278114057, "learning_rate": 8.077653610644582e-06, "loss": 0.0025, "step": 378 }, { "epoch": 0.024424824386157118, "grad_norm": 0.005066331055183986, "learning_rate": 8.081249502428569e-06, "loss": 0.0001, "step": 379 }, { "epoch": 0.024489269833086292, "grad_norm": 0.11909933234504282, "learning_rate": 8.084835918860906e-06, "loss": 0.0006, "step": 380 }, { "epoch": 0.024553715280015466, "grad_norm": 0.21011827473200473, "learning_rate": 8.08841290974642e-06, "loss": 0.0004, "step": 381 }, { "epoch": 0.02461816072694464, "grad_norm": 0.027595556932529592, "learning_rate": 8.091980524498285e-06, "loss": 0.0, "step": 382 }, { "epoch": 0.024682606173873817, "grad_norm": 0.006354662520049507, "learning_rate": 8.095538812142118e-06, "loss": 0.0, "step": 383 }, { "epoch": 0.02474705162080299, "grad_norm": 0.013398883993908624, "learning_rate": 8.099087821320032e-06, "loss": 0.0001, "step": 384 }, { "epoch": 0.024811497067732165, "grad_norm": 0.018757425423641675, "learning_rate": 8.102627600294604e-06, "loss": 0.0, "step": 385 }, { "epoch": 0.02487594251466134, "grad_norm": 0.003749142883297565, "learning_rate": 8.106158196952835e-06, "loss": 0.0, "step": 386 }, { "epoch": 0.024940387961590513, "grad_norm": 0.37095629505467437, "learning_rate": 8.109679658810025e-06, "loss": 0.0019, "step": 387 }, { "epoch": 0.025004833408519687, "grad_norm": 0.049536866135149134, "learning_rate": 8.113192033013613e-06, "loss": 0.0001, "step": 388 }, { "epoch": 0.02506927885544886, "grad_norm": 0.05720039117501582, "learning_rate": 8.116695366346962e-06, "loss": 0.0002, "step": 389 }, { "epoch": 0.025133724302378035, "grad_norm": 0.013197769274937824, "learning_rate": 8.120189705233107e-06, "loss": 0.0, "step": 390 }, { "epoch": 0.025198169749307213, "grad_norm": 1.5365355994845276, "learning_rate": 8.12367509573843e-06, "loss": 0.0082, "step": 391 }, { "epoch": 0.025262615196236387, "grad_norm": 0.005337843706833008, "learning_rate": 8.127151583576319e-06, "loss": 0.0, "step": 392 }, { "epoch": 0.02532706064316556, "grad_norm": 0.009108647589636271, "learning_rate": 8.13061921411076e-06, "loss": 0.0001, "step": 393 }, { "epoch": 0.025391506090094735, "grad_norm": 0.012351875469581566, "learning_rate": 8.134078032359886e-06, "loss": 0.0001, "step": 394 }, { "epoch": 0.02545595153702391, "grad_norm": 0.0014107577675921742, "learning_rate": 8.137528082999486e-06, "loss": 0.0, "step": 395 }, { "epoch": 0.025520396983953083, "grad_norm": 0.05295340430722678, "learning_rate": 8.140969410366469e-06, "loss": 0.0001, "step": 396 }, { "epoch": 0.025584842430882257, "grad_norm": 0.5690354196975049, "learning_rate": 8.144402058462278e-06, "loss": 0.0023, "step": 397 }, { "epoch": 0.02564928787781143, "grad_norm": 0.03940801697463157, "learning_rate": 8.147826070956273e-06, "loss": 0.0001, "step": 398 }, { "epoch": 0.02571373332474061, "grad_norm": 0.011530897889634606, "learning_rate": 8.15124149118906e-06, "loss": 0.0001, "step": 399 }, { "epoch": 0.025778178771669782, "grad_norm": 0.02830996714923142, "learning_rate": 8.154648362175777e-06, "loss": 0.0002, "step": 400 }, { "epoch": 0.025842624218598956, "grad_norm": 0.00046499532858108194, "learning_rate": 8.15804672660936e-06, "loss": 0.0, "step": 401 }, { "epoch": 0.02590706966552813, "grad_norm": 0.26561429899763406, "learning_rate": 8.161436626863734e-06, "loss": 0.0004, "step": 402 }, { "epoch": 0.025971515112457304, "grad_norm": 0.014311267292635437, "learning_rate": 8.164818104996999e-06, "loss": 0.0001, "step": 403 }, { "epoch": 0.02603596055938648, "grad_norm": 0.033020361512505675, "learning_rate": 8.168191202754557e-06, "loss": 0.0001, "step": 404 }, { "epoch": 0.026100406006315652, "grad_norm": 0.01233642367780627, "learning_rate": 8.171555961572193e-06, "loss": 0.0001, "step": 405 }, { "epoch": 0.02616485145324483, "grad_norm": 0.030507303415610476, "learning_rate": 8.174912422579145e-06, "loss": 0.0, "step": 406 }, { "epoch": 0.026229296900174004, "grad_norm": 0.000771348551102021, "learning_rate": 8.178260626601112e-06, "loss": 0.0, "step": 407 }, { "epoch": 0.026293742347103178, "grad_norm": 0.04695325270252983, "learning_rate": 8.181600614163234e-06, "loss": 0.0001, "step": 408 }, { "epoch": 0.026358187794032352, "grad_norm": 0.004360910000568842, "learning_rate": 8.184932425493038e-06, "loss": 0.0, "step": 409 }, { "epoch": 0.026422633240961526, "grad_norm": 0.0022237242463113796, "learning_rate": 8.188256100523343e-06, "loss": 0.0, "step": 410 }, { "epoch": 0.0264870786878907, "grad_norm": 0.11268887113016282, "learning_rate": 8.191571678895127e-06, "loss": 0.0018, "step": 411 }, { "epoch": 0.026551524134819874, "grad_norm": 0.0014461989918299746, "learning_rate": 8.194879199960378e-06, "loss": 0.0, "step": 412 }, { "epoch": 0.026615969581749048, "grad_norm": 0.03791107941859059, "learning_rate": 8.19817870278488e-06, "loss": 0.0001, "step": 413 }, { "epoch": 0.026680415028678225, "grad_norm": 0.23998875971638906, "learning_rate": 8.20147022615099e-06, "loss": 0.002, "step": 414 }, { "epoch": 0.0267448604756074, "grad_norm": 0.012383002073283897, "learning_rate": 8.204753808560375e-06, "loss": 0.0, "step": 415 }, { "epoch": 0.026809305922536573, "grad_norm": 0.29471501301861663, "learning_rate": 8.208029488236709e-06, "loss": 0.0017, "step": 416 }, { "epoch": 0.026873751369465747, "grad_norm": 0.20318785025191824, "learning_rate": 8.211297303128342e-06, "loss": 0.0015, "step": 417 }, { "epoch": 0.02693819681639492, "grad_norm": 0.017450833706922735, "learning_rate": 8.214557290910945e-06, "loss": 0.0, "step": 418 }, { "epoch": 0.027002642263324095, "grad_norm": 0.012397289560402282, "learning_rate": 8.21780948899011e-06, "loss": 0.0, "step": 419 }, { "epoch": 0.02706708771025327, "grad_norm": 0.009342866096887397, "learning_rate": 8.22105393450393e-06, "loss": 0.0, "step": 420 }, { "epoch": 0.027131533157182443, "grad_norm": 0.7392758325675876, "learning_rate": 8.224290664325538e-06, "loss": 0.0034, "step": 421 }, { "epoch": 0.02719597860411162, "grad_norm": 0.19301211838897894, "learning_rate": 8.227519715065621e-06, "loss": 0.0007, "step": 422 }, { "epoch": 0.027260424051040795, "grad_norm": 0.04688608763614122, "learning_rate": 8.230741123074915e-06, "loss": 0.0001, "step": 423 }, { "epoch": 0.02732486949796997, "grad_norm": 0.04107700591487386, "learning_rate": 8.233954924446651e-06, "loss": 0.0005, "step": 424 }, { "epoch": 0.027389314944899143, "grad_norm": 0.0016944504864118827, "learning_rate": 8.23716115501898e-06, "loss": 0.0, "step": 425 }, { "epoch": 0.027453760391828317, "grad_norm": 0.14313535363424715, "learning_rate": 8.24035985037738e-06, "loss": 0.002, "step": 426 }, { "epoch": 0.02751820583875749, "grad_norm": 0.08606880084376932, "learning_rate": 8.243551045857016e-06, "loss": 0.0003, "step": 427 }, { "epoch": 0.027582651285686665, "grad_norm": 0.17907311266594605, "learning_rate": 8.246734776545088e-06, "loss": 0.0006, "step": 428 }, { "epoch": 0.02764709673261584, "grad_norm": 0.11154235338706936, "learning_rate": 8.249911077283146e-06, "loss": 0.0002, "step": 429 }, { "epoch": 0.027711542179545016, "grad_norm": 1.4785572282992487, "learning_rate": 8.253079982669373e-06, "loss": 0.0137, "step": 430 }, { "epoch": 0.02777598762647419, "grad_norm": 0.012928009033203125, "learning_rate": 8.25624152706085e-06, "loss": 0.0, "step": 431 }, { "epoch": 0.027840433073403364, "grad_norm": 0.003619215014048827, "learning_rate": 8.259395744575797e-06, "loss": 0.0, "step": 432 }, { "epoch": 0.02790487852033254, "grad_norm": 0.0014624501580332932, "learning_rate": 8.26254266909576e-06, "loss": 0.0, "step": 433 }, { "epoch": 0.027969323967261712, "grad_norm": 0.004199093101875544, "learning_rate": 8.265682334267826e-06, "loss": 0.0, "step": 434 }, { "epoch": 0.028033769414190886, "grad_norm": 0.021314337555709988, "learning_rate": 8.268814773506757e-06, "loss": 0.0001, "step": 435 }, { "epoch": 0.02809821486112006, "grad_norm": 0.03760360231074014, "learning_rate": 8.271940019997134e-06, "loss": 0.0001, "step": 436 }, { "epoch": 0.028162660308049238, "grad_norm": 0.005209692266127515, "learning_rate": 8.275058106695467e-06, "loss": 0.0, "step": 437 }, { "epoch": 0.028227105754978412, "grad_norm": 0.09817250645422441, "learning_rate": 8.278169066332278e-06, "loss": 0.0001, "step": 438 }, { "epoch": 0.028291551201907586, "grad_norm": 0.28345436264734336, "learning_rate": 8.28127293141416e-06, "loss": 0.0005, "step": 439 }, { "epoch": 0.02835599664883676, "grad_norm": 0.02705612753805196, "learning_rate": 8.284369734225816e-06, "loss": 0.0, "step": 440 }, { "epoch": 0.028420442095765934, "grad_norm": 0.07754971769711609, "learning_rate": 8.287459506832084e-06, "loss": 0.0001, "step": 441 }, { "epoch": 0.028484887542695108, "grad_norm": 0.03146103849257445, "learning_rate": 8.290542281079913e-06, "loss": 0.0, "step": 442 }, { "epoch": 0.028549332989624282, "grad_norm": 0.1758081044770802, "learning_rate": 8.293618088600338e-06, "loss": 0.0003, "step": 443 }, { "epoch": 0.028613778436553456, "grad_norm": 0.0029838889004879295, "learning_rate": 8.29668696081044e-06, "loss": 0.0, "step": 444 }, { "epoch": 0.028678223883482633, "grad_norm": 0.45164235672802683, "learning_rate": 8.299748928915249e-06, "loss": 0.001, "step": 445 }, { "epoch": 0.028742669330411807, "grad_norm": 0.0033308645237363874, "learning_rate": 8.302804023909669e-06, "loss": 0.0, "step": 446 }, { "epoch": 0.02880711477734098, "grad_norm": 0.025905336972041268, "learning_rate": 8.305852276580345e-06, "loss": 0.0001, "step": 447 }, { "epoch": 0.028871560224270156, "grad_norm": 0.0041218454884513304, "learning_rate": 8.308893717507532e-06, "loss": 0.0, "step": 448 }, { "epoch": 0.02893600567119933, "grad_norm": 0.12197986303560157, "learning_rate": 8.311928377066941e-06, "loss": 0.0004, "step": 449 }, { "epoch": 0.029000451118128504, "grad_norm": 0.08898476274019052, "learning_rate": 8.314956285431542e-06, "loss": 0.0018, "step": 450 }, { "epoch": 0.029064896565057678, "grad_norm": 0.09684278236993904, "learning_rate": 8.317977472573382e-06, "loss": 0.0006, "step": 451 }, { "epoch": 0.02912934201198685, "grad_norm": 0.05075467771608681, "learning_rate": 8.320991968265357e-06, "loss": 0.0005, "step": 452 }, { "epoch": 0.02919378745891603, "grad_norm": 0.02043870866338334, "learning_rate": 8.323999802082968e-06, "loss": 0.0, "step": 453 }, { "epoch": 0.029258232905845203, "grad_norm": 0.02380190989209756, "learning_rate": 8.327001003406075e-06, "loss": 0.0, "step": 454 }, { "epoch": 0.029322678352774377, "grad_norm": 0.19246463681493037, "learning_rate": 8.329995601420607e-06, "loss": 0.0007, "step": 455 }, { "epoch": 0.02938712379970355, "grad_norm": 0.0018403446570685027, "learning_rate": 8.332983625120273e-06, "loss": 0.0015, "step": 456 }, { "epoch": 0.029451569246632725, "grad_norm": 0.1614785790568277, "learning_rate": 8.335965103308235e-06, "loss": 0.0003, "step": 457 }, { "epoch": 0.0295160146935619, "grad_norm": 0.04677324625517432, "learning_rate": 8.338940064598788e-06, "loss": 0.0001, "step": 458 }, { "epoch": 0.029580460140491073, "grad_norm": 0.023722940740272376, "learning_rate": 8.341908537418999e-06, "loss": 0.0002, "step": 459 }, { "epoch": 0.029644905587420247, "grad_norm": 0.0071161812529181, "learning_rate": 8.34487055001034e-06, "loss": 0.0, "step": 460 }, { "epoch": 0.029709351034349425, "grad_norm": 0.01802171434180495, "learning_rate": 8.347826130430298e-06, "loss": 0.0001, "step": 461 }, { "epoch": 0.0297737964812786, "grad_norm": 0.35340842839961994, "learning_rate": 8.35077530655397e-06, "loss": 0.0026, "step": 462 }, { "epoch": 0.029838241928207773, "grad_norm": 0.061611551431616104, "learning_rate": 8.353718106075646e-06, "loss": 0.0002, "step": 463 }, { "epoch": 0.029902687375136947, "grad_norm": 0.04584732913277349, "learning_rate": 8.356654556510358e-06, "loss": 0.0002, "step": 464 }, { "epoch": 0.02996713282206612, "grad_norm": 0.1855844961474537, "learning_rate": 8.359584685195436e-06, "loss": 0.0005, "step": 465 }, { "epoch": 0.030031578268995295, "grad_norm": 0.02861397477113445, "learning_rate": 8.362508519292026e-06, "loss": 0.0003, "step": 466 }, { "epoch": 0.03009602371592447, "grad_norm": 0.04034164553115, "learning_rate": 8.365426085786605e-06, "loss": 0.0001, "step": 467 }, { "epoch": 0.030160469162853646, "grad_norm": 0.007763013674992482, "learning_rate": 8.368337411492474e-06, "loss": 0.0, "step": 468 }, { "epoch": 0.03022491460978282, "grad_norm": 0.0920300413920213, "learning_rate": 8.371242523051236e-06, "loss": 0.0001, "step": 469 }, { "epoch": 0.030289360056711994, "grad_norm": 0.13798576860239228, "learning_rate": 8.374141446934264e-06, "loss": 0.0006, "step": 470 }, { "epoch": 0.030353805503641168, "grad_norm": 0.013517930261967707, "learning_rate": 8.37703420944414e-06, "loss": 0.0, "step": 471 }, { "epoch": 0.030418250950570342, "grad_norm": 0.008036030340445328, "learning_rate": 8.379920836716092e-06, "loss": 0.0, "step": 472 }, { "epoch": 0.030482696397499516, "grad_norm": 0.005974472065890517, "learning_rate": 8.382801354719412e-06, "loss": 0.0, "step": 473 }, { "epoch": 0.03054714184442869, "grad_norm": 0.0008534880974867222, "learning_rate": 8.385675789258854e-06, "loss": 0.0, "step": 474 }, { "epoch": 0.030611587291357864, "grad_norm": 0.03526061844791487, "learning_rate": 8.388544165976018e-06, "loss": 0.0001, "step": 475 }, { "epoch": 0.03067603273828704, "grad_norm": 0.09276391030632433, "learning_rate": 8.391406510350738e-06, "loss": 0.0003, "step": 476 }, { "epoch": 0.030740478185216216, "grad_norm": 0.006227281517178593, "learning_rate": 8.394262847702416e-06, "loss": 0.0, "step": 477 }, { "epoch": 0.03080492363214539, "grad_norm": 0.0019055967033663934, "learning_rate": 8.39711320319139e-06, "loss": 0.0, "step": 478 }, { "epoch": 0.030869369079074564, "grad_norm": 0.008035130758411991, "learning_rate": 8.399957601820245e-06, "loss": 0.0, "step": 479 }, { "epoch": 0.030933814526003738, "grad_norm": 0.030565513157518043, "learning_rate": 8.402796068435144e-06, "loss": 0.0002, "step": 480 }, { "epoch": 0.03099825997293291, "grad_norm": 0.0010238980188911407, "learning_rate": 8.405628627727116e-06, "loss": 0.0, "step": 481 }, { "epoch": 0.031062705419862086, "grad_norm": 0.000852778331990603, "learning_rate": 8.408455304233356e-06, "loss": 0.0, "step": 482 }, { "epoch": 0.03112715086679126, "grad_norm": 0.009300486191113138, "learning_rate": 8.411276122338493e-06, "loss": 0.0001, "step": 483 }, { "epoch": 0.031191596313720437, "grad_norm": 0.006314212471481981, "learning_rate": 8.414091106275857e-06, "loss": 0.0001, "step": 484 }, { "epoch": 0.03125604176064961, "grad_norm": 0.13175345772435, "learning_rate": 8.416900280128724e-06, "loss": 0.0012, "step": 485 }, { "epoch": 0.03132048720757878, "grad_norm": 0.0034394697556541735, "learning_rate": 8.41970366783156e-06, "loss": 0.0, "step": 486 }, { "epoch": 0.03138493265450796, "grad_norm": 0.3510331194805854, "learning_rate": 8.422501293171231e-06, "loss": 0.0008, "step": 487 }, { "epoch": 0.031449378101437137, "grad_norm": 0.0006937932722181743, "learning_rate": 8.42529317978823e-06, "loss": 0.0, "step": 488 }, { "epoch": 0.03151382354836631, "grad_norm": 0.0395256834106489, "learning_rate": 8.42807935117786e-06, "loss": 0.0002, "step": 489 }, { "epoch": 0.031578268995295485, "grad_norm": 0.0195828954531926, "learning_rate": 8.430859830691432e-06, "loss": 0.0001, "step": 490 }, { "epoch": 0.031642714442224655, "grad_norm": 0.2022134980256737, "learning_rate": 8.43363464153744e-06, "loss": 0.0012, "step": 491 }, { "epoch": 0.03170715988915383, "grad_norm": 0.16900205486186845, "learning_rate": 8.436403806782708e-06, "loss": 0.0004, "step": 492 }, { "epoch": 0.031771605336083, "grad_norm": 0.0006769181918546838, "learning_rate": 8.43916734935356e-06, "loss": 0.0, "step": 493 }, { "epoch": 0.03183605078301218, "grad_norm": 0.013904361173820282, "learning_rate": 8.44192529203695e-06, "loss": 0.0001, "step": 494 }, { "epoch": 0.03190049622994136, "grad_norm": 0.004376416113878596, "learning_rate": 8.444677657481581e-06, "loss": 0.0, "step": 495 }, { "epoch": 0.03196494167687053, "grad_norm": 0.02817264340354307, "learning_rate": 8.44742446819904e-06, "loss": 0.0003, "step": 496 }, { "epoch": 0.032029387123799706, "grad_norm": 2.2377603134379584, "learning_rate": 8.450165746564882e-06, "loss": 0.0117, "step": 497 }, { "epoch": 0.03209383257072888, "grad_norm": 0.002553285998667623, "learning_rate": 8.452901514819742e-06, "loss": 0.0, "step": 498 }, { "epoch": 0.032158278017658054, "grad_norm": 0.00034067570616675476, "learning_rate": 8.455631795070407e-06, "loss": 0.0, "step": 499 }, { "epoch": 0.032222723464587225, "grad_norm": 0.35536890409234767, "learning_rate": 8.45835660929089e-06, "loss": 0.0018, "step": 500 }, { "epoch": 0.0322871689115164, "grad_norm": 0.003508252971943265, "learning_rate": 8.461075979323489e-06, "loss": 0.0, "step": 501 }, { "epoch": 0.03235161435844557, "grad_norm": 0.7101809909652864, "learning_rate": 8.463789926879845e-06, "loss": 0.0045, "step": 502 }, { "epoch": 0.03241605980537475, "grad_norm": 0.0013628442685876426, "learning_rate": 8.466498473541972e-06, "loss": 0.0, "step": 503 }, { "epoch": 0.03248050525230393, "grad_norm": 0.34192186522539375, "learning_rate": 8.469201640763297e-06, "loss": 0.0016, "step": 504 }, { "epoch": 0.0325449506992331, "grad_norm": 0.0034762216357610365, "learning_rate": 8.471899449869671e-06, "loss": 0.0, "step": 505 }, { "epoch": 0.032609396146162276, "grad_norm": 0.12617818828412725, "learning_rate": 8.474591922060378e-06, "loss": 0.0004, "step": 506 }, { "epoch": 0.032673841593091446, "grad_norm": 0.004196338433318039, "learning_rate": 8.47727907840915e-06, "loss": 0.0, "step": 507 }, { "epoch": 0.032738287040020624, "grad_norm": 0.00876693332198306, "learning_rate": 8.479960939865135e-06, "loss": 0.0001, "step": 508 }, { "epoch": 0.032802732486949794, "grad_norm": 0.0004212675822051207, "learning_rate": 8.482637527253888e-06, "loss": 0.0, "step": 509 }, { "epoch": 0.03286717793387897, "grad_norm": 0.004896906167491933, "learning_rate": 8.485308861278346e-06, "loss": 0.0, "step": 510 }, { "epoch": 0.03293162338080815, "grad_norm": 0.0006167679203622598, "learning_rate": 8.487974962519779e-06, "loss": 0.0, "step": 511 }, { "epoch": 0.03299606882773732, "grad_norm": 0.25578066656271004, "learning_rate": 8.490635851438747e-06, "loss": 0.0011, "step": 512 }, { "epoch": 0.0330605142746665, "grad_norm": 0.016222739160781905, "learning_rate": 8.493291548376036e-06, "loss": 0.0001, "step": 513 }, { "epoch": 0.03312495972159567, "grad_norm": 0.00399666735491616, "learning_rate": 8.495942073553605e-06, "loss": 0.0, "step": 514 }, { "epoch": 0.033189405168524845, "grad_norm": 0.9266883816050113, "learning_rate": 8.498587447075492e-06, "loss": 0.0033, "step": 515 }, { "epoch": 0.033253850615454016, "grad_norm": 0.0028993043429181614, "learning_rate": 8.50122768892874e-06, "loss": 0.0, "step": 516 }, { "epoch": 0.03331829606238319, "grad_norm": 0.27063704065226896, "learning_rate": 8.503862818984304e-06, "loss": 0.0021, "step": 517 }, { "epoch": 0.033382741509312364, "grad_norm": 0.010379239929541071, "learning_rate": 8.50649285699794e-06, "loss": 0.0, "step": 518 }, { "epoch": 0.03344718695624154, "grad_norm": 0.35729035839734724, "learning_rate": 8.509117822611103e-06, "loss": 0.0023, "step": 519 }, { "epoch": 0.03351163240317072, "grad_norm": 0.30007881033376443, "learning_rate": 8.511737735351822e-06, "loss": 0.0011, "step": 520 }, { "epoch": 0.03357607785009989, "grad_norm": 0.12294530839587339, "learning_rate": 8.514352614635567e-06, "loss": 0.0001, "step": 521 }, { "epoch": 0.03364052329702907, "grad_norm": 0.11914525257566484, "learning_rate": 8.516962479766123e-06, "loss": 0.0002, "step": 522 }, { "epoch": 0.03370496874395824, "grad_norm": 0.03495502834888825, "learning_rate": 8.51956734993644e-06, "loss": 0.0001, "step": 523 }, { "epoch": 0.033769414190887415, "grad_norm": 0.34509190983321814, "learning_rate": 8.522167244229476e-06, "loss": 0.0104, "step": 524 }, { "epoch": 0.033833859637816585, "grad_norm": 0.2735239028658671, "learning_rate": 8.524762181619042e-06, "loss": 0.0005, "step": 525 }, { "epoch": 0.03389830508474576, "grad_norm": 0.4540046998770102, "learning_rate": 8.527352180970633e-06, "loss": 0.0051, "step": 526 }, { "epoch": 0.03396275053167494, "grad_norm": 0.016232761018629926, "learning_rate": 8.529937261042241e-06, "loss": 0.0, "step": 527 }, { "epoch": 0.03402719597860411, "grad_norm": 0.08212446411675045, "learning_rate": 8.532517440485183e-06, "loss": 0.0001, "step": 528 }, { "epoch": 0.03409164142553329, "grad_norm": 0.077195666137957, "learning_rate": 8.535092737844902e-06, "loss": 0.0001, "step": 529 }, { "epoch": 0.03415608687246246, "grad_norm": 0.3339083512543114, "learning_rate": 8.537663171561763e-06, "loss": 0.0034, "step": 530 }, { "epoch": 0.034220532319391636, "grad_norm": 0.06010152089013261, "learning_rate": 8.540228759971857e-06, "loss": 0.0001, "step": 531 }, { "epoch": 0.03428497776632081, "grad_norm": 0.10862170243882367, "learning_rate": 8.542789521307773e-06, "loss": 0.0001, "step": 532 }, { "epoch": 0.034349423213249984, "grad_norm": 0.0018621187038332806, "learning_rate": 8.545345473699385e-06, "loss": 0.0, "step": 533 }, { "epoch": 0.03441386866017916, "grad_norm": 0.5433106478048796, "learning_rate": 8.547896635174616e-06, "loss": 0.0025, "step": 534 }, { "epoch": 0.03447831410710833, "grad_norm": 0.02162303799301777, "learning_rate": 8.550443023660201e-06, "loss": 0.0, "step": 535 }, { "epoch": 0.03454275955403751, "grad_norm": 0.16319136153064215, "learning_rate": 8.55298465698245e-06, "loss": 0.0017, "step": 536 }, { "epoch": 0.03460720500096668, "grad_norm": 0.03575839023267142, "learning_rate": 8.555521552867987e-06, "loss": 0.0, "step": 537 }, { "epoch": 0.03467165044789586, "grad_norm": 0.21404037256804487, "learning_rate": 8.558053728944501e-06, "loss": 0.0002, "step": 538 }, { "epoch": 0.03473609589482503, "grad_norm": 0.007927826619897955, "learning_rate": 8.560581202741473e-06, "loss": 0.0, "step": 539 }, { "epoch": 0.034800541341754206, "grad_norm": 0.0006026644939430921, "learning_rate": 8.563103991690909e-06, "loss": 0.0, "step": 540 }, { "epoch": 0.034864986788683376, "grad_norm": 0.001941445168647103, "learning_rate": 8.565622113128059e-06, "loss": 0.0, "step": 541 }, { "epoch": 0.034929432235612554, "grad_norm": 0.048530455674446166, "learning_rate": 8.568135584292137e-06, "loss": 0.0007, "step": 542 }, { "epoch": 0.03499387768254173, "grad_norm": 0.0007215121741700993, "learning_rate": 8.570644422327015e-06, "loss": 0.0, "step": 543 }, { "epoch": 0.0350583231294709, "grad_norm": 0.0026185945721582315, "learning_rate": 8.57314864428195e-06, "loss": 0.0, "step": 544 }, { "epoch": 0.03512276857640008, "grad_norm": 0.3393699807556369, "learning_rate": 8.575648267112246e-06, "loss": 0.0049, "step": 545 }, { "epoch": 0.03518721402332925, "grad_norm": 0.19151880886172956, "learning_rate": 8.578143307679974e-06, "loss": 0.0008, "step": 546 }, { "epoch": 0.03525165947025843, "grad_norm": 0.04460153439407813, "learning_rate": 8.580633782754635e-06, "loss": 0.0005, "step": 547 }, { "epoch": 0.0353161049171876, "grad_norm": 0.271443576628783, "learning_rate": 8.583119709013842e-06, "loss": 0.0013, "step": 548 }, { "epoch": 0.035380550364116775, "grad_norm": 0.0041845092527643, "learning_rate": 8.585601103043993e-06, "loss": 0.0, "step": 549 }, { "epoch": 0.03544499581104595, "grad_norm": 0.007688605143187043, "learning_rate": 8.58807798134093e-06, "loss": 0.0, "step": 550 }, { "epoch": 0.03550944125797512, "grad_norm": 0.0011524979515473482, "learning_rate": 8.5905503603106e-06, "loss": 0.0, "step": 551 }, { "epoch": 0.0355738867049043, "grad_norm": 0.004469706196756287, "learning_rate": 8.593018256269704e-06, "loss": 0.0, "step": 552 }, { "epoch": 0.03563833215183347, "grad_norm": 0.0017258531883411576, "learning_rate": 8.595481685446355e-06, "loss": 0.0, "step": 553 }, { "epoch": 0.03570277759876265, "grad_norm": 0.011111525603690426, "learning_rate": 8.597940663980695e-06, "loss": 0.0, "step": 554 }, { "epoch": 0.03576722304569182, "grad_norm": 0.0024214964647003984, "learning_rate": 8.600395207925551e-06, "loss": 0.0, "step": 555 }, { "epoch": 0.035831668492621, "grad_norm": 0.002982207597299224, "learning_rate": 8.602845333247056e-06, "loss": 0.0, "step": 556 }, { "epoch": 0.035896113939550174, "grad_norm": 0.0003296500930890877, "learning_rate": 8.605291055825273e-06, "loss": 0.0, "step": 557 }, { "epoch": 0.035960559386479345, "grad_norm": 0.02805379203644968, "learning_rate": 8.607732391454803e-06, "loss": 0.0, "step": 558 }, { "epoch": 0.03602500483340852, "grad_norm": 0.007168128548352624, "learning_rate": 8.610169355845417e-06, "loss": 0.0, "step": 559 }, { "epoch": 0.03608945028033769, "grad_norm": 0.008010737554365606, "learning_rate": 8.612601964622646e-06, "loss": 0.0, "step": 560 }, { "epoch": 0.03615389572726687, "grad_norm": 0.006762966873986022, "learning_rate": 8.615030233328387e-06, "loss": 0.0001, "step": 561 }, { "epoch": 0.03621834117419604, "grad_norm": 0.00104232572711473, "learning_rate": 8.617454177421499e-06, "loss": 0.0, "step": 562 }, { "epoch": 0.03628278662112522, "grad_norm": 2.3966805549541053, "learning_rate": 8.619873812278397e-06, "loss": 0.0212, "step": 563 }, { "epoch": 0.03634723206805439, "grad_norm": 0.002023202811533298, "learning_rate": 8.622289153193632e-06, "loss": 0.0, "step": 564 }, { "epoch": 0.036411677514983566, "grad_norm": 0.08835435758202559, "learning_rate": 8.624700215380469e-06, "loss": 0.0, "step": 565 }, { "epoch": 0.036476122961912744, "grad_norm": 0.008966135734965502, "learning_rate": 8.62710701397147e-06, "loss": 0.0, "step": 566 }, { "epoch": 0.036540568408841914, "grad_norm": 0.3744501413902579, "learning_rate": 8.62950956401906e-06, "loss": 0.0006, "step": 567 }, { "epoch": 0.03660501385577109, "grad_norm": 0.074906811338477, "learning_rate": 8.631907880496095e-06, "loss": 0.0002, "step": 568 }, { "epoch": 0.03666945930270026, "grad_norm": 0.0002814712078328361, "learning_rate": 8.634301978296409e-06, "loss": 0.0, "step": 569 }, { "epoch": 0.03673390474962944, "grad_norm": 0.0007091569765689754, "learning_rate": 8.636691872235386e-06, "loss": 0.0, "step": 570 }, { "epoch": 0.03679835019655861, "grad_norm": 0.15370993098858876, "learning_rate": 8.639077577050498e-06, "loss": 0.0012, "step": 571 }, { "epoch": 0.03686279564348779, "grad_norm": 0.09234985727520918, "learning_rate": 8.64145910740186e-06, "loss": 0.002, "step": 572 }, { "epoch": 0.036927241090416965, "grad_norm": 0.19519613614308395, "learning_rate": 8.643836477872763e-06, "loss": 0.0006, "step": 573 }, { "epoch": 0.036991686537346136, "grad_norm": 0.13928936096695185, "learning_rate": 8.64620970297021e-06, "loss": 0.0021, "step": 574 }, { "epoch": 0.03705613198427531, "grad_norm": 0.013526277478610292, "learning_rate": 8.648578797125451e-06, "loss": 0.0, "step": 575 }, { "epoch": 0.037120577431204484, "grad_norm": 0.036371678731640995, "learning_rate": 8.65094377469451e-06, "loss": 0.0001, "step": 576 }, { "epoch": 0.03718502287813366, "grad_norm": 0.011606647574347847, "learning_rate": 8.6533046499587e-06, "loss": 0.0, "step": 577 }, { "epoch": 0.03724946832506283, "grad_norm": 0.00018151496333934717, "learning_rate": 8.655661437125153e-06, "loss": 0.0, "step": 578 }, { "epoch": 0.03731391377199201, "grad_norm": 0.017874406288697416, "learning_rate": 8.658014150327315e-06, "loss": 0.0002, "step": 579 }, { "epoch": 0.03737835921892119, "grad_norm": 0.0006531733163911647, "learning_rate": 8.66036280362547e-06, "loss": 0.0, "step": 580 }, { "epoch": 0.03744280466585036, "grad_norm": 0.0015501879208044541, "learning_rate": 8.662707411007244e-06, "loss": 0.0, "step": 581 }, { "epoch": 0.037507250112779535, "grad_norm": 0.0014950575720748577, "learning_rate": 8.665047986388091e-06, "loss": 0.0, "step": 582 }, { "epoch": 0.037571695559708705, "grad_norm": 0.00018370767475683753, "learning_rate": 8.667384543611802e-06, "loss": 0.0, "step": 583 }, { "epoch": 0.03763614100663788, "grad_norm": 0.5265942085748113, "learning_rate": 8.669717096450994e-06, "loss": 0.0023, "step": 584 }, { "epoch": 0.03770058645356705, "grad_norm": 0.0009574933177824663, "learning_rate": 8.672045658607586e-06, "loss": 0.0, "step": 585 }, { "epoch": 0.03776503190049623, "grad_norm": 0.6674461997778207, "learning_rate": 8.674370243713298e-06, "loss": 0.0003, "step": 586 }, { "epoch": 0.0378294773474254, "grad_norm": 0.7010925375594455, "learning_rate": 8.676690865330125e-06, "loss": 0.0029, "step": 587 }, { "epoch": 0.03789392279435458, "grad_norm": 0.00532852021202004, "learning_rate": 8.6790075369508e-06, "loss": 0.0, "step": 588 }, { "epoch": 0.037958368241283756, "grad_norm": 0.005449903198530545, "learning_rate": 8.68132027199928e-06, "loss": 0.0, "step": 589 }, { "epoch": 0.03802281368821293, "grad_norm": 0.10746727764211438, "learning_rate": 8.683629083831205e-06, "loss": 0.0002, "step": 590 }, { "epoch": 0.038087259135142104, "grad_norm": 0.016064785820550875, "learning_rate": 8.685933985734367e-06, "loss": 0.0, "step": 591 }, { "epoch": 0.038151704582071275, "grad_norm": 0.0029501290312424723, "learning_rate": 8.688234990929155e-06, "loss": 0.0, "step": 592 }, { "epoch": 0.03821615002900045, "grad_norm": 0.0013237907721366717, "learning_rate": 8.690532112569025e-06, "loss": 0.0, "step": 593 }, { "epoch": 0.03828059547592962, "grad_norm": 0.2628805365475962, "learning_rate": 8.692825363740948e-06, "loss": 0.0008, "step": 594 }, { "epoch": 0.0383450409228588, "grad_norm": 0.002112624002434478, "learning_rate": 8.69511475746585e-06, "loss": 0.0, "step": 595 }, { "epoch": 0.03840948636978798, "grad_norm": 0.14168079395771904, "learning_rate": 8.69740030669906e-06, "loss": 0.0013, "step": 596 }, { "epoch": 0.03847393181671715, "grad_norm": 0.21190947426606038, "learning_rate": 8.699682024330754e-06, "loss": 0.003, "step": 597 }, { "epoch": 0.038538377263646326, "grad_norm": 0.003101969641557718, "learning_rate": 8.701959923186383e-06, "loss": 0.0, "step": 598 }, { "epoch": 0.038602822710575496, "grad_norm": 0.009290021857474596, "learning_rate": 8.704234016027115e-06, "loss": 0.0, "step": 599 }, { "epoch": 0.038667268157504674, "grad_norm": 0.004114591117807323, "learning_rate": 8.706504315550256e-06, "loss": 0.0, "step": 600 }, { "epoch": 0.038731713604433844, "grad_norm": 0.16130475782513984, "learning_rate": 8.708770834389678e-06, "loss": 0.002, "step": 601 }, { "epoch": 0.03879615905136302, "grad_norm": 0.003502346821683403, "learning_rate": 8.71103358511624e-06, "loss": 0.0, "step": 602 }, { "epoch": 0.03886060449829219, "grad_norm": 0.002125771925744141, "learning_rate": 8.713292580238214e-06, "loss": 0.0, "step": 603 }, { "epoch": 0.03892504994522137, "grad_norm": 0.00033336581023886793, "learning_rate": 8.715547832201683e-06, "loss": 0.0, "step": 604 }, { "epoch": 0.03898949539215055, "grad_norm": 0.032290644222526255, "learning_rate": 8.71779935339097e-06, "loss": 0.0001, "step": 605 }, { "epoch": 0.03905394083907972, "grad_norm": 14.158548400095667, "learning_rate": 8.720047156129036e-06, "loss": 0.0439, "step": 606 }, { "epoch": 0.039118386286008895, "grad_norm": 0.12495020157562878, "learning_rate": 8.722291252677892e-06, "loss": 0.0006, "step": 607 }, { "epoch": 0.039182831732938066, "grad_norm": 0.0017411839254716896, "learning_rate": 8.724531655238988e-06, "loss": 0.0, "step": 608 }, { "epoch": 0.03924727717986724, "grad_norm": 0.0005025559677200699, "learning_rate": 8.726768375953625e-06, "loss": 0.0, "step": 609 }, { "epoch": 0.039311722626796414, "grad_norm": 0.00043016709712481065, "learning_rate": 8.729001426903342e-06, "loss": 0.0, "step": 610 }, { "epoch": 0.03937616807372559, "grad_norm": 0.011355258759439064, "learning_rate": 8.731230820110307e-06, "loss": 0.0, "step": 611 }, { "epoch": 0.03944061352065477, "grad_norm": 0.018387622737482987, "learning_rate": 8.733456567537714e-06, "loss": 0.0002, "step": 612 }, { "epoch": 0.03950505896758394, "grad_norm": 0.012411244035460034, "learning_rate": 8.735678681090153e-06, "loss": 0.0, "step": 613 }, { "epoch": 0.03956950441451312, "grad_norm": 0.01917335761000915, "learning_rate": 8.737897172614007e-06, "loss": 0.0001, "step": 614 }, { "epoch": 0.03963394986144229, "grad_norm": 0.0012099117817259695, "learning_rate": 8.74011205389782e-06, "loss": 0.0, "step": 615 }, { "epoch": 0.039698395308371465, "grad_norm": 0.011013451182717668, "learning_rate": 8.742323336672685e-06, "loss": 0.0, "step": 616 }, { "epoch": 0.039762840755300635, "grad_norm": 0.0009947618007444038, "learning_rate": 8.744531032612602e-06, "loss": 0.0, "step": 617 }, { "epoch": 0.03982728620222981, "grad_norm": 0.007576749125775888, "learning_rate": 8.746735153334857e-06, "loss": 0.0001, "step": 618 }, { "epoch": 0.03989173164915899, "grad_norm": 0.117186272137885, "learning_rate": 8.74893571040039e-06, "loss": 0.0017, "step": 619 }, { "epoch": 0.03995617709608816, "grad_norm": 0.11402360435857845, "learning_rate": 8.751132715314151e-06, "loss": 0.0002, "step": 620 }, { "epoch": 0.04002062254301734, "grad_norm": 2.0487442217612113, "learning_rate": 8.75332617952547e-06, "loss": 0.0063, "step": 621 }, { "epoch": 0.04008506798994651, "grad_norm": 0.2021698686969095, "learning_rate": 8.755516114428408e-06, "loss": 0.0005, "step": 622 }, { "epoch": 0.040149513436875686, "grad_norm": 0.0006979889077255704, "learning_rate": 8.757702531362116e-06, "loss": 0.0, "step": 623 }, { "epoch": 0.04021395888380486, "grad_norm": 0.0057224222695230215, "learning_rate": 8.759885441611187e-06, "loss": 0.0, "step": 624 }, { "epoch": 0.040278404330734034, "grad_norm": 0.0025807465767095474, "learning_rate": 8.762064856406002e-06, "loss": 0.0, "step": 625 }, { "epoch": 0.040342849777663205, "grad_norm": 0.3693147077263644, "learning_rate": 8.76424078692308e-06, "loss": 0.0005, "step": 626 }, { "epoch": 0.04040729522459238, "grad_norm": 0.0006090807770591003, "learning_rate": 8.766413244285424e-06, "loss": 0.0, "step": 627 }, { "epoch": 0.04047174067152156, "grad_norm": 0.03648462138970883, "learning_rate": 8.768582239562856e-06, "loss": 0.0001, "step": 628 }, { "epoch": 0.04053618611845073, "grad_norm": 0.0005767880342411257, "learning_rate": 8.770747783772357e-06, "loss": 0.0, "step": 629 }, { "epoch": 0.04060063156537991, "grad_norm": 0.0006805918616400947, "learning_rate": 8.77290988787841e-06, "loss": 0.0, "step": 630 }, { "epoch": 0.04066507701230908, "grad_norm": 0.004117036655468099, "learning_rate": 8.775068562793323e-06, "loss": 0.0, "step": 631 }, { "epoch": 0.040729522459238256, "grad_norm": 0.0007426168119818877, "learning_rate": 8.777223819377568e-06, "loss": 0.0, "step": 632 }, { "epoch": 0.040793967906167426, "grad_norm": 0.011304413014642186, "learning_rate": 8.7793756684401e-06, "loss": 0.0, "step": 633 }, { "epoch": 0.040858413353096604, "grad_norm": 0.0004935244143254786, "learning_rate": 8.78152412073869e-06, "loss": 0.0, "step": 634 }, { "epoch": 0.04092285880002578, "grad_norm": 0.007054770802510865, "learning_rate": 8.783669186980247e-06, "loss": 0.0, "step": 635 }, { "epoch": 0.04098730424695495, "grad_norm": 0.01369389714554964, "learning_rate": 8.78581087782113e-06, "loss": 0.0, "step": 636 }, { "epoch": 0.04105174969388413, "grad_norm": 0.011906267839883657, "learning_rate": 8.787949203867476e-06, "loss": 0.0, "step": 637 }, { "epoch": 0.0411161951408133, "grad_norm": 0.006885284197584247, "learning_rate": 8.790084175675511e-06, "loss": 0.0, "step": 638 }, { "epoch": 0.04118064058774248, "grad_norm": 0.4487910847199546, "learning_rate": 8.792215803751859e-06, "loss": 0.0024, "step": 639 }, { "epoch": 0.04124508603467165, "grad_norm": 0.9246561480973362, "learning_rate": 8.794344098553859e-06, "loss": 0.0008, "step": 640 }, { "epoch": 0.041309531481600825, "grad_norm": 0.04488668151610257, "learning_rate": 8.796469070489866e-06, "loss": 0.0005, "step": 641 }, { "epoch": 0.04137397692853, "grad_norm": 0.027408085647675932, "learning_rate": 8.798590729919567e-06, "loss": 0.0, "step": 642 }, { "epoch": 0.04143842237545917, "grad_norm": 0.0017001050288757774, "learning_rate": 8.800709087154267e-06, "loss": 0.0, "step": 643 }, { "epoch": 0.04150286782238835, "grad_norm": 0.16256402501267625, "learning_rate": 8.802824152457208e-06, "loss": 0.0003, "step": 644 }, { "epoch": 0.04156731326931752, "grad_norm": 0.011062200472460433, "learning_rate": 8.804935936043852e-06, "loss": 0.0, "step": 645 }, { "epoch": 0.0416317587162467, "grad_norm": 0.021831654093761303, "learning_rate": 8.80704444808219e-06, "loss": 0.0001, "step": 646 }, { "epoch": 0.04169620416317587, "grad_norm": 0.06078771214529044, "learning_rate": 8.809149698693027e-06, "loss": 0.0001, "step": 647 }, { "epoch": 0.04176064961010505, "grad_norm": 0.005953470628247643, "learning_rate": 8.811251697950276e-06, "loss": 0.0, "step": 648 }, { "epoch": 0.04182509505703422, "grad_norm": 0.2050906677043541, "learning_rate": 8.813350455881246e-06, "loss": 0.0007, "step": 649 }, { "epoch": 0.041889540503963395, "grad_norm": 0.1971951160380812, "learning_rate": 8.815445982466934e-06, "loss": 0.0003, "step": 650 }, { "epoch": 0.04195398595089257, "grad_norm": 0.00236395117113748, "learning_rate": 8.817538287642305e-06, "loss": 0.0, "step": 651 }, { "epoch": 0.04201843139782174, "grad_norm": 0.1256566977010292, "learning_rate": 8.819627381296574e-06, "loss": 0.0007, "step": 652 }, { "epoch": 0.04208287684475092, "grad_norm": 0.07903941680245861, "learning_rate": 8.82171327327349e-06, "loss": 0.0003, "step": 653 }, { "epoch": 0.04214732229168009, "grad_norm": 0.39863904829441993, "learning_rate": 8.823795973371614e-06, "loss": 0.0012, "step": 654 }, { "epoch": 0.04221176773860927, "grad_norm": 0.035371382107604585, "learning_rate": 8.825875491344588e-06, "loss": 0.0001, "step": 655 }, { "epoch": 0.04227621318553844, "grad_norm": 0.02279703556094779, "learning_rate": 8.827951836901422e-06, "loss": 0.0, "step": 656 }, { "epoch": 0.042340658632467616, "grad_norm": 0.26524165484813206, "learning_rate": 8.830025019706755e-06, "loss": 0.0005, "step": 657 }, { "epoch": 0.042405104079396794, "grad_norm": 0.01918668875669408, "learning_rate": 8.832095049381132e-06, "loss": 0.0001, "step": 658 }, { "epoch": 0.042469549526325964, "grad_norm": 0.02418418424942438, "learning_rate": 8.834161935501262e-06, "loss": 0.0002, "step": 659 }, { "epoch": 0.04253399497325514, "grad_norm": 0.0029525133059810048, "learning_rate": 8.836225687600296e-06, "loss": 0.0, "step": 660 }, { "epoch": 0.04259844042018431, "grad_norm": 0.06152243840450862, "learning_rate": 8.838286315168083e-06, "loss": 0.0001, "step": 661 }, { "epoch": 0.04266288586711349, "grad_norm": 0.05055057189679331, "learning_rate": 8.840343827651438e-06, "loss": 0.0004, "step": 662 }, { "epoch": 0.04272733131404266, "grad_norm": 0.00030230320615462757, "learning_rate": 8.842398234454391e-06, "loss": 0.0, "step": 663 }, { "epoch": 0.04279177676097184, "grad_norm": 0.022563544469127325, "learning_rate": 8.844449544938457e-06, "loss": 0.0, "step": 664 }, { "epoch": 0.042856222207901015, "grad_norm": 0.0010129811466353747, "learning_rate": 8.846497768422887e-06, "loss": 0.0, "step": 665 }, { "epoch": 0.042920667654830186, "grad_norm": 0.10487420670137693, "learning_rate": 8.84854291418492e-06, "loss": 0.0001, "step": 666 }, { "epoch": 0.04298511310175936, "grad_norm": 0.13032998283350847, "learning_rate": 8.850584991460033e-06, "loss": 0.0024, "step": 667 }, { "epoch": 0.043049558548688534, "grad_norm": 0.12460349571325573, "learning_rate": 8.852624009442204e-06, "loss": 0.0001, "step": 668 }, { "epoch": 0.04311400399561771, "grad_norm": 0.0003381800828124894, "learning_rate": 8.854659977284147e-06, "loss": 0.0, "step": 669 }, { "epoch": 0.04317844944254688, "grad_norm": 0.007937901512867782, "learning_rate": 8.856692904097561e-06, "loss": 0.0001, "step": 670 }, { "epoch": 0.04324289488947606, "grad_norm": 0.0008805466292661837, "learning_rate": 8.85872279895338e-06, "loss": 0.0, "step": 671 }, { "epoch": 0.04330734033640523, "grad_norm": 0.17617806573889097, "learning_rate": 8.860749670882013e-06, "loss": 0.0005, "step": 672 }, { "epoch": 0.04337178578333441, "grad_norm": 0.00395744069753078, "learning_rate": 8.862773528873578e-06, "loss": 0.0, "step": 673 }, { "epoch": 0.043436231230263585, "grad_norm": 0.007247134466748965, "learning_rate": 8.864794381878157e-06, "loss": 0.0, "step": 674 }, { "epoch": 0.043500676677192755, "grad_norm": 0.0030663660709385806, "learning_rate": 8.86681223880602e-06, "loss": 0.0, "step": 675 }, { "epoch": 0.04356512212412193, "grad_norm": 0.017393769671059423, "learning_rate": 8.868827108527866e-06, "loss": 0.0, "step": 676 }, { "epoch": 0.0436295675710511, "grad_norm": 0.0017702782273335328, "learning_rate": 8.87083899987505e-06, "loss": 0.0, "step": 677 }, { "epoch": 0.04369401301798028, "grad_norm": 0.00022959416712921537, "learning_rate": 8.872847921639834e-06, "loss": 0.0, "step": 678 }, { "epoch": 0.04375845846490945, "grad_norm": 0.003605868948068563, "learning_rate": 8.874853882575593e-06, "loss": 0.0, "step": 679 }, { "epoch": 0.04382290391183863, "grad_norm": 0.01744342059980127, "learning_rate": 8.876856891397061e-06, "loss": 0.0001, "step": 680 }, { "epoch": 0.043887349358767806, "grad_norm": 7.388464259112656e-05, "learning_rate": 8.878856956780554e-06, "loss": 0.0, "step": 681 }, { "epoch": 0.04395179480569698, "grad_norm": 0.0013601676504224963, "learning_rate": 8.880854087364192e-06, "loss": 0.0, "step": 682 }, { "epoch": 0.044016240252626154, "grad_norm": 0.2798160049980701, "learning_rate": 8.882848291748122e-06, "loss": 0.0011, "step": 683 }, { "epoch": 0.044080685699555325, "grad_norm": 0.14433741459071334, "learning_rate": 8.884839578494751e-06, "loss": 0.0004, "step": 684 }, { "epoch": 0.0441451311464845, "grad_norm": 0.24179156302192892, "learning_rate": 8.886827956128954e-06, "loss": 0.002, "step": 685 }, { "epoch": 0.04420957659341367, "grad_norm": 0.0021927636562543086, "learning_rate": 8.8888134331383e-06, "loss": 0.0, "step": 686 }, { "epoch": 0.04427402204034285, "grad_norm": 0.0022068358536655036, "learning_rate": 8.890796017973267e-06, "loss": 0.0, "step": 687 }, { "epoch": 0.04433846748727202, "grad_norm": 0.005372660175485759, "learning_rate": 8.892775719047455e-06, "loss": 0.0, "step": 688 }, { "epoch": 0.0444029129342012, "grad_norm": 0.03758966528205878, "learning_rate": 8.894752544737809e-06, "loss": 0.0001, "step": 689 }, { "epoch": 0.044467358381130376, "grad_norm": 0.012306964234103844, "learning_rate": 8.896726503384818e-06, "loss": 0.0, "step": 690 }, { "epoch": 0.044531803828059546, "grad_norm": 0.00020675266164698914, "learning_rate": 8.898697603292746e-06, "loss": 0.0, "step": 691 }, { "epoch": 0.044596249274988724, "grad_norm": 0.0004554410369827967, "learning_rate": 8.900665852729818e-06, "loss": 0.0, "step": 692 }, { "epoch": 0.044660694721917894, "grad_norm": 0.007791950816193934, "learning_rate": 8.90263125992845e-06, "loss": 0.0, "step": 693 }, { "epoch": 0.04472514016884707, "grad_norm": 0.18602995977878675, "learning_rate": 8.904593833085437e-06, "loss": 0.0005, "step": 694 }, { "epoch": 0.04478958561577624, "grad_norm": 0.013594795936494878, "learning_rate": 8.90655358036217e-06, "loss": 0.0001, "step": 695 }, { "epoch": 0.04485403106270542, "grad_norm": 0.00023714633586855382, "learning_rate": 8.908510509884837e-06, "loss": 0.0, "step": 696 }, { "epoch": 0.0449184765096346, "grad_norm": 6.832725945019009e-05, "learning_rate": 8.910464629744626e-06, "loss": 0.0, "step": 697 }, { "epoch": 0.04498292195656377, "grad_norm": 0.016889844296641223, "learning_rate": 8.912415947997922e-06, "loss": 0.0003, "step": 698 }, { "epoch": 0.045047367403492945, "grad_norm": 0.2184353592157856, "learning_rate": 8.914364472666504e-06, "loss": 0.0026, "step": 699 }, { "epoch": 0.045111812850422116, "grad_norm": 0.25465223325641045, "learning_rate": 8.916310211737758e-06, "loss": 0.0008, "step": 700 }, { "epoch": 0.04517625829735129, "grad_norm": 0.003655815925584125, "learning_rate": 8.918253173164853e-06, "loss": 0.0, "step": 701 }, { "epoch": 0.045240703744280464, "grad_norm": 0.0006114981310893153, "learning_rate": 8.92019336486695e-06, "loss": 0.0, "step": 702 }, { "epoch": 0.04530514919120964, "grad_norm": 0.00010137673386137189, "learning_rate": 8.922130794729396e-06, "loss": 0.0, "step": 703 }, { "epoch": 0.04536959463813882, "grad_norm": 0.07228969160342366, "learning_rate": 8.924065470603898e-06, "loss": 0.0001, "step": 704 }, { "epoch": 0.04543404008506799, "grad_norm": 0.1913084734088233, "learning_rate": 8.925997400308744e-06, "loss": 0.0004, "step": 705 }, { "epoch": 0.04549848553199717, "grad_norm": 0.07917869010187947, "learning_rate": 8.927926591628964e-06, "loss": 0.0003, "step": 706 }, { "epoch": 0.04556293097892634, "grad_norm": 0.06977438315324237, "learning_rate": 8.929853052316538e-06, "loss": 0.0004, "step": 707 }, { "epoch": 0.045627376425855515, "grad_norm": 0.034029532033426, "learning_rate": 8.931776790090572e-06, "loss": 0.0001, "step": 708 }, { "epoch": 0.045691821872784685, "grad_norm": 0.08348560267343069, "learning_rate": 8.933697812637488e-06, "loss": 0.0002, "step": 709 }, { "epoch": 0.04575626731971386, "grad_norm": 0.051544153316252506, "learning_rate": 8.935616127611207e-06, "loss": 0.0017, "step": 710 }, { "epoch": 0.04582071276664303, "grad_norm": 0.0763091812179522, "learning_rate": 8.937531742633331e-06, "loss": 0.0001, "step": 711 }, { "epoch": 0.04588515821357221, "grad_norm": 0.0007173089122636117, "learning_rate": 8.939444665293331e-06, "loss": 0.0, "step": 712 }, { "epoch": 0.04594960366050139, "grad_norm": 0.004598107264150894, "learning_rate": 8.941354903148714e-06, "loss": 0.0, "step": 713 }, { "epoch": 0.04601404910743056, "grad_norm": 0.000392021647295444, "learning_rate": 8.943262463725215e-06, "loss": 0.0, "step": 714 }, { "epoch": 0.046078494554359736, "grad_norm": 0.005239661838366242, "learning_rate": 8.945167354516973e-06, "loss": 0.0, "step": 715 }, { "epoch": 0.04614294000128891, "grad_norm": 0.013039220056873272, "learning_rate": 8.947069582986702e-06, "loss": 0.0, "step": 716 }, { "epoch": 0.046207385448218084, "grad_norm": 0.7156779940111023, "learning_rate": 8.948969156565867e-06, "loss": 0.0028, "step": 717 }, { "epoch": 0.046271830895147255, "grad_norm": 0.0014232633069768458, "learning_rate": 8.950866082654867e-06, "loss": 0.0, "step": 718 }, { "epoch": 0.04633627634207643, "grad_norm": 0.010360478298695039, "learning_rate": 8.952760368623196e-06, "loss": 0.0, "step": 719 }, { "epoch": 0.04640072178900561, "grad_norm": 0.04740542080782063, "learning_rate": 8.954652021809624e-06, "loss": 0.0002, "step": 720 }, { "epoch": 0.04646516723593478, "grad_norm": 0.0013033683657919647, "learning_rate": 8.956541049522359e-06, "loss": 0.0, "step": 721 }, { "epoch": 0.04652961268286396, "grad_norm": 0.00024011095477306243, "learning_rate": 8.958427459039227e-06, "loss": 0.0, "step": 722 }, { "epoch": 0.04659405812979313, "grad_norm": 0.03165036091424995, "learning_rate": 8.960311257607835e-06, "loss": 0.0016, "step": 723 }, { "epoch": 0.046658503576722306, "grad_norm": 0.0037302307404365937, "learning_rate": 8.96219245244573e-06, "loss": 0.0, "step": 724 }, { "epoch": 0.046722949023651476, "grad_norm": 0.11715760644466812, "learning_rate": 8.964071050740584e-06, "loss": 0.0004, "step": 725 }, { "epoch": 0.046787394470580654, "grad_norm": 0.011645889650838596, "learning_rate": 8.965947059650336e-06, "loss": 0.0001, "step": 726 }, { "epoch": 0.04685183991750983, "grad_norm": 0.06984104940667903, "learning_rate": 8.967820486303374e-06, "loss": 0.0003, "step": 727 }, { "epoch": 0.046916285364439, "grad_norm": 0.32142027752446717, "learning_rate": 8.96969133779869e-06, "loss": 0.0005, "step": 728 }, { "epoch": 0.04698073081136818, "grad_norm": 2.551444979388958, "learning_rate": 8.971559621206039e-06, "loss": 0.0197, "step": 729 }, { "epoch": 0.04704517625829735, "grad_norm": 0.002081532217039823, "learning_rate": 8.973425343566106e-06, "loss": 0.0, "step": 730 }, { "epoch": 0.04710962170522653, "grad_norm": 0.25030308471193197, "learning_rate": 8.975288511890657e-06, "loss": 0.0052, "step": 731 }, { "epoch": 0.0471740671521557, "grad_norm": 0.005732779290674802, "learning_rate": 8.977149133162707e-06, "loss": 0.0, "step": 732 }, { "epoch": 0.047238512599084875, "grad_norm": 0.10988615433689451, "learning_rate": 8.979007214336669e-06, "loss": 0.0003, "step": 733 }, { "epoch": 0.047302958046014046, "grad_norm": 0.12829035230675495, "learning_rate": 8.98086276233851e-06, "loss": 0.0003, "step": 734 }, { "epoch": 0.04736740349294322, "grad_norm": 0.007249099234747826, "learning_rate": 8.982715784065911e-06, "loss": 0.0, "step": 735 }, { "epoch": 0.0474318489398724, "grad_norm": 0.019692113896025192, "learning_rate": 8.984566286388422e-06, "loss": 0.0001, "step": 736 }, { "epoch": 0.04749629438680157, "grad_norm": 0.05206044806916652, "learning_rate": 8.986414276147602e-06, "loss": 0.0002, "step": 737 }, { "epoch": 0.04756073983373075, "grad_norm": 0.028860954812302157, "learning_rate": 8.988259760157187e-06, "loss": 0.0001, "step": 738 }, { "epoch": 0.04762518528065992, "grad_norm": 0.001301629719628138, "learning_rate": 8.990102745203234e-06, "loss": 0.0, "step": 739 }, { "epoch": 0.0476896307275891, "grad_norm": 0.002345407602635431, "learning_rate": 8.991943238044267e-06, "loss": 0.0, "step": 740 }, { "epoch": 0.04775407617451827, "grad_norm": 0.12231212697510714, "learning_rate": 8.993781245411428e-06, "loss": 0.0004, "step": 741 }, { "epoch": 0.047818521621447445, "grad_norm": 0.049014023716665675, "learning_rate": 8.995616774008632e-06, "loss": 0.0001, "step": 742 }, { "epoch": 0.04788296706837662, "grad_norm": 0.009068422831759652, "learning_rate": 8.9974498305127e-06, "loss": 0.0, "step": 743 }, { "epoch": 0.04794741251530579, "grad_norm": 0.004244489514567139, "learning_rate": 8.999280421573518e-06, "loss": 0.0, "step": 744 }, { "epoch": 0.04801185796223497, "grad_norm": 0.10947748900064816, "learning_rate": 9.001108553814172e-06, "loss": 0.0003, "step": 745 }, { "epoch": 0.04807630340916414, "grad_norm": 0.008424072505390963, "learning_rate": 9.002934233831098e-06, "loss": 0.0, "step": 746 }, { "epoch": 0.04814074885609332, "grad_norm": 0.006270599434854108, "learning_rate": 9.004757468194222e-06, "loss": 0.0, "step": 747 }, { "epoch": 0.04820519430302249, "grad_norm": 0.01742854568377238, "learning_rate": 9.006578263447102e-06, "loss": 0.0001, "step": 748 }, { "epoch": 0.048269639749951666, "grad_norm": 0.024492512070331957, "learning_rate": 9.008396626107069e-06, "loss": 0.0, "step": 749 }, { "epoch": 0.048334085196880844, "grad_norm": 0.12119974988300376, "learning_rate": 9.010212562665369e-06, "loss": 0.0001, "step": 750 }, { "epoch": 0.048398530643810014, "grad_norm": 0.06679123539568732, "learning_rate": 9.012026079587298e-06, "loss": 0.0001, "step": 751 }, { "epoch": 0.04846297609073919, "grad_norm": 0.07117144798083709, "learning_rate": 9.013837183312346e-06, "loss": 0.0001, "step": 752 }, { "epoch": 0.04852742153766836, "grad_norm": 0.00487018289232718, "learning_rate": 9.015645880254325e-06, "loss": 0.0, "step": 753 }, { "epoch": 0.04859186698459754, "grad_norm": 0.612146846985091, "learning_rate": 9.017452176801516e-06, "loss": 0.0025, "step": 754 }, { "epoch": 0.04865631243152671, "grad_norm": 0.0034136926349857006, "learning_rate": 9.019256079316795e-06, "loss": 0.0, "step": 755 }, { "epoch": 0.04872075787845589, "grad_norm": 0.12108867773077127, "learning_rate": 9.021057594137776e-06, "loss": 0.0002, "step": 756 }, { "epoch": 0.04878520332538506, "grad_norm": 0.004504167955212749, "learning_rate": 9.022856727576939e-06, "loss": 0.0, "step": 757 }, { "epoch": 0.048849648772314236, "grad_norm": 0.07505442996143205, "learning_rate": 9.024653485921763e-06, "loss": 0.0015, "step": 758 }, { "epoch": 0.04891409421924341, "grad_norm": 0.0005453544489624827, "learning_rate": 9.026447875434859e-06, "loss": 0.0, "step": 759 }, { "epoch": 0.048978539666172584, "grad_norm": 0.34459969236495225, "learning_rate": 9.0282399023541e-06, "loss": 0.0002, "step": 760 }, { "epoch": 0.04904298511310176, "grad_norm": 0.18131343537161349, "learning_rate": 9.030029572892756e-06, "loss": 0.0007, "step": 761 }, { "epoch": 0.04910743056003093, "grad_norm": 0.02098021280926811, "learning_rate": 9.031816893239614e-06, "loss": 0.0, "step": 762 }, { "epoch": 0.04917187600696011, "grad_norm": 0.08627074285048919, "learning_rate": 9.033601869559115e-06, "loss": 0.0035, "step": 763 }, { "epoch": 0.04923632145388928, "grad_norm": 0.002511861290816773, "learning_rate": 9.035384507991479e-06, "loss": 0.0, "step": 764 }, { "epoch": 0.04930076690081846, "grad_norm": 0.40511111030437946, "learning_rate": 9.037164814652826e-06, "loss": 0.0008, "step": 765 }, { "epoch": 0.049365212347747635, "grad_norm": 0.14034011913080038, "learning_rate": 9.038942795635312e-06, "loss": 0.0001, "step": 766 }, { "epoch": 0.049429657794676805, "grad_norm": 0.06861259699001675, "learning_rate": 9.040718457007249e-06, "loss": 0.0001, "step": 767 }, { "epoch": 0.04949410324160598, "grad_norm": 0.001581944552875584, "learning_rate": 9.042491804813226e-06, "loss": 0.0, "step": 768 }, { "epoch": 0.04955854868853515, "grad_norm": 0.005308040387458921, "learning_rate": 9.044262845074235e-06, "loss": 0.0, "step": 769 }, { "epoch": 0.04962299413546433, "grad_norm": 0.0019728464218824234, "learning_rate": 9.046031583787798e-06, "loss": 0.0, "step": 770 }, { "epoch": 0.0496874395823935, "grad_norm": 0.003223276699460636, "learning_rate": 9.047798026928083e-06, "loss": 0.0, "step": 771 }, { "epoch": 0.04975188502932268, "grad_norm": 0.021868800717484028, "learning_rate": 9.049562180446029e-06, "loss": 0.0001, "step": 772 }, { "epoch": 0.04981633047625185, "grad_norm": 0.1826315175518166, "learning_rate": 9.051324050269462e-06, "loss": 0.0003, "step": 773 }, { "epoch": 0.04988077592318103, "grad_norm": 0.29998356456240793, "learning_rate": 9.05308364230322e-06, "loss": 0.0011, "step": 774 }, { "epoch": 0.049945221370110204, "grad_norm": 0.5431729488475763, "learning_rate": 9.054840962429265e-06, "loss": 0.0004, "step": 775 }, { "epoch": 0.050009666817039375, "grad_norm": 0.009489328773548649, "learning_rate": 9.056596016506807e-06, "loss": 0.0, "step": 776 }, { "epoch": 0.05007411226396855, "grad_norm": 0.00046785632344008304, "learning_rate": 9.05834881037242e-06, "loss": 0.0, "step": 777 }, { "epoch": 0.05013855771089772, "grad_norm": 0.01111245605224189, "learning_rate": 9.060099349840158e-06, "loss": 0.0001, "step": 778 }, { "epoch": 0.0502030031578269, "grad_norm": 0.07606922165075893, "learning_rate": 9.061847640701665e-06, "loss": 0.0005, "step": 779 }, { "epoch": 0.05026744860475607, "grad_norm": 0.000699417674606527, "learning_rate": 9.063593688726301e-06, "loss": 0.0, "step": 780 }, { "epoch": 0.05033189405168525, "grad_norm": 0.010236254823261563, "learning_rate": 9.065337499661248e-06, "loss": 0.0, "step": 781 }, { "epoch": 0.050396339498614426, "grad_norm": 0.0017980719947457786, "learning_rate": 9.067079079231624e-06, "loss": 0.0, "step": 782 }, { "epoch": 0.050460784945543596, "grad_norm": 0.000939549117843024, "learning_rate": 9.068818433140602e-06, "loss": 0.0, "step": 783 }, { "epoch": 0.050525230392472774, "grad_norm": 0.002781551161266302, "learning_rate": 9.070555567069513e-06, "loss": 0.0, "step": 784 }, { "epoch": 0.050589675839401944, "grad_norm": 0.0035230904250238056, "learning_rate": 9.072290486677968e-06, "loss": 0.0, "step": 785 }, { "epoch": 0.05065412128633112, "grad_norm": 0.01560564350924316, "learning_rate": 9.074023197603955e-06, "loss": 0.0001, "step": 786 }, { "epoch": 0.05071856673326029, "grad_norm": 0.010400567996276689, "learning_rate": 9.075753705463962e-06, "loss": 0.0, "step": 787 }, { "epoch": 0.05078301218018947, "grad_norm": 0.00015725166531810745, "learning_rate": 9.07748201585308e-06, "loss": 0.0, "step": 788 }, { "epoch": 0.05084745762711865, "grad_norm": 0.0016245494880248916, "learning_rate": 9.079208134345112e-06, "loss": 0.0, "step": 789 }, { "epoch": 0.05091190307404782, "grad_norm": 0.001113785553319258, "learning_rate": 9.080932066492682e-06, "loss": 0.0, "step": 790 }, { "epoch": 0.050976348520976995, "grad_norm": 0.46446961485109706, "learning_rate": 9.082653817827336e-06, "loss": 0.0007, "step": 791 }, { "epoch": 0.051040793967906166, "grad_norm": 0.0013411261450823374, "learning_rate": 9.084373393859663e-06, "loss": 0.0, "step": 792 }, { "epoch": 0.05110523941483534, "grad_norm": 0.004991003306657957, "learning_rate": 9.086090800079385e-06, "loss": 0.0, "step": 793 }, { "epoch": 0.051169684861764514, "grad_norm": 0.0013036785024449445, "learning_rate": 9.087806041955472e-06, "loss": 0.0, "step": 794 }, { "epoch": 0.05123413030869369, "grad_norm": 0.00048091056047600136, "learning_rate": 9.089519124936242e-06, "loss": 0.0, "step": 795 }, { "epoch": 0.05129857575562286, "grad_norm": 0.0016136776036838984, "learning_rate": 9.091230054449467e-06, "loss": 0.0, "step": 796 }, { "epoch": 0.05136302120255204, "grad_norm": 0.4099738669208059, "learning_rate": 9.092938835902477e-06, "loss": 0.0029, "step": 797 }, { "epoch": 0.05142746664948122, "grad_norm": 0.003942835183172, "learning_rate": 9.094645474682253e-06, "loss": 0.0, "step": 798 }, { "epoch": 0.05149191209641039, "grad_norm": 0.0030768112635003782, "learning_rate": 9.096349976155548e-06, "loss": 0.0, "step": 799 }, { "epoch": 0.051556357543339565, "grad_norm": 0.006397829046396678, "learning_rate": 9.098052345668971e-06, "loss": 0.0, "step": 800 }, { "epoch": 0.051620802990268735, "grad_norm": 0.06002623182371448, "learning_rate": 9.099752588549096e-06, "loss": 0.0007, "step": 801 }, { "epoch": 0.05168524843719791, "grad_norm": 3.3841404940161315, "learning_rate": 9.101450710102554e-06, "loss": 0.034, "step": 802 }, { "epoch": 0.05174969388412708, "grad_norm": 0.0013937209491712162, "learning_rate": 9.103146715616145e-06, "loss": 0.0, "step": 803 }, { "epoch": 0.05181413933105626, "grad_norm": 6.458562080363265e-05, "learning_rate": 9.104840610356928e-06, "loss": 0.0, "step": 804 }, { "epoch": 0.05187858477798544, "grad_norm": 0.3579401105183063, "learning_rate": 9.10653239957232e-06, "loss": 0.0025, "step": 805 }, { "epoch": 0.05194303022491461, "grad_norm": 0.008360636412067086, "learning_rate": 9.108222088490193e-06, "loss": 0.0, "step": 806 }, { "epoch": 0.052007475671843786, "grad_norm": 0.005285542514094032, "learning_rate": 9.10990968231898e-06, "loss": 0.0, "step": 807 }, { "epoch": 0.05207192111877296, "grad_norm": 0.0002770779211620817, "learning_rate": 9.11159518624775e-06, "loss": 0.0, "step": 808 }, { "epoch": 0.052136366565702134, "grad_norm": 0.0011585447889919547, "learning_rate": 9.113278605446331e-06, "loss": 0.0, "step": 809 }, { "epoch": 0.052200812012631305, "grad_norm": 0.049277994096405396, "learning_rate": 9.114959945065387e-06, "loss": 0.0004, "step": 810 }, { "epoch": 0.05226525745956048, "grad_norm": 0.00015442997540374582, "learning_rate": 9.116639210236513e-06, "loss": 0.0, "step": 811 }, { "epoch": 0.05232970290648966, "grad_norm": 0.0013635697356662283, "learning_rate": 9.11831640607234e-06, "loss": 0.0, "step": 812 }, { "epoch": 0.05239414835341883, "grad_norm": 0.016716470890307505, "learning_rate": 9.119991537666614e-06, "loss": 0.0002, "step": 813 }, { "epoch": 0.05245859380034801, "grad_norm": 0.0005278616955096009, "learning_rate": 9.121664610094306e-06, "loss": 0.0, "step": 814 }, { "epoch": 0.05252303924727718, "grad_norm": 0.0025750142539637693, "learning_rate": 9.123335628411687e-06, "loss": 0.0, "step": 815 }, { "epoch": 0.052587484694206356, "grad_norm": 0.00020521592683312736, "learning_rate": 9.125004597656428e-06, "loss": 0.0, "step": 816 }, { "epoch": 0.052651930141135526, "grad_norm": 0.19445270116812716, "learning_rate": 9.126671522847696e-06, "loss": 0.0004, "step": 817 }, { "epoch": 0.052716375588064704, "grad_norm": 0.3771810287388212, "learning_rate": 9.128336408986232e-06, "loss": 0.0025, "step": 818 }, { "epoch": 0.052780821034993874, "grad_norm": 0.05887705540169662, "learning_rate": 9.129999261054454e-06, "loss": 0.0001, "step": 819 }, { "epoch": 0.05284526648192305, "grad_norm": 0.1534011955489274, "learning_rate": 9.131660084016536e-06, "loss": 0.0003, "step": 820 }, { "epoch": 0.05290971192885223, "grad_norm": 0.013809870076849355, "learning_rate": 9.133318882818504e-06, "loss": 0.0001, "step": 821 }, { "epoch": 0.0529741573757814, "grad_norm": 0.0032549074867175556, "learning_rate": 9.134975662388321e-06, "loss": 0.0, "step": 822 }, { "epoch": 0.05303860282271058, "grad_norm": 0.009309798570618937, "learning_rate": 9.136630427635978e-06, "loss": 0.0001, "step": 823 }, { "epoch": 0.05310304826963975, "grad_norm": 0.0033493742577040365, "learning_rate": 9.138283183453572e-06, "loss": 0.0, "step": 824 }, { "epoch": 0.053167493716568925, "grad_norm": 0.018859710507561127, "learning_rate": 9.139933934715408e-06, "loss": 0.0001, "step": 825 }, { "epoch": 0.053231939163498096, "grad_norm": 0.01560803326672018, "learning_rate": 9.141582686278072e-06, "loss": 0.0, "step": 826 }, { "epoch": 0.05329638461042727, "grad_norm": 0.00016964226944774627, "learning_rate": 9.143229442980527e-06, "loss": 0.0, "step": 827 }, { "epoch": 0.05336083005735645, "grad_norm": 0.022061608980455484, "learning_rate": 9.144874209644185e-06, "loss": 0.0016, "step": 828 }, { "epoch": 0.05342527550428562, "grad_norm": 0.006649718737306569, "learning_rate": 9.146516991073004e-06, "loss": 0.0, "step": 829 }, { "epoch": 0.0534897209512148, "grad_norm": 0.00042874270227666826, "learning_rate": 9.148157792053569e-06, "loss": 0.0, "step": 830 }, { "epoch": 0.05355416639814397, "grad_norm": 0.2741124949858014, "learning_rate": 9.149796617355175e-06, "loss": 0.0042, "step": 831 }, { "epoch": 0.05361861184507315, "grad_norm": 0.03657944854462583, "learning_rate": 9.151433471729903e-06, "loss": 0.0001, "step": 832 }, { "epoch": 0.05368305729200232, "grad_norm": 0.001000460494932859, "learning_rate": 9.153068359912718e-06, "loss": 0.0, "step": 833 }, { "epoch": 0.053747502738931495, "grad_norm": 0.4291703666910219, "learning_rate": 9.154701286621536e-06, "loss": 0.0005, "step": 834 }, { "epoch": 0.05381194818586067, "grad_norm": 0.011767546566647835, "learning_rate": 9.156332256557316e-06, "loss": 0.0, "step": 835 }, { "epoch": 0.05387639363278984, "grad_norm": 0.02017886370790413, "learning_rate": 9.157961274404139e-06, "loss": 0.0001, "step": 836 }, { "epoch": 0.05394083907971902, "grad_norm": 0.1635306058218916, "learning_rate": 9.159588344829283e-06, "loss": 0.0003, "step": 837 }, { "epoch": 0.05400528452664819, "grad_norm": 0.03241790156834949, "learning_rate": 9.161213472483306e-06, "loss": 0.0001, "step": 838 }, { "epoch": 0.05406972997357737, "grad_norm": 0.03645090452502005, "learning_rate": 9.162836662000131e-06, "loss": 0.0, "step": 839 }, { "epoch": 0.05413417542050654, "grad_norm": 0.001018123489854683, "learning_rate": 9.164457917997124e-06, "loss": 0.0, "step": 840 }, { "epoch": 0.054198620867435716, "grad_norm": 0.0006147523546883594, "learning_rate": 9.166077245075165e-06, "loss": 0.0, "step": 841 }, { "epoch": 0.05426306631436489, "grad_norm": 0.03234380830020073, "learning_rate": 9.167694647818732e-06, "loss": 0.0002, "step": 842 }, { "epoch": 0.054327511761294064, "grad_norm": 0.30908209151377397, "learning_rate": 9.16931013079598e-06, "loss": 0.0003, "step": 843 }, { "epoch": 0.05439195720822324, "grad_norm": 0.03522377846818942, "learning_rate": 9.170923698558816e-06, "loss": 0.0005, "step": 844 }, { "epoch": 0.05445640265515241, "grad_norm": 0.000277197422768193, "learning_rate": 9.172535355642978e-06, "loss": 0.0, "step": 845 }, { "epoch": 0.05452084810208159, "grad_norm": 0.0016432842093718164, "learning_rate": 9.174145106568109e-06, "loss": 0.0, "step": 846 }, { "epoch": 0.05458529354901076, "grad_norm": 0.013591225137140847, "learning_rate": 9.175752955837838e-06, "loss": 0.0, "step": 847 }, { "epoch": 0.05464973899593994, "grad_norm": 0.8462395497498096, "learning_rate": 9.177358907939845e-06, "loss": 0.0031, "step": 848 }, { "epoch": 0.05471418444286911, "grad_norm": 0.04412306889026068, "learning_rate": 9.178962967345948e-06, "loss": 0.0001, "step": 849 }, { "epoch": 0.054778629889798286, "grad_norm": 0.005405304038502618, "learning_rate": 9.180565138512175e-06, "loss": 0.0, "step": 850 }, { "epoch": 0.05484307533672746, "grad_norm": 0.3794935924368555, "learning_rate": 9.182165425878829e-06, "loss": 0.0019, "step": 851 }, { "epoch": 0.054907520783656634, "grad_norm": 0.0020777573880359597, "learning_rate": 9.183763833870574e-06, "loss": 0.0, "step": 852 }, { "epoch": 0.05497196623058581, "grad_norm": 0.004563794466339452, "learning_rate": 9.185360366896503e-06, "loss": 0.0, "step": 853 }, { "epoch": 0.05503641167751498, "grad_norm": 0.007595697852477659, "learning_rate": 9.186955029350209e-06, "loss": 0.0001, "step": 854 }, { "epoch": 0.05510085712444416, "grad_norm": 0.021470438571486155, "learning_rate": 9.188547825609863e-06, "loss": 0.0003, "step": 855 }, { "epoch": 0.05516530257137333, "grad_norm": 0.03807929940940631, "learning_rate": 9.190138760038282e-06, "loss": 0.0001, "step": 856 }, { "epoch": 0.05522974801830251, "grad_norm": 0.052610970411618635, "learning_rate": 9.191727836983e-06, "loss": 0.0002, "step": 857 }, { "epoch": 0.05529419346523168, "grad_norm": 0.011456006523472022, "learning_rate": 9.19331506077634e-06, "loss": 0.0001, "step": 858 }, { "epoch": 0.055358638912160855, "grad_norm": 0.010611982148422123, "learning_rate": 9.19490043573549e-06, "loss": 0.0, "step": 859 }, { "epoch": 0.05542308435909003, "grad_norm": 0.00036207342749774215, "learning_rate": 9.196483966162567e-06, "loss": 0.0, "step": 860 }, { "epoch": 0.055487529806019203, "grad_norm": 0.006096769717651004, "learning_rate": 9.198065656344689e-06, "loss": 0.0, "step": 861 }, { "epoch": 0.05555197525294838, "grad_norm": 0.0033176289929112016, "learning_rate": 9.199645510554044e-06, "loss": 0.0, "step": 862 }, { "epoch": 0.05561642069987755, "grad_norm": 0.07427952499156017, "learning_rate": 9.201223533047966e-06, "loss": 0.0007, "step": 863 }, { "epoch": 0.05568086614680673, "grad_norm": 0.007857354582267988, "learning_rate": 9.20279972806899e-06, "loss": 0.0, "step": 864 }, { "epoch": 0.0557453115937359, "grad_norm": 0.43490041132666496, "learning_rate": 9.204374099844932e-06, "loss": 0.003, "step": 865 }, { "epoch": 0.05580975704066508, "grad_norm": 0.004341780147231225, "learning_rate": 9.205946652588954e-06, "loss": 0.0, "step": 866 }, { "epoch": 0.055874202487594254, "grad_norm": 0.02007426507979371, "learning_rate": 9.20751739049963e-06, "loss": 0.0001, "step": 867 }, { "epoch": 0.055938647934523425, "grad_norm": 6.445465882152348e-05, "learning_rate": 9.20908631776102e-06, "loss": 0.0, "step": 868 }, { "epoch": 0.0560030933814526, "grad_norm": 0.01036632348269938, "learning_rate": 9.21065343854272e-06, "loss": 0.0001, "step": 869 }, { "epoch": 0.05606753882838177, "grad_norm": 0.03182250548398591, "learning_rate": 9.21221875699995e-06, "loss": 0.0001, "step": 870 }, { "epoch": 0.05613198427531095, "grad_norm": 0.0027731863618260023, "learning_rate": 9.213782277273607e-06, "loss": 0.0, "step": 871 }, { "epoch": 0.05619642972224012, "grad_norm": 0.02141632130848708, "learning_rate": 9.215344003490328e-06, "loss": 0.0, "step": 872 }, { "epoch": 0.0562608751691693, "grad_norm": 0.00841137676276659, "learning_rate": 9.21690393976257e-06, "loss": 0.0, "step": 873 }, { "epoch": 0.056325320616098476, "grad_norm": 0.0026180638976685206, "learning_rate": 9.21846209018866e-06, "loss": 0.0, "step": 874 }, { "epoch": 0.056389766063027646, "grad_norm": 0.1417267476215166, "learning_rate": 9.220018458852871e-06, "loss": 0.0002, "step": 875 }, { "epoch": 0.056454211509956824, "grad_norm": 0.0016003214834285813, "learning_rate": 9.221573049825472e-06, "loss": 0.0, "step": 876 }, { "epoch": 0.056518656956885995, "grad_norm": 0.0008617063924082198, "learning_rate": 9.223125867162807e-06, "loss": 0.0, "step": 877 }, { "epoch": 0.05658310240381517, "grad_norm": 0.00019133509686306216, "learning_rate": 9.224676914907353e-06, "loss": 0.0, "step": 878 }, { "epoch": 0.05664754785074434, "grad_norm": 0.09435838250675625, "learning_rate": 9.226226197087778e-06, "loss": 0.0009, "step": 879 }, { "epoch": 0.05671199329767352, "grad_norm": 0.002137334572342066, "learning_rate": 9.22777371771901e-06, "loss": 0.0, "step": 880 }, { "epoch": 0.05677643874460269, "grad_norm": 0.00500561291614537, "learning_rate": 9.229319480802301e-06, "loss": 0.0, "step": 881 }, { "epoch": 0.05684088419153187, "grad_norm": 0.0018617593146569935, "learning_rate": 9.230863490325278e-06, "loss": 0.0, "step": 882 }, { "epoch": 0.056905329638461045, "grad_norm": 0.003249829210823486, "learning_rate": 9.232405750262018e-06, "loss": 0.0, "step": 883 }, { "epoch": 0.056969775085390216, "grad_norm": 4.155492727945956, "learning_rate": 9.233946264573107e-06, "loss": 0.0309, "step": 884 }, { "epoch": 0.057034220532319393, "grad_norm": 0.0004600202922205456, "learning_rate": 9.235485037205686e-06, "loss": 0.0, "step": 885 }, { "epoch": 0.057098665979248564, "grad_norm": 0.0009887524614545788, "learning_rate": 9.237022072093532e-06, "loss": 0.0, "step": 886 }, { "epoch": 0.05716311142617774, "grad_norm": 0.12449591667280237, "learning_rate": 9.238557373157111e-06, "loss": 0.0002, "step": 887 }, { "epoch": 0.05722755687310691, "grad_norm": 0.015552755601544609, "learning_rate": 9.240090944303633e-06, "loss": 0.0001, "step": 888 }, { "epoch": 0.05729200232003609, "grad_norm": 0.23141060741991862, "learning_rate": 9.241622789427116e-06, "loss": 0.0004, "step": 889 }, { "epoch": 0.05735644776696527, "grad_norm": 0.8822941904283358, "learning_rate": 9.243152912408443e-06, "loss": 0.0065, "step": 890 }, { "epoch": 0.05742089321389444, "grad_norm": 0.00029090730639586807, "learning_rate": 9.244681317115427e-06, "loss": 0.0, "step": 891 }, { "epoch": 0.057485338660823615, "grad_norm": 0.001579554953343648, "learning_rate": 9.246208007402863e-06, "loss": 0.0, "step": 892 }, { "epoch": 0.057549784107752786, "grad_norm": 0.023213539630016298, "learning_rate": 9.247732987112587e-06, "loss": 0.0001, "step": 893 }, { "epoch": 0.05761422955468196, "grad_norm": 0.0024890347909475733, "learning_rate": 9.249256260073538e-06, "loss": 0.0, "step": 894 }, { "epoch": 0.057678675001611134, "grad_norm": 0.000585152467749838, "learning_rate": 9.250777830101814e-06, "loss": 0.0, "step": 895 }, { "epoch": 0.05774312044854031, "grad_norm": 0.028285490200250774, "learning_rate": 9.252297701000728e-06, "loss": 0.0, "step": 896 }, { "epoch": 0.05780756589546949, "grad_norm": 0.10915553690810925, "learning_rate": 9.253815876560862e-06, "loss": 0.0008, "step": 897 }, { "epoch": 0.05787201134239866, "grad_norm": 0.0002786805844268407, "learning_rate": 9.255332360560135e-06, "loss": 0.0, "step": 898 }, { "epoch": 0.057936456789327836, "grad_norm": 0.005925699088651387, "learning_rate": 9.256847156763845e-06, "loss": 0.0001, "step": 899 }, { "epoch": 0.05800090223625701, "grad_norm": 0.0033125726030387532, "learning_rate": 9.258360268924736e-06, "loss": 0.0, "step": 900 }, { "epoch": 0.058065347683186185, "grad_norm": 0.010476595702934775, "learning_rate": 9.259871700783048e-06, "loss": 0.0, "step": 901 }, { "epoch": 0.058129793130115355, "grad_norm": 0.02884480152810691, "learning_rate": 9.261381456066576e-06, "loss": 0.0001, "step": 902 }, { "epoch": 0.05819423857704453, "grad_norm": 0.02782736705714902, "learning_rate": 9.26288953849072e-06, "loss": 0.0003, "step": 903 }, { "epoch": 0.0582586840239737, "grad_norm": 0.10276730883429447, "learning_rate": 9.264395951758548e-06, "loss": 0.0014, "step": 904 }, { "epoch": 0.05832312947090288, "grad_norm": 0.021272532663555836, "learning_rate": 9.265900699560844e-06, "loss": 0.0, "step": 905 }, { "epoch": 0.05838757491783206, "grad_norm": 0.32919439257374566, "learning_rate": 9.267403785576162e-06, "loss": 0.0004, "step": 906 }, { "epoch": 0.05845202036476123, "grad_norm": 0.03212588290219352, "learning_rate": 9.268905213470882e-06, "loss": 0.0001, "step": 907 }, { "epoch": 0.058516465811690406, "grad_norm": 0.005368526072576281, "learning_rate": 9.270404986899269e-06, "loss": 0.0, "step": 908 }, { "epoch": 0.05858091125861958, "grad_norm": 0.004630751419976039, "learning_rate": 9.271903109503515e-06, "loss": 0.0, "step": 909 }, { "epoch": 0.058645356705548754, "grad_norm": 0.04599784677651563, "learning_rate": 9.273399584913803e-06, "loss": 0.0001, "step": 910 }, { "epoch": 0.058709802152477925, "grad_norm": 0.14592824909655777, "learning_rate": 9.274894416748349e-06, "loss": 0.0019, "step": 911 }, { "epoch": 0.0587742475994071, "grad_norm": 0.165071235996826, "learning_rate": 9.276387608613467e-06, "loss": 0.0003, "step": 912 }, { "epoch": 0.05883869304633628, "grad_norm": 0.0045161075821298306, "learning_rate": 9.27787916410361e-06, "loss": 0.0, "step": 913 }, { "epoch": 0.05890313849326545, "grad_norm": 0.0609009449695222, "learning_rate": 9.279369086801427e-06, "loss": 0.0001, "step": 914 }, { "epoch": 0.05896758394019463, "grad_norm": 0.01850613402613245, "learning_rate": 9.28085738027782e-06, "loss": 0.0001, "step": 915 }, { "epoch": 0.0590320293871238, "grad_norm": 0.15463124032407352, "learning_rate": 9.28234404809198e-06, "loss": 0.0007, "step": 916 }, { "epoch": 0.059096474834052976, "grad_norm": 0.003664506187491668, "learning_rate": 9.283829093791457e-06, "loss": 0.0, "step": 917 }, { "epoch": 0.059160920280982146, "grad_norm": 0.00880322416467816, "learning_rate": 9.285312520912193e-06, "loss": 0.0001, "step": 918 }, { "epoch": 0.059225365727911324, "grad_norm": 3.7186074109349265, "learning_rate": 9.286794332978585e-06, "loss": 0.0237, "step": 919 }, { "epoch": 0.059289811174840494, "grad_norm": 8.817468231810405e-05, "learning_rate": 9.288274533503533e-06, "loss": 0.0, "step": 920 }, { "epoch": 0.05935425662176967, "grad_norm": 0.004592331110218876, "learning_rate": 9.289753125988485e-06, "loss": 0.0, "step": 921 }, { "epoch": 0.05941870206869885, "grad_norm": 1.0849440167306779, "learning_rate": 9.291230113923492e-06, "loss": 0.0055, "step": 922 }, { "epoch": 0.05948314751562802, "grad_norm": 0.0024849397582142455, "learning_rate": 9.29270550078725e-06, "loss": 0.0, "step": 923 }, { "epoch": 0.0595475929625572, "grad_norm": 0.9088584914373267, "learning_rate": 9.294179290047164e-06, "loss": 0.0059, "step": 924 }, { "epoch": 0.05961203840948637, "grad_norm": 0.00017863757655568402, "learning_rate": 9.29565148515938e-06, "loss": 0.0, "step": 925 }, { "epoch": 0.059676483856415545, "grad_norm": 0.16478213790474924, "learning_rate": 9.29712208956884e-06, "loss": 0.0028, "step": 926 }, { "epoch": 0.059740929303344716, "grad_norm": 0.009325136506692883, "learning_rate": 9.298591106709336e-06, "loss": 0.0, "step": 927 }, { "epoch": 0.05980537475027389, "grad_norm": 0.0018313403949757983, "learning_rate": 9.300058540003553e-06, "loss": 0.0, "step": 928 }, { "epoch": 0.05986982019720307, "grad_norm": 0.036417811764517145, "learning_rate": 9.301524392863113e-06, "loss": 0.0001, "step": 929 }, { "epoch": 0.05993426564413224, "grad_norm": 1.021395560902065, "learning_rate": 9.30298866868863e-06, "loss": 0.0049, "step": 930 }, { "epoch": 0.05999871109106142, "grad_norm": 0.0024827359936627643, "learning_rate": 9.304451370869755e-06, "loss": 0.0, "step": 931 }, { "epoch": 0.06006315653799059, "grad_norm": 0.7179352247112939, "learning_rate": 9.30591250278522e-06, "loss": 0.0021, "step": 932 }, { "epoch": 0.06012760198491977, "grad_norm": 0.0631933717664277, "learning_rate": 9.307372067802889e-06, "loss": 0.0003, "step": 933 }, { "epoch": 0.06019204743184894, "grad_norm": 0.060907736335031146, "learning_rate": 9.308830069279799e-06, "loss": 0.0004, "step": 934 }, { "epoch": 0.060256492878778115, "grad_norm": 0.0008149091579210628, "learning_rate": 9.310286510562214e-06, "loss": 0.0, "step": 935 }, { "epoch": 0.06032093832570729, "grad_norm": 0.006663428570939788, "learning_rate": 9.311741394985666e-06, "loss": 0.0, "step": 936 }, { "epoch": 0.06038538377263646, "grad_norm": 0.12337773360999044, "learning_rate": 9.313194725875004e-06, "loss": 0.001, "step": 937 }, { "epoch": 0.06044982921956564, "grad_norm": 0.19178069720437993, "learning_rate": 9.31464650654443e-06, "loss": 0.0005, "step": 938 }, { "epoch": 0.06051427466649481, "grad_norm": 0.0015436894352676542, "learning_rate": 9.316096740297561e-06, "loss": 0.0, "step": 939 }, { "epoch": 0.06057872011342399, "grad_norm": 0.15529906463269208, "learning_rate": 9.317545430427458e-06, "loss": 0.0009, "step": 940 }, { "epoch": 0.06064316556035316, "grad_norm": 0.15753065047540682, "learning_rate": 9.318992580216683e-06, "loss": 0.0025, "step": 941 }, { "epoch": 0.060707611007282336, "grad_norm": 0.0037847305096929324, "learning_rate": 9.320438192937334e-06, "loss": 0.0, "step": 942 }, { "epoch": 0.06077205645421151, "grad_norm": 0.560417957966202, "learning_rate": 9.321882271851098e-06, "loss": 0.0015, "step": 943 }, { "epoch": 0.060836501901140684, "grad_norm": 0.0015783399253200654, "learning_rate": 9.323324820209286e-06, "loss": 0.0, "step": 944 }, { "epoch": 0.06090094734806986, "grad_norm": 0.08633490002918111, "learning_rate": 9.32476584125289e-06, "loss": 0.0003, "step": 945 }, { "epoch": 0.06096539279499903, "grad_norm": 0.17250897245119487, "learning_rate": 9.326205338212606e-06, "loss": 0.0006, "step": 946 }, { "epoch": 0.06102983824192821, "grad_norm": 0.11453406446909774, "learning_rate": 9.327643314308903e-06, "loss": 0.0003, "step": 947 }, { "epoch": 0.06109428368885738, "grad_norm": 0.3036814978949101, "learning_rate": 9.329079772752047e-06, "loss": 0.0015, "step": 948 }, { "epoch": 0.06115872913578656, "grad_norm": 0.011376020798526345, "learning_rate": 9.33051471674215e-06, "loss": 0.0, "step": 949 }, { "epoch": 0.06122317458271573, "grad_norm": 0.012309696487391423, "learning_rate": 9.331948149469212e-06, "loss": 0.0001, "step": 950 }, { "epoch": 0.061287620029644906, "grad_norm": 0.006174902085906057, "learning_rate": 9.33338007411317e-06, "loss": 0.0, "step": 951 }, { "epoch": 0.06135206547657408, "grad_norm": 0.19661972321282462, "learning_rate": 9.33481049384393e-06, "loss": 0.0008, "step": 952 }, { "epoch": 0.061416510923503254, "grad_norm": 0.005731411898032642, "learning_rate": 9.336239411821416e-06, "loss": 0.0, "step": 953 }, { "epoch": 0.06148095637043243, "grad_norm": 0.6383001896675805, "learning_rate": 9.33766683119561e-06, "loss": 0.004, "step": 954 }, { "epoch": 0.0615454018173616, "grad_norm": 0.0018123395465073323, "learning_rate": 9.33909275510659e-06, "loss": 0.0, "step": 955 }, { "epoch": 0.06160984726429078, "grad_norm": 0.5576655806137351, "learning_rate": 9.340517186684582e-06, "loss": 0.002, "step": 956 }, { "epoch": 0.06167429271121995, "grad_norm": 0.11143409554222802, "learning_rate": 9.34194012904999e-06, "loss": 0.0011, "step": 957 }, { "epoch": 0.06173873815814913, "grad_norm": 0.163548453280282, "learning_rate": 9.34336158531344e-06, "loss": 0.0002, "step": 958 }, { "epoch": 0.061803183605078305, "grad_norm": 0.0179151870815518, "learning_rate": 9.344781558575823e-06, "loss": 0.0, "step": 959 }, { "epoch": 0.061867629052007475, "grad_norm": 0.00805664830135211, "learning_rate": 9.346200051928338e-06, "loss": 0.0001, "step": 960 }, { "epoch": 0.06193207449893665, "grad_norm": 0.035669773452513494, "learning_rate": 9.347617068452525e-06, "loss": 0.0001, "step": 961 }, { "epoch": 0.06199651994586582, "grad_norm": 0.016276996306397545, "learning_rate": 9.34903261122031e-06, "loss": 0.0001, "step": 962 }, { "epoch": 0.062060965392795, "grad_norm": 0.006962232185746814, "learning_rate": 9.350446683294047e-06, "loss": 0.0, "step": 963 }, { "epoch": 0.06212541083972417, "grad_norm": 0.017856100377398786, "learning_rate": 9.35185928772655e-06, "loss": 0.0, "step": 964 }, { "epoch": 0.06218985628665335, "grad_norm": 0.25958305769630685, "learning_rate": 9.353270427561143e-06, "loss": 0.0011, "step": 965 }, { "epoch": 0.06225430173358252, "grad_norm": 0.06486360650927218, "learning_rate": 9.354680105831685e-06, "loss": 0.0001, "step": 966 }, { "epoch": 0.0623187471805117, "grad_norm": 3.134478879749426, "learning_rate": 9.356088325562633e-06, "loss": 0.0352, "step": 967 }, { "epoch": 0.062383192627440874, "grad_norm": 0.19477207757671616, "learning_rate": 9.35749508976905e-06, "loss": 0.0002, "step": 968 }, { "epoch": 0.062447638074370045, "grad_norm": 0.0303179640337168, "learning_rate": 9.35890040145667e-06, "loss": 0.0, "step": 969 }, { "epoch": 0.06251208352129922, "grad_norm": 0.22131384688029207, "learning_rate": 9.360304263621919e-06, "loss": 0.0006, "step": 970 }, { "epoch": 0.06257652896822839, "grad_norm": 0.021000072329624038, "learning_rate": 9.361706679251967e-06, "loss": 0.0, "step": 971 }, { "epoch": 0.06264097441515756, "grad_norm": 0.017301594668236765, "learning_rate": 9.363107651324754e-06, "loss": 0.0, "step": 972 }, { "epoch": 0.06270541986208675, "grad_norm": 0.005622783541268775, "learning_rate": 9.364507182809037e-06, "loss": 0.0, "step": 973 }, { "epoch": 0.06276986530901592, "grad_norm": 0.23653247762425234, "learning_rate": 9.365905276664425e-06, "loss": 0.0012, "step": 974 }, { "epoch": 0.06283431075594509, "grad_norm": 0.0019284558066157138, "learning_rate": 9.367301935841413e-06, "loss": 0.0, "step": 975 }, { "epoch": 0.06289875620287427, "grad_norm": 0.013986864143152953, "learning_rate": 9.368697163281422e-06, "loss": 0.0, "step": 976 }, { "epoch": 0.06296320164980344, "grad_norm": 0.0022328048482544325, "learning_rate": 9.37009096191684e-06, "loss": 0.0, "step": 977 }, { "epoch": 0.06302764709673261, "grad_norm": 0.05739005222315148, "learning_rate": 9.371483334671052e-06, "loss": 0.0001, "step": 978 }, { "epoch": 0.06309209254366178, "grad_norm": 0.0010009926174638238, "learning_rate": 9.372874284458484e-06, "loss": 0.0, "step": 979 }, { "epoch": 0.06315653799059097, "grad_norm": 0.002213090174786511, "learning_rate": 9.374263814184626e-06, "loss": 0.0, "step": 980 }, { "epoch": 0.06322098343752014, "grad_norm": 0.037727668382915094, "learning_rate": 9.375651926746093e-06, "loss": 0.0, "step": 981 }, { "epoch": 0.06328542888444931, "grad_norm": 0.5743290801677684, "learning_rate": 9.377038625030634e-06, "loss": 0.0035, "step": 982 }, { "epoch": 0.0633498743313785, "grad_norm": 0.2047254163261985, "learning_rate": 9.378423911917185e-06, "loss": 0.0007, "step": 983 }, { "epoch": 0.06341431977830767, "grad_norm": 0.001514966733781814, "learning_rate": 9.379807790275902e-06, "loss": 0.0, "step": 984 }, { "epoch": 0.06347876522523684, "grad_norm": 0.025498008379182133, "learning_rate": 9.381190262968194e-06, "loss": 0.0001, "step": 985 }, { "epoch": 0.063543210672166, "grad_norm": 0.009340591807369656, "learning_rate": 9.382571332846755e-06, "loss": 0.0, "step": 986 }, { "epoch": 0.06360765611909519, "grad_norm": 0.001099268463229261, "learning_rate": 9.383951002755613e-06, "loss": 0.0, "step": 987 }, { "epoch": 0.06367210156602436, "grad_norm": 0.21566121032299157, "learning_rate": 9.385329275530144e-06, "loss": 0.0006, "step": 988 }, { "epoch": 0.06373654701295353, "grad_norm": 0.0011949991086739977, "learning_rate": 9.38670615399713e-06, "loss": 0.0, "step": 989 }, { "epoch": 0.06380099245988272, "grad_norm": 0.001856079925364757, "learning_rate": 9.388081640974775e-06, "loss": 0.0, "step": 990 }, { "epoch": 0.06386543790681189, "grad_norm": 0.618543390978522, "learning_rate": 9.389455739272754e-06, "loss": 0.0067, "step": 991 }, { "epoch": 0.06392988335374106, "grad_norm": 0.00102082338563046, "learning_rate": 9.390828451692232e-06, "loss": 0.0, "step": 992 }, { "epoch": 0.06399432880067023, "grad_norm": 0.0004849908016925305, "learning_rate": 9.392199781025916e-06, "loss": 0.0, "step": 993 }, { "epoch": 0.06405877424759941, "grad_norm": 0.006964614517327541, "learning_rate": 9.393569730058076e-06, "loss": 0.0, "step": 994 }, { "epoch": 0.06412321969452858, "grad_norm": 0.0680559337719936, "learning_rate": 9.394938301564581e-06, "loss": 0.0001, "step": 995 }, { "epoch": 0.06418766514145775, "grad_norm": 0.02046442622393038, "learning_rate": 9.396305498312936e-06, "loss": 0.0001, "step": 996 }, { "epoch": 0.06425211058838692, "grad_norm": 0.022657457270191042, "learning_rate": 9.397671323062319e-06, "loss": 0.0002, "step": 997 }, { "epoch": 0.06431655603531611, "grad_norm": 0.0010187417482833032, "learning_rate": 9.399035778563601e-06, "loss": 0.0, "step": 998 }, { "epoch": 0.06438100148224528, "grad_norm": 0.011742133910265604, "learning_rate": 9.400398867559398e-06, "loss": 0.0001, "step": 999 }, { "epoch": 0.06444544692917445, "grad_norm": 0.013586182883457138, "learning_rate": 9.401760592784085e-06, "loss": 0.0, "step": 1000 }, { "epoch": 0.06450989237610363, "grad_norm": 0.2551176516648852, "learning_rate": 9.403120956963842e-06, "loss": 0.0034, "step": 1001 }, { "epoch": 0.0645743378230328, "grad_norm": 0.03086066982715626, "learning_rate": 9.404479962816683e-06, "loss": 0.0001, "step": 1002 }, { "epoch": 0.06463878326996197, "grad_norm": 0.2412145436747094, "learning_rate": 9.405837613052491e-06, "loss": 0.0006, "step": 1003 }, { "epoch": 0.06470322871689115, "grad_norm": 0.037208169947243144, "learning_rate": 9.407193910373039e-06, "loss": 0.0004, "step": 1004 }, { "epoch": 0.06476767416382033, "grad_norm": 0.0024333425810042523, "learning_rate": 9.40854885747204e-06, "loss": 0.0, "step": 1005 }, { "epoch": 0.0648321196107495, "grad_norm": 0.08621493714841584, "learning_rate": 9.409902457035166e-06, "loss": 0.0002, "step": 1006 }, { "epoch": 0.06489656505767867, "grad_norm": 0.02889134757488118, "learning_rate": 9.411254711740086e-06, "loss": 0.0004, "step": 1007 }, { "epoch": 0.06496101050460786, "grad_norm": 0.228104563999611, "learning_rate": 9.412605624256493e-06, "loss": 0.0019, "step": 1008 }, { "epoch": 0.06502545595153703, "grad_norm": 0.015025817973177263, "learning_rate": 9.413955197246137e-06, "loss": 0.0001, "step": 1009 }, { "epoch": 0.0650899013984662, "grad_norm": 0.8140691497003968, "learning_rate": 9.415303433362863e-06, "loss": 0.0038, "step": 1010 }, { "epoch": 0.06515434684539537, "grad_norm": 0.004443188188472599, "learning_rate": 9.416650335252638e-06, "loss": 0.0, "step": 1011 }, { "epoch": 0.06521879229232455, "grad_norm": 0.16307514977324722, "learning_rate": 9.417995905553573e-06, "loss": 0.0004, "step": 1012 }, { "epoch": 0.06528323773925372, "grad_norm": 0.08674513548595156, "learning_rate": 9.419340146895971e-06, "loss": 0.0003, "step": 1013 }, { "epoch": 0.06534768318618289, "grad_norm": 0.0863520286012186, "learning_rate": 9.420683061902345e-06, "loss": 0.0004, "step": 1014 }, { "epoch": 0.06541212863311208, "grad_norm": 0.0015631516269431248, "learning_rate": 9.422024653187451e-06, "loss": 0.0, "step": 1015 }, { "epoch": 0.06547657408004125, "grad_norm": 0.0009583769723558712, "learning_rate": 9.423364923358329e-06, "loss": 0.0, "step": 1016 }, { "epoch": 0.06554101952697042, "grad_norm": 0.3525857656721515, "learning_rate": 9.424703875014313e-06, "loss": 0.0033, "step": 1017 }, { "epoch": 0.06560546497389959, "grad_norm": 0.01980719319717165, "learning_rate": 9.426041510747082e-06, "loss": 0.0001, "step": 1018 }, { "epoch": 0.06566991042082877, "grad_norm": 0.05776367316483845, "learning_rate": 9.42737783314068e-06, "loss": 0.0019, "step": 1019 }, { "epoch": 0.06573435586775794, "grad_norm": 0.029714163614339636, "learning_rate": 9.42871284477154e-06, "loss": 0.0001, "step": 1020 }, { "epoch": 0.06579880131468711, "grad_norm": 0.1090029194822556, "learning_rate": 9.430046548208533e-06, "loss": 0.0002, "step": 1021 }, { "epoch": 0.0658632467616163, "grad_norm": 0.79815705332845, "learning_rate": 9.431378946012973e-06, "loss": 0.0021, "step": 1022 }, { "epoch": 0.06592769220854547, "grad_norm": 0.12051999387583368, "learning_rate": 9.432710040738669e-06, "loss": 0.0002, "step": 1023 }, { "epoch": 0.06599213765547464, "grad_norm": 0.009057564683849412, "learning_rate": 9.434039834931941e-06, "loss": 0.0001, "step": 1024 }, { "epoch": 0.06605658310240381, "grad_norm": 0.0362359697607633, "learning_rate": 9.43536833113165e-06, "loss": 0.0001, "step": 1025 }, { "epoch": 0.066121028549333, "grad_norm": 0.029041901339865995, "learning_rate": 9.436695531869232e-06, "loss": 0.0001, "step": 1026 }, { "epoch": 0.06618547399626216, "grad_norm": 0.005182049662164661, "learning_rate": 9.438021439668724e-06, "loss": 0.0, "step": 1027 }, { "epoch": 0.06624991944319134, "grad_norm": 0.005158991102843603, "learning_rate": 9.439346057046797e-06, "loss": 0.0001, "step": 1028 }, { "epoch": 0.06631436489012052, "grad_norm": 0.4060666330698772, "learning_rate": 9.44066938651278e-06, "loss": 0.0057, "step": 1029 }, { "epoch": 0.06637881033704969, "grad_norm": 0.0013174037296815595, "learning_rate": 9.441991430568686e-06, "loss": 0.0, "step": 1030 }, { "epoch": 0.06644325578397886, "grad_norm": 0.0033868625919964246, "learning_rate": 9.443312191709244e-06, "loss": 0.0, "step": 1031 }, { "epoch": 0.06650770123090803, "grad_norm": 0.006117924134115885, "learning_rate": 9.444631672421934e-06, "loss": 0.0001, "step": 1032 }, { "epoch": 0.06657214667783722, "grad_norm": 0.13950942529862392, "learning_rate": 9.445949875187002e-06, "loss": 0.0019, "step": 1033 }, { "epoch": 0.06663659212476639, "grad_norm": 0.01851590001806397, "learning_rate": 9.447266802477499e-06, "loss": 0.0004, "step": 1034 }, { "epoch": 0.06670103757169556, "grad_norm": 0.0016811966371559576, "learning_rate": 9.448582456759298e-06, "loss": 0.0, "step": 1035 }, { "epoch": 0.06676548301862473, "grad_norm": 0.1653637147615305, "learning_rate": 9.449896840491135e-06, "loss": 0.0022, "step": 1036 }, { "epoch": 0.06682992846555391, "grad_norm": 0.022010529514772826, "learning_rate": 9.451209956124626e-06, "loss": 0.0001, "step": 1037 }, { "epoch": 0.06689437391248308, "grad_norm": 0.0071250187018150595, "learning_rate": 9.452521806104297e-06, "loss": 0.0, "step": 1038 }, { "epoch": 0.06695881935941225, "grad_norm": 0.02253860670889963, "learning_rate": 9.453832392867618e-06, "loss": 0.0002, "step": 1039 }, { "epoch": 0.06702326480634144, "grad_norm": 0.024827336967876906, "learning_rate": 9.455141718845016e-06, "loss": 0.0001, "step": 1040 }, { "epoch": 0.06708771025327061, "grad_norm": 0.028985878209440076, "learning_rate": 9.456449786459916e-06, "loss": 0.0001, "step": 1041 }, { "epoch": 0.06715215570019978, "grad_norm": 0.06560310114761658, "learning_rate": 9.45775659812876e-06, "loss": 0.0002, "step": 1042 }, { "epoch": 0.06721660114712895, "grad_norm": 0.007292826751741267, "learning_rate": 9.459062156261041e-06, "loss": 0.0, "step": 1043 }, { "epoch": 0.06728104659405813, "grad_norm": 0.001488974553572595, "learning_rate": 9.460366463259316e-06, "loss": 0.0, "step": 1044 }, { "epoch": 0.0673454920409873, "grad_norm": 0.058802188234950835, "learning_rate": 9.461669521519253e-06, "loss": 0.0001, "step": 1045 }, { "epoch": 0.06740993748791647, "grad_norm": 0.015528356733507939, "learning_rate": 9.462971333429634e-06, "loss": 0.0, "step": 1046 }, { "epoch": 0.06747438293484566, "grad_norm": 0.010762498561983332, "learning_rate": 9.4642719013724e-06, "loss": 0.0001, "step": 1047 }, { "epoch": 0.06753882838177483, "grad_norm": 0.027761143204603284, "learning_rate": 9.46557122772267e-06, "loss": 0.0001, "step": 1048 }, { "epoch": 0.067603273828704, "grad_norm": 0.13193905647539747, "learning_rate": 9.466869314848766e-06, "loss": 0.0001, "step": 1049 }, { "epoch": 0.06766771927563317, "grad_norm": 0.06437447835886814, "learning_rate": 9.468166165112236e-06, "loss": 0.0, "step": 1050 }, { "epoch": 0.06773216472256235, "grad_norm": 0.39598322422926224, "learning_rate": 9.469461780867895e-06, "loss": 0.0015, "step": 1051 }, { "epoch": 0.06779661016949153, "grad_norm": 0.020346673344546363, "learning_rate": 9.470756164463827e-06, "loss": 0.0001, "step": 1052 }, { "epoch": 0.0678610556164207, "grad_norm": 0.0005355693822581008, "learning_rate": 9.472049318241431e-06, "loss": 0.0, "step": 1053 }, { "epoch": 0.06792550106334988, "grad_norm": 0.025392949474745102, "learning_rate": 9.473341244535435e-06, "loss": 0.0, "step": 1054 }, { "epoch": 0.06798994651027905, "grad_norm": 0.040790192372236425, "learning_rate": 9.474631945673927e-06, "loss": 0.0003, "step": 1055 }, { "epoch": 0.06805439195720822, "grad_norm": 0.033763923209698024, "learning_rate": 9.475921423978379e-06, "loss": 0.0001, "step": 1056 }, { "epoch": 0.06811883740413739, "grad_norm": 0.020406697261917767, "learning_rate": 9.477209681763664e-06, "loss": 0.0, "step": 1057 }, { "epoch": 0.06818328285106658, "grad_norm": 0.0010645021910689245, "learning_rate": 9.478496721338096e-06, "loss": 0.0, "step": 1058 }, { "epoch": 0.06824772829799575, "grad_norm": 0.05344905297559554, "learning_rate": 9.479782545003442e-06, "loss": 0.0006, "step": 1059 }, { "epoch": 0.06831217374492492, "grad_norm": 0.22296419932981829, "learning_rate": 9.481067155054957e-06, "loss": 0.0005, "step": 1060 }, { "epoch": 0.0683766191918541, "grad_norm": 0.0006634595339300624, "learning_rate": 9.482350553781397e-06, "loss": 0.0, "step": 1061 }, { "epoch": 0.06844106463878327, "grad_norm": 0.15285856729547703, "learning_rate": 9.48363274346505e-06, "loss": 0.0003, "step": 1062 }, { "epoch": 0.06850551008571244, "grad_norm": 0.030757339862208403, "learning_rate": 9.484913726381766e-06, "loss": 0.0002, "step": 1063 }, { "epoch": 0.06856995553264161, "grad_norm": 0.20174093222005063, "learning_rate": 9.486193504800969e-06, "loss": 0.0006, "step": 1064 }, { "epoch": 0.0686344009795708, "grad_norm": 0.030571583646065718, "learning_rate": 9.487472080985686e-06, "loss": 0.0001, "step": 1065 }, { "epoch": 0.06869884642649997, "grad_norm": 0.0009779026849065075, "learning_rate": 9.48874945719258e-06, "loss": 0.0, "step": 1066 }, { "epoch": 0.06876329187342914, "grad_norm": 0.00596599383421079, "learning_rate": 9.49002563567196e-06, "loss": 0.0001, "step": 1067 }, { "epoch": 0.06882773732035832, "grad_norm": 0.12309505639517282, "learning_rate": 9.49130061866781e-06, "loss": 0.002, "step": 1068 }, { "epoch": 0.0688921827672875, "grad_norm": 0.00540315072705124, "learning_rate": 9.492574408417818e-06, "loss": 0.0, "step": 1069 }, { "epoch": 0.06895662821421666, "grad_norm": 0.0008161722316358159, "learning_rate": 9.493847007153396e-06, "loss": 0.0, "step": 1070 }, { "epoch": 0.06902107366114583, "grad_norm": 0.0047131316080134674, "learning_rate": 9.495118417099695e-06, "loss": 0.0, "step": 1071 }, { "epoch": 0.06908551910807502, "grad_norm": 0.051150246774193775, "learning_rate": 9.496388640475643e-06, "loss": 0.0002, "step": 1072 }, { "epoch": 0.06914996455500419, "grad_norm": 0.0019308822291618391, "learning_rate": 9.49765767949396e-06, "loss": 0.0, "step": 1073 }, { "epoch": 0.06921441000193336, "grad_norm": 0.006845832426320013, "learning_rate": 9.498925536361181e-06, "loss": 0.0001, "step": 1074 }, { "epoch": 0.06927885544886254, "grad_norm": 0.07802347799089825, "learning_rate": 9.500192213277679e-06, "loss": 0.0001, "step": 1075 }, { "epoch": 0.06934330089579172, "grad_norm": 0.06181839032778164, "learning_rate": 9.501457712437695e-06, "loss": 0.0, "step": 1076 }, { "epoch": 0.06940774634272089, "grad_norm": 0.005407418144409315, "learning_rate": 9.502722036029348e-06, "loss": 0.0, "step": 1077 }, { "epoch": 0.06947219178965006, "grad_norm": 0.013561958584136294, "learning_rate": 9.503985186234666e-06, "loss": 0.0016, "step": 1078 }, { "epoch": 0.06953663723657924, "grad_norm": 0.041943084762526915, "learning_rate": 9.505247165229614e-06, "loss": 0.0001, "step": 1079 }, { "epoch": 0.06960108268350841, "grad_norm": 0.4291865810230572, "learning_rate": 9.506507975184103e-06, "loss": 0.0015, "step": 1080 }, { "epoch": 0.06966552813043758, "grad_norm": 0.18459478347266867, "learning_rate": 9.507767618262019e-06, "loss": 0.0009, "step": 1081 }, { "epoch": 0.06972997357736675, "grad_norm": 0.55468446435232, "learning_rate": 9.509026096621253e-06, "loss": 0.0038, "step": 1082 }, { "epoch": 0.06979441902429594, "grad_norm": 0.007788673964636509, "learning_rate": 9.510283412413708e-06, "loss": 0.0001, "step": 1083 }, { "epoch": 0.06985886447122511, "grad_norm": 0.005881688910034065, "learning_rate": 9.511539567785331e-06, "loss": 0.0, "step": 1084 }, { "epoch": 0.06992330991815428, "grad_norm": 0.010063309122665664, "learning_rate": 9.512794564876132e-06, "loss": 0.0, "step": 1085 }, { "epoch": 0.06998775536508346, "grad_norm": 0.16376256774401285, "learning_rate": 9.51404840582021e-06, "loss": 0.0003, "step": 1086 }, { "epoch": 0.07005220081201263, "grad_norm": 0.012820581196048885, "learning_rate": 9.51530109274577e-06, "loss": 0.0001, "step": 1087 }, { "epoch": 0.0701166462589418, "grad_norm": 0.3957807941035052, "learning_rate": 9.516552627775143e-06, "loss": 0.0029, "step": 1088 }, { "epoch": 0.07018109170587097, "grad_norm": 0.021148173145825185, "learning_rate": 9.517803013024814e-06, "loss": 0.0001, "step": 1089 }, { "epoch": 0.07024553715280016, "grad_norm": 0.005577600092877411, "learning_rate": 9.51905225060544e-06, "loss": 0.0001, "step": 1090 }, { "epoch": 0.07030998259972933, "grad_norm": 0.00015751397743556044, "learning_rate": 9.52030034262187e-06, "loss": 0.0, "step": 1091 }, { "epoch": 0.0703744280466585, "grad_norm": 0.002895350355714667, "learning_rate": 9.521547291173168e-06, "loss": 0.0, "step": 1092 }, { "epoch": 0.07043887349358768, "grad_norm": 0.0023124064473402568, "learning_rate": 9.522793098352637e-06, "loss": 0.0, "step": 1093 }, { "epoch": 0.07050331894051685, "grad_norm": 0.20981965745450876, "learning_rate": 9.524037766247829e-06, "loss": 0.0017, "step": 1094 }, { "epoch": 0.07056776438744602, "grad_norm": 0.016446824720652117, "learning_rate": 9.525281296940584e-06, "loss": 0.0, "step": 1095 }, { "epoch": 0.0706322098343752, "grad_norm": 0.13535662307196408, "learning_rate": 9.526523692507037e-06, "loss": 0.0005, "step": 1096 }, { "epoch": 0.07069665528130438, "grad_norm": 0.030537059287996025, "learning_rate": 9.527764955017639e-06, "loss": 0.0, "step": 1097 }, { "epoch": 0.07076110072823355, "grad_norm": 0.014719741351277675, "learning_rate": 9.529005086537187e-06, "loss": 0.0001, "step": 1098 }, { "epoch": 0.07082554617516272, "grad_norm": 0.22880329055924983, "learning_rate": 9.530244089124837e-06, "loss": 0.0012, "step": 1099 }, { "epoch": 0.0708899916220919, "grad_norm": 0.00448704140852509, "learning_rate": 9.531481964834122e-06, "loss": 0.0, "step": 1100 }, { "epoch": 0.07095443706902108, "grad_norm": 0.009209617813428274, "learning_rate": 9.532718715712988e-06, "loss": 0.0, "step": 1101 }, { "epoch": 0.07101888251595025, "grad_norm": 0.09858833858855938, "learning_rate": 9.533954343803793e-06, "loss": 0.001, "step": 1102 }, { "epoch": 0.07108332796287942, "grad_norm": 0.0003334525690795657, "learning_rate": 9.535188851143343e-06, "loss": 0.0, "step": 1103 }, { "epoch": 0.0711477734098086, "grad_norm": 0.001099738770532882, "learning_rate": 9.536422239762899e-06, "loss": 0.0, "step": 1104 }, { "epoch": 0.07121221885673777, "grad_norm": 0.012566891242134792, "learning_rate": 9.537654511688219e-06, "loss": 0.0, "step": 1105 }, { "epoch": 0.07127666430366694, "grad_norm": 0.029662996653041767, "learning_rate": 9.538885668939549e-06, "loss": 0.0001, "step": 1106 }, { "epoch": 0.07134110975059613, "grad_norm": 0.0039183110992792244, "learning_rate": 9.540115713531667e-06, "loss": 0.0, "step": 1107 }, { "epoch": 0.0714055551975253, "grad_norm": 0.06460220983321396, "learning_rate": 9.541344647473889e-06, "loss": 0.0006, "step": 1108 }, { "epoch": 0.07147000064445447, "grad_norm": 0.01392801660340169, "learning_rate": 9.542572472770095e-06, "loss": 0.0001, "step": 1109 }, { "epoch": 0.07153444609138364, "grad_norm": 0.0075267292844853, "learning_rate": 9.543799191418745e-06, "loss": 0.0001, "step": 1110 }, { "epoch": 0.07159889153831282, "grad_norm": 0.4668644333477323, "learning_rate": 9.545024805412904e-06, "loss": 0.0024, "step": 1111 }, { "epoch": 0.071663336985242, "grad_norm": 0.04064895583802354, "learning_rate": 9.546249316740252e-06, "loss": 0.0001, "step": 1112 }, { "epoch": 0.07172778243217116, "grad_norm": 0.01834313036665756, "learning_rate": 9.54747272738311e-06, "loss": 0.0, "step": 1113 }, { "epoch": 0.07179222787910035, "grad_norm": 0.15668289299222576, "learning_rate": 9.548695039318467e-06, "loss": 0.0001, "step": 1114 }, { "epoch": 0.07185667332602952, "grad_norm": 0.04903259619234823, "learning_rate": 9.549916254517975e-06, "loss": 0.0001, "step": 1115 }, { "epoch": 0.07192111877295869, "grad_norm": 0.023982815736462482, "learning_rate": 9.551136374947997e-06, "loss": 0.0, "step": 1116 }, { "epoch": 0.07198556421988786, "grad_norm": 0.0032611786740441946, "learning_rate": 9.552355402569607e-06, "loss": 0.0, "step": 1117 }, { "epoch": 0.07205000966681704, "grad_norm": 0.03591521896676965, "learning_rate": 9.553573339338611e-06, "loss": 0.0001, "step": 1118 }, { "epoch": 0.07211445511374621, "grad_norm": 0.002452457086472786, "learning_rate": 9.554790187205577e-06, "loss": 0.0, "step": 1119 }, { "epoch": 0.07217890056067539, "grad_norm": 0.005269897617250751, "learning_rate": 9.55600594811584e-06, "loss": 0.0, "step": 1120 }, { "epoch": 0.07224334600760456, "grad_norm": 0.4053084576654436, "learning_rate": 9.557220624009528e-06, "loss": 0.0009, "step": 1121 }, { "epoch": 0.07230779145453374, "grad_norm": 0.04937713962457996, "learning_rate": 9.55843421682158e-06, "loss": 0.0004, "step": 1122 }, { "epoch": 0.07237223690146291, "grad_norm": 0.31367174055920827, "learning_rate": 9.559646728481764e-06, "loss": 0.0005, "step": 1123 }, { "epoch": 0.07243668234839208, "grad_norm": 0.0021878403973614306, "learning_rate": 9.560858160914693e-06, "loss": 0.0, "step": 1124 }, { "epoch": 0.07250112779532127, "grad_norm": 0.04644155774287131, "learning_rate": 9.562068516039848e-06, "loss": 0.0001, "step": 1125 }, { "epoch": 0.07256557324225044, "grad_norm": 0.005334428075335696, "learning_rate": 9.563277795771592e-06, "loss": 0.0, "step": 1126 }, { "epoch": 0.0726300186891796, "grad_norm": 0.0009123395890019158, "learning_rate": 9.564486002019189e-06, "loss": 0.0, "step": 1127 }, { "epoch": 0.07269446413610878, "grad_norm": 0.0032421208617262635, "learning_rate": 9.565693136686826e-06, "loss": 0.0, "step": 1128 }, { "epoch": 0.07275890958303796, "grad_norm": 0.20209974356677035, "learning_rate": 9.566899201673622e-06, "loss": 0.0004, "step": 1129 }, { "epoch": 0.07282335502996713, "grad_norm": 0.0017558729881317762, "learning_rate": 9.568104198873661e-06, "loss": 0.0, "step": 1130 }, { "epoch": 0.0728878004768963, "grad_norm": 0.0007344998898286816, "learning_rate": 9.569308130175994e-06, "loss": 0.0, "step": 1131 }, { "epoch": 0.07295224592382549, "grad_norm": 0.07438051013009608, "learning_rate": 9.570510997464664e-06, "loss": 0.0004, "step": 1132 }, { "epoch": 0.07301669137075466, "grad_norm": 0.05245505196256475, "learning_rate": 9.571712802618725e-06, "loss": 0.0002, "step": 1133 }, { "epoch": 0.07308113681768383, "grad_norm": 0.0009284186006608998, "learning_rate": 9.572913547512255e-06, "loss": 0.0, "step": 1134 }, { "epoch": 0.073145582264613, "grad_norm": 0.0034999118506301983, "learning_rate": 9.574113234014383e-06, "loss": 0.0, "step": 1135 }, { "epoch": 0.07321002771154218, "grad_norm": 0.07515344754801417, "learning_rate": 9.57531186398929e-06, "loss": 0.0002, "step": 1136 }, { "epoch": 0.07327447315847135, "grad_norm": 0.008738924023498379, "learning_rate": 9.576509439296242e-06, "loss": 0.0, "step": 1137 }, { "epoch": 0.07333891860540052, "grad_norm": 0.0008081805016731883, "learning_rate": 9.577705961789603e-06, "loss": 0.0, "step": 1138 }, { "epoch": 0.07340336405232971, "grad_norm": 0.03078131770414565, "learning_rate": 9.578901433318846e-06, "loss": 0.0001, "step": 1139 }, { "epoch": 0.07346780949925888, "grad_norm": 0.03254781257906103, "learning_rate": 9.580095855728579e-06, "loss": 0.0003, "step": 1140 }, { "epoch": 0.07353225494618805, "grad_norm": 0.10584629442542785, "learning_rate": 9.581289230858554e-06, "loss": 0.0018, "step": 1141 }, { "epoch": 0.07359670039311722, "grad_norm": 0.013347870485689773, "learning_rate": 9.582481560543692e-06, "loss": 0.0, "step": 1142 }, { "epoch": 0.0736611458400464, "grad_norm": 0.0043998961185891345, "learning_rate": 9.583672846614092e-06, "loss": 0.0, "step": 1143 }, { "epoch": 0.07372559128697558, "grad_norm": 0.04458813463929904, "learning_rate": 9.584863090895054e-06, "loss": 0.0001, "step": 1144 }, { "epoch": 0.07379003673390475, "grad_norm": 0.026909427188295067, "learning_rate": 9.586052295207094e-06, "loss": 0.0, "step": 1145 }, { "epoch": 0.07385448218083393, "grad_norm": 0.008740718491162603, "learning_rate": 9.587240461365958e-06, "loss": 0.0, "step": 1146 }, { "epoch": 0.0739189276277631, "grad_norm": 0.2052019199750452, "learning_rate": 9.58842759118264e-06, "loss": 0.002, "step": 1147 }, { "epoch": 0.07398337307469227, "grad_norm": 0.0039053163539573186, "learning_rate": 9.589613686463404e-06, "loss": 0.0001, "step": 1148 }, { "epoch": 0.07404781852162144, "grad_norm": 0.007556923226473166, "learning_rate": 9.590798749009793e-06, "loss": 0.0, "step": 1149 }, { "epoch": 0.07411226396855063, "grad_norm": 0.02777061566941155, "learning_rate": 9.591982780618645e-06, "loss": 0.0001, "step": 1150 }, { "epoch": 0.0741767094154798, "grad_norm": 0.3086832496171256, "learning_rate": 9.593165783082122e-06, "loss": 0.0012, "step": 1151 }, { "epoch": 0.07424115486240897, "grad_norm": 0.0005099813533373637, "learning_rate": 9.594347758187705e-06, "loss": 0.0, "step": 1152 }, { "epoch": 0.07430560030933815, "grad_norm": 0.0010335636522138616, "learning_rate": 9.59552870771823e-06, "loss": 0.0, "step": 1153 }, { "epoch": 0.07437004575626732, "grad_norm": 0.0005503441681810711, "learning_rate": 9.596708633451897e-06, "loss": 0.0, "step": 1154 }, { "epoch": 0.07443449120319649, "grad_norm": 0.29321193774320214, "learning_rate": 9.597887537162277e-06, "loss": 0.0007, "step": 1155 }, { "epoch": 0.07449893665012566, "grad_norm": 0.0003677550358705728, "learning_rate": 9.599065420618347e-06, "loss": 0.0, "step": 1156 }, { "epoch": 0.07456338209705485, "grad_norm": 0.09427091300374382, "learning_rate": 9.600242285584487e-06, "loss": 0.0001, "step": 1157 }, { "epoch": 0.07462782754398402, "grad_norm": 0.1102941329864884, "learning_rate": 9.601418133820508e-06, "loss": 0.0002, "step": 1158 }, { "epoch": 0.07469227299091319, "grad_norm": 0.0016408962246154898, "learning_rate": 9.602592967081665e-06, "loss": 0.0, "step": 1159 }, { "epoch": 0.07475671843784237, "grad_norm": 0.015343294270169556, "learning_rate": 9.603766787118666e-06, "loss": 0.0001, "step": 1160 }, { "epoch": 0.07482116388477154, "grad_norm": 0.007299553047901523, "learning_rate": 9.604939595677697e-06, "loss": 0.0001, "step": 1161 }, { "epoch": 0.07488560933170071, "grad_norm": 0.06251656417098318, "learning_rate": 9.606111394500438e-06, "loss": 0.0002, "step": 1162 }, { "epoch": 0.07495005477862988, "grad_norm": 0.005736968608914812, "learning_rate": 9.607282185324068e-06, "loss": 0.0, "step": 1163 }, { "epoch": 0.07501450022555907, "grad_norm": 0.0003761041372193296, "learning_rate": 9.608451969881285e-06, "loss": 0.0, "step": 1164 }, { "epoch": 0.07507894567248824, "grad_norm": 0.004082930690161441, "learning_rate": 9.609620749900332e-06, "loss": 0.0, "step": 1165 }, { "epoch": 0.07514339111941741, "grad_norm": 0.001868907810488472, "learning_rate": 9.610788527104996e-06, "loss": 0.0, "step": 1166 }, { "epoch": 0.07520783656634658, "grad_norm": 0.01672035118957357, "learning_rate": 9.611955303214636e-06, "loss": 0.0, "step": 1167 }, { "epoch": 0.07527228201327577, "grad_norm": 0.0003677767328121933, "learning_rate": 9.613121079944188e-06, "loss": 0.0, "step": 1168 }, { "epoch": 0.07533672746020494, "grad_norm": 0.20056341800735186, "learning_rate": 9.614285859004185e-06, "loss": 0.0018, "step": 1169 }, { "epoch": 0.0754011729071341, "grad_norm": 0.672537078225248, "learning_rate": 9.61544964210078e-06, "loss": 0.0034, "step": 1170 }, { "epoch": 0.07546561835406329, "grad_norm": 0.0004565966029069957, "learning_rate": 9.616612430935744e-06, "loss": 0.0, "step": 1171 }, { "epoch": 0.07553006380099246, "grad_norm": 0.00035533163882761317, "learning_rate": 9.617774227206492e-06, "loss": 0.0, "step": 1172 }, { "epoch": 0.07559450924792163, "grad_norm": 0.192594228075119, "learning_rate": 9.618935032606104e-06, "loss": 0.0015, "step": 1173 }, { "epoch": 0.0756589546948508, "grad_norm": 0.10144572697475107, "learning_rate": 9.620094848823319e-06, "loss": 0.0002, "step": 1174 }, { "epoch": 0.07572340014177999, "grad_norm": 0.007117030604680507, "learning_rate": 9.621253677542571e-06, "loss": 0.0, "step": 1175 }, { "epoch": 0.07578784558870916, "grad_norm": 0.0061437197610423756, "learning_rate": 9.622411520443993e-06, "loss": 0.0, "step": 1176 }, { "epoch": 0.07585229103563833, "grad_norm": 0.04613563697317982, "learning_rate": 9.623568379203435e-06, "loss": 0.0001, "step": 1177 }, { "epoch": 0.07591673648256751, "grad_norm": 0.058720324813408255, "learning_rate": 9.624724255492474e-06, "loss": 0.0001, "step": 1178 }, { "epoch": 0.07598118192949668, "grad_norm": 0.006014910631233619, "learning_rate": 9.625879150978434e-06, "loss": 0.0, "step": 1179 }, { "epoch": 0.07604562737642585, "grad_norm": 0.0006228941474103343, "learning_rate": 9.6270330673244e-06, "loss": 0.0, "step": 1180 }, { "epoch": 0.07611007282335502, "grad_norm": 0.006748228304873041, "learning_rate": 9.628186006189226e-06, "loss": 0.0, "step": 1181 }, { "epoch": 0.07617451827028421, "grad_norm": 0.5808579312109688, "learning_rate": 9.62933796922756e-06, "loss": 0.0007, "step": 1182 }, { "epoch": 0.07623896371721338, "grad_norm": 0.043795489180758776, "learning_rate": 9.630488958089847e-06, "loss": 0.0002, "step": 1183 }, { "epoch": 0.07630340916414255, "grad_norm": 0.0515382355616, "learning_rate": 9.631638974422349e-06, "loss": 0.0001, "step": 1184 }, { "epoch": 0.07636785461107173, "grad_norm": 0.031008705248851732, "learning_rate": 9.632788019867159e-06, "loss": 0.0001, "step": 1185 }, { "epoch": 0.0764323000580009, "grad_norm": 0.003564128836485725, "learning_rate": 9.63393609606222e-06, "loss": 0.0, "step": 1186 }, { "epoch": 0.07649674550493007, "grad_norm": 0.0021392358347696247, "learning_rate": 9.635083204641324e-06, "loss": 0.0, "step": 1187 }, { "epoch": 0.07656119095185925, "grad_norm": 0.0004078006923083002, "learning_rate": 9.636229347234144e-06, "loss": 0.0, "step": 1188 }, { "epoch": 0.07662563639878843, "grad_norm": 0.20509931364044576, "learning_rate": 9.637374525466229e-06, "loss": 0.0004, "step": 1189 }, { "epoch": 0.0766900818457176, "grad_norm": 0.06311748250374623, "learning_rate": 9.638518740959042e-06, "loss": 0.0004, "step": 1190 }, { "epoch": 0.07675452729264677, "grad_norm": 0.022872006103306926, "learning_rate": 9.639661995329951e-06, "loss": 0.0, "step": 1191 }, { "epoch": 0.07681897273957596, "grad_norm": 0.0007420031287808976, "learning_rate": 9.640804290192255e-06, "loss": 0.0, "step": 1192 }, { "epoch": 0.07688341818650513, "grad_norm": 0.005457022771712689, "learning_rate": 9.641945627155186e-06, "loss": 0.0, "step": 1193 }, { "epoch": 0.0769478636334343, "grad_norm": 0.008290051615120557, "learning_rate": 9.643086007823948e-06, "loss": 0.0001, "step": 1194 }, { "epoch": 0.07701230908036347, "grad_norm": 0.023992458922295828, "learning_rate": 9.644225433799694e-06, "loss": 0.0001, "step": 1195 }, { "epoch": 0.07707675452729265, "grad_norm": 0.003102753117205217, "learning_rate": 9.645363906679577e-06, "loss": 0.0, "step": 1196 }, { "epoch": 0.07714119997422182, "grad_norm": 0.058512229636302565, "learning_rate": 9.646501428056732e-06, "loss": 0.0001, "step": 1197 }, { "epoch": 0.07720564542115099, "grad_norm": 0.00014176378035999393, "learning_rate": 9.64763799952031e-06, "loss": 0.0, "step": 1198 }, { "epoch": 0.07727009086808018, "grad_norm": 0.0003108339992887321, "learning_rate": 9.64877362265548e-06, "loss": 0.0, "step": 1199 }, { "epoch": 0.07733453631500935, "grad_norm": 0.0032003990062005197, "learning_rate": 9.649908299043451e-06, "loss": 0.0, "step": 1200 }, { "epoch": 0.07739898176193852, "grad_norm": 0.5838421549541228, "learning_rate": 9.651042030261476e-06, "loss": 0.0034, "step": 1201 }, { "epoch": 0.07746342720886769, "grad_norm": 0.0008412995426426008, "learning_rate": 9.652174817882872e-06, "loss": 0.0, "step": 1202 }, { "epoch": 0.07752787265579687, "grad_norm": 9.185586311117465e-05, "learning_rate": 9.653306663477033e-06, "loss": 0.0, "step": 1203 }, { "epoch": 0.07759231810272604, "grad_norm": 0.23148305400119992, "learning_rate": 9.654437568609434e-06, "loss": 0.0019, "step": 1204 }, { "epoch": 0.07765676354965521, "grad_norm": 0.00339606142341171, "learning_rate": 9.655567534841662e-06, "loss": 0.0, "step": 1205 }, { "epoch": 0.07772120899658438, "grad_norm": 0.07896940352611387, "learning_rate": 9.656696563731408e-06, "loss": 0.0002, "step": 1206 }, { "epoch": 0.07778565444351357, "grad_norm": 0.08238037779996558, "learning_rate": 9.657824656832492e-06, "loss": 0.0002, "step": 1207 }, { "epoch": 0.07785009989044274, "grad_norm": 0.002891572609876939, "learning_rate": 9.658951815694877e-06, "loss": 0.0, "step": 1208 }, { "epoch": 0.07791454533737191, "grad_norm": 0.0007835520232595968, "learning_rate": 9.660078041864675e-06, "loss": 0.0, "step": 1209 }, { "epoch": 0.0779789907843011, "grad_norm": 0.0002144706980715807, "learning_rate": 9.661203336884163e-06, "loss": 0.0, "step": 1210 }, { "epoch": 0.07804343623123026, "grad_norm": 0.141667232412957, "learning_rate": 9.6623277022918e-06, "loss": 0.0008, "step": 1211 }, { "epoch": 0.07810788167815944, "grad_norm": 0.013447958714361321, "learning_rate": 9.66345113962223e-06, "loss": 0.0001, "step": 1212 }, { "epoch": 0.0781723271250886, "grad_norm": 0.015480042443949904, "learning_rate": 9.664573650406304e-06, "loss": 0.0, "step": 1213 }, { "epoch": 0.07823677257201779, "grad_norm": 0.023478707705058638, "learning_rate": 9.665695236171086e-06, "loss": 0.0001, "step": 1214 }, { "epoch": 0.07830121801894696, "grad_norm": 0.19705356870810878, "learning_rate": 9.666815898439866e-06, "loss": 0.0026, "step": 1215 }, { "epoch": 0.07836566346587613, "grad_norm": 0.03781223988640594, "learning_rate": 9.667935638732182e-06, "loss": 0.0, "step": 1216 }, { "epoch": 0.07843010891280532, "grad_norm": 0.08106810676990649, "learning_rate": 9.669054458563815e-06, "loss": 0.0016, "step": 1217 }, { "epoch": 0.07849455435973449, "grad_norm": 0.0032037268903943607, "learning_rate": 9.67017235944682e-06, "loss": 0.0, "step": 1218 }, { "epoch": 0.07855899980666366, "grad_norm": 0.0014909723492173954, "learning_rate": 9.67128934288952e-06, "loss": 0.0, "step": 1219 }, { "epoch": 0.07862344525359283, "grad_norm": 0.05132170474820264, "learning_rate": 9.672405410396536e-06, "loss": 0.0, "step": 1220 }, { "epoch": 0.07868789070052201, "grad_norm": 0.05448380494674192, "learning_rate": 9.673520563468786e-06, "loss": 0.0002, "step": 1221 }, { "epoch": 0.07875233614745118, "grad_norm": 0.00041079875761307306, "learning_rate": 9.674634803603501e-06, "loss": 0.0, "step": 1222 }, { "epoch": 0.07881678159438035, "grad_norm": 1.6507721048111714, "learning_rate": 9.675748132294243e-06, "loss": 0.0026, "step": 1223 }, { "epoch": 0.07888122704130954, "grad_norm": 0.0002573480081688759, "learning_rate": 9.676860551030908e-06, "loss": 0.0, "step": 1224 }, { "epoch": 0.07894567248823871, "grad_norm": 0.05452053945885145, "learning_rate": 9.677972061299738e-06, "loss": 0.0001, "step": 1225 }, { "epoch": 0.07901011793516788, "grad_norm": 0.0009042614824524367, "learning_rate": 9.679082664583347e-06, "loss": 0.0, "step": 1226 }, { "epoch": 0.07907456338209705, "grad_norm": 0.12650184384658905, "learning_rate": 9.68019236236071e-06, "loss": 0.0005, "step": 1227 }, { "epoch": 0.07913900882902623, "grad_norm": 0.0003593917699330583, "learning_rate": 9.6813011561072e-06, "loss": 0.0, "step": 1228 }, { "epoch": 0.0792034542759554, "grad_norm": 0.00784809323102259, "learning_rate": 9.682409047294576e-06, "loss": 0.0, "step": 1229 }, { "epoch": 0.07926789972288457, "grad_norm": 0.0034080110459679502, "learning_rate": 9.683516037391016e-06, "loss": 0.0, "step": 1230 }, { "epoch": 0.07933234516981376, "grad_norm": 0.0008000263937962497, "learning_rate": 9.684622127861108e-06, "loss": 0.0, "step": 1231 }, { "epoch": 0.07939679061674293, "grad_norm": 0.4572995042527525, "learning_rate": 9.685727320165879e-06, "loss": 0.0051, "step": 1232 }, { "epoch": 0.0794612360636721, "grad_norm": 0.14509120287519361, "learning_rate": 9.6868316157628e-06, "loss": 0.0004, "step": 1233 }, { "epoch": 0.07952568151060127, "grad_norm": 0.0019696790708333524, "learning_rate": 9.687935016105796e-06, "loss": 0.0, "step": 1234 }, { "epoch": 0.07959012695753045, "grad_norm": 0.2617435443577594, "learning_rate": 9.689037522645257e-06, "loss": 0.0007, "step": 1235 }, { "epoch": 0.07965457240445963, "grad_norm": 0.1585587871704741, "learning_rate": 9.690139136828051e-06, "loss": 0.0002, "step": 1236 }, { "epoch": 0.0797190178513888, "grad_norm": 0.0007139002751616745, "learning_rate": 9.69123986009754e-06, "loss": 0.0, "step": 1237 }, { "epoch": 0.07978346329831798, "grad_norm": 0.0005685128388628978, "learning_rate": 9.692339693893584e-06, "loss": 0.0, "step": 1238 }, { "epoch": 0.07984790874524715, "grad_norm": 0.00020046062008239465, "learning_rate": 9.693438639652553e-06, "loss": 0.0, "step": 1239 }, { "epoch": 0.07991235419217632, "grad_norm": 0.018666319797578546, "learning_rate": 9.694536698807345e-06, "loss": 0.0002, "step": 1240 }, { "epoch": 0.07997679963910549, "grad_norm": 0.002108454970512216, "learning_rate": 9.69563387278739e-06, "loss": 0.0, "step": 1241 }, { "epoch": 0.08004124508603468, "grad_norm": 0.005928479550208142, "learning_rate": 9.696730163018664e-06, "loss": 0.0001, "step": 1242 }, { "epoch": 0.08010569053296385, "grad_norm": 0.007256955744477934, "learning_rate": 9.697825570923702e-06, "loss": 0.0, "step": 1243 }, { "epoch": 0.08017013597989302, "grad_norm": 0.012520196155166474, "learning_rate": 9.698920097921602e-06, "loss": 0.0, "step": 1244 }, { "epoch": 0.0802345814268222, "grad_norm": 0.0049347868056927174, "learning_rate": 9.700013745428048e-06, "loss": 0.0, "step": 1245 }, { "epoch": 0.08029902687375137, "grad_norm": 0.14433231710557265, "learning_rate": 9.701106514855312e-06, "loss": 0.0002, "step": 1246 }, { "epoch": 0.08036347232068054, "grad_norm": 0.16086143413594908, "learning_rate": 9.70219840761226e-06, "loss": 0.0004, "step": 1247 }, { "epoch": 0.08042791776760971, "grad_norm": 0.022894449012242864, "learning_rate": 9.703289425104382e-06, "loss": 0.0001, "step": 1248 }, { "epoch": 0.0804923632145389, "grad_norm": 0.0004916579296406989, "learning_rate": 9.70437956873378e-06, "loss": 0.0, "step": 1249 }, { "epoch": 0.08055680866146807, "grad_norm": 0.006341910971692203, "learning_rate": 9.705468839899196e-06, "loss": 0.0, "step": 1250 }, { "epoch": 0.08062125410839724, "grad_norm": 0.000566196866322031, "learning_rate": 9.706557239996017e-06, "loss": 0.0, "step": 1251 }, { "epoch": 0.08068569955532641, "grad_norm": 4.660119618530064, "learning_rate": 9.707644770416276e-06, "loss": 0.0008, "step": 1252 }, { "epoch": 0.0807501450022556, "grad_norm": 0.0011596501754506218, "learning_rate": 9.708731432548683e-06, "loss": 0.0, "step": 1253 }, { "epoch": 0.08081459044918476, "grad_norm": 0.0002413284823318843, "learning_rate": 9.70981722777862e-06, "loss": 0.0, "step": 1254 }, { "epoch": 0.08087903589611393, "grad_norm": 0.00018585046524750463, "learning_rate": 9.71090215748815e-06, "loss": 0.0, "step": 1255 }, { "epoch": 0.08094348134304312, "grad_norm": 0.004867257547187822, "learning_rate": 9.71198622305605e-06, "loss": 0.0, "step": 1256 }, { "epoch": 0.08100792678997229, "grad_norm": 0.006004401052693547, "learning_rate": 9.713069425857785e-06, "loss": 0.0, "step": 1257 }, { "epoch": 0.08107237223690146, "grad_norm": 0.0062596726703548775, "learning_rate": 9.714151767265551e-06, "loss": 0.0, "step": 1258 }, { "epoch": 0.08113681768383063, "grad_norm": 0.0001596291398008103, "learning_rate": 9.71523324864827e-06, "loss": 0.0, "step": 1259 }, { "epoch": 0.08120126313075982, "grad_norm": 0.003137489508600633, "learning_rate": 9.716313871371605e-06, "loss": 0.0, "step": 1260 }, { "epoch": 0.08126570857768899, "grad_norm": 0.03389845337090403, "learning_rate": 9.717393636797964e-06, "loss": 0.0002, "step": 1261 }, { "epoch": 0.08133015402461816, "grad_norm": 0.0015749193613697652, "learning_rate": 9.718472546286517e-06, "loss": 0.0, "step": 1262 }, { "epoch": 0.08139459947154734, "grad_norm": 0.0016629746994158803, "learning_rate": 9.71955060119321e-06, "loss": 0.0, "step": 1263 }, { "epoch": 0.08145904491847651, "grad_norm": 0.9630747342468597, "learning_rate": 9.720627802870762e-06, "loss": 0.0016, "step": 1264 }, { "epoch": 0.08152349036540568, "grad_norm": 0.005799075443889431, "learning_rate": 9.721704152668686e-06, "loss": 0.0, "step": 1265 }, { "epoch": 0.08158793581233485, "grad_norm": 0.9226179200559602, "learning_rate": 9.722779651933294e-06, "loss": 0.0038, "step": 1266 }, { "epoch": 0.08165238125926404, "grad_norm": 0.009570234618307968, "learning_rate": 9.723854302007711e-06, "loss": 0.0, "step": 1267 }, { "epoch": 0.08171682670619321, "grad_norm": 0.007609505717263756, "learning_rate": 9.724928104231886e-06, "loss": 0.0, "step": 1268 }, { "epoch": 0.08178127215312238, "grad_norm": 0.0037044836505944105, "learning_rate": 9.72600105994259e-06, "loss": 0.0, "step": 1269 }, { "epoch": 0.08184571760005156, "grad_norm": 0.3324494028852565, "learning_rate": 9.727073170473441e-06, "loss": 0.0003, "step": 1270 }, { "epoch": 0.08191016304698073, "grad_norm": 0.22125266870947377, "learning_rate": 9.72814443715491e-06, "loss": 0.0015, "step": 1271 }, { "epoch": 0.0819746084939099, "grad_norm": 0.8368448698447373, "learning_rate": 9.729214861314324e-06, "loss": 0.0008, "step": 1272 }, { "epoch": 0.08203905394083907, "grad_norm": 0.03696941188445865, "learning_rate": 9.730284444275884e-06, "loss": 0.0, "step": 1273 }, { "epoch": 0.08210349938776826, "grad_norm": 0.09513810317509, "learning_rate": 9.73135318736067e-06, "loss": 0.0001, "step": 1274 }, { "epoch": 0.08216794483469743, "grad_norm": 0.1407959614094262, "learning_rate": 9.732421091886653e-06, "loss": 0.0001, "step": 1275 }, { "epoch": 0.0822323902816266, "grad_norm": 0.11572725150082791, "learning_rate": 9.733488159168705e-06, "loss": 0.0002, "step": 1276 }, { "epoch": 0.08229683572855578, "grad_norm": 0.20268866551076187, "learning_rate": 9.734554390518605e-06, "loss": 0.0006, "step": 1277 }, { "epoch": 0.08236128117548495, "grad_norm": 0.034702215271124964, "learning_rate": 9.735619787245053e-06, "loss": 0.0, "step": 1278 }, { "epoch": 0.08242572662241412, "grad_norm": 0.618313305814074, "learning_rate": 9.73668435065368e-06, "loss": 0.0058, "step": 1279 }, { "epoch": 0.0824901720693433, "grad_norm": 0.12879200409898806, "learning_rate": 9.737748082047053e-06, "loss": 0.002, "step": 1280 }, { "epoch": 0.08255461751627248, "grad_norm": 0.15517708649995626, "learning_rate": 9.738810982724688e-06, "loss": 0.0014, "step": 1281 }, { "epoch": 0.08261906296320165, "grad_norm": 0.022942057466187164, "learning_rate": 9.739873053983062e-06, "loss": 0.0, "step": 1282 }, { "epoch": 0.08268350841013082, "grad_norm": 0.32524165834202085, "learning_rate": 9.740934297115614e-06, "loss": 0.0001, "step": 1283 }, { "epoch": 0.08274795385706, "grad_norm": 0.017645141056179724, "learning_rate": 9.74199471341276e-06, "loss": 0.0001, "step": 1284 }, { "epoch": 0.08281239930398918, "grad_norm": 0.001701689789313395, "learning_rate": 9.743054304161912e-06, "loss": 0.0, "step": 1285 }, { "epoch": 0.08287684475091835, "grad_norm": 0.11248027509383528, "learning_rate": 9.744113070647462e-06, "loss": 0.002, "step": 1286 }, { "epoch": 0.08294129019784752, "grad_norm": 0.009134959493058783, "learning_rate": 9.745171014150819e-06, "loss": 0.0, "step": 1287 }, { "epoch": 0.0830057356447767, "grad_norm": 0.001851569614579538, "learning_rate": 9.746228135950402e-06, "loss": 0.0, "step": 1288 }, { "epoch": 0.08307018109170587, "grad_norm": 0.004553438642060542, "learning_rate": 9.747284437321652e-06, "loss": 0.0, "step": 1289 }, { "epoch": 0.08313462653863504, "grad_norm": 0.010993416473078717, "learning_rate": 9.748339919537046e-06, "loss": 0.0001, "step": 1290 }, { "epoch": 0.08319907198556421, "grad_norm": 0.046090326040599205, "learning_rate": 9.749394583866101e-06, "loss": 0.0002, "step": 1291 }, { "epoch": 0.0832635174324934, "grad_norm": 0.016645441025789916, "learning_rate": 9.750448431575384e-06, "loss": 0.0002, "step": 1292 }, { "epoch": 0.08332796287942257, "grad_norm": 0.07861484502338792, "learning_rate": 9.751501463928525e-06, "loss": 0.0002, "step": 1293 }, { "epoch": 0.08339240832635174, "grad_norm": 0.14598362209163068, "learning_rate": 9.75255368218622e-06, "loss": 0.0012, "step": 1294 }, { "epoch": 0.08345685377328092, "grad_norm": 0.005253576712949334, "learning_rate": 9.753605087606247e-06, "loss": 0.0, "step": 1295 }, { "epoch": 0.0835212992202101, "grad_norm": 0.06470684124601767, "learning_rate": 9.75465568144347e-06, "loss": 0.0001, "step": 1296 }, { "epoch": 0.08358574466713926, "grad_norm": 0.0001312365358307035, "learning_rate": 9.755705464949845e-06, "loss": 0.0, "step": 1297 }, { "epoch": 0.08365019011406843, "grad_norm": 0.009228063738194224, "learning_rate": 9.756754439374438e-06, "loss": 0.0, "step": 1298 }, { "epoch": 0.08371463556099762, "grad_norm": 0.022493899301872504, "learning_rate": 9.757802605963434e-06, "loss": 0.0001, "step": 1299 }, { "epoch": 0.08377908100792679, "grad_norm": 0.025017351223788283, "learning_rate": 9.758849965960128e-06, "loss": 0.0, "step": 1300 }, { "epoch": 0.08384352645485596, "grad_norm": 0.0033973402368583138, "learning_rate": 9.759896520604957e-06, "loss": 0.0, "step": 1301 }, { "epoch": 0.08390797190178514, "grad_norm": 0.016653590335366984, "learning_rate": 9.760942271135499e-06, "loss": 0.0001, "step": 1302 }, { "epoch": 0.08397241734871431, "grad_norm": 0.05323761764710082, "learning_rate": 9.761987218786474e-06, "loss": 0.0001, "step": 1303 }, { "epoch": 0.08403686279564349, "grad_norm": 0.00047998523145980336, "learning_rate": 9.763031364789768e-06, "loss": 0.0, "step": 1304 }, { "epoch": 0.08410130824257266, "grad_norm": 0.0012154446343361834, "learning_rate": 9.764074710374431e-06, "loss": 0.0, "step": 1305 }, { "epoch": 0.08416575368950184, "grad_norm": 0.1703871353470703, "learning_rate": 9.765117256766685e-06, "loss": 0.0003, "step": 1306 }, { "epoch": 0.08423019913643101, "grad_norm": 0.3756810798293176, "learning_rate": 9.766159005189943e-06, "loss": 0.0032, "step": 1307 }, { "epoch": 0.08429464458336018, "grad_norm": 0.0012046472375180962, "learning_rate": 9.767199956864808e-06, "loss": 0.0, "step": 1308 }, { "epoch": 0.08435909003028937, "grad_norm": 0.0008695978340420592, "learning_rate": 9.768240113009083e-06, "loss": 0.0, "step": 1309 }, { "epoch": 0.08442353547721854, "grad_norm": 0.035405811355180825, "learning_rate": 9.769279474837782e-06, "loss": 0.0003, "step": 1310 }, { "epoch": 0.08448798092414771, "grad_norm": 0.05460181932338454, "learning_rate": 9.770318043563141e-06, "loss": 0.0003, "step": 1311 }, { "epoch": 0.08455242637107688, "grad_norm": 0.0020692583491097017, "learning_rate": 9.771355820394616e-06, "loss": 0.0, "step": 1312 }, { "epoch": 0.08461687181800606, "grad_norm": 0.0029885875308376477, "learning_rate": 9.772392806538907e-06, "loss": 0.0, "step": 1313 }, { "epoch": 0.08468131726493523, "grad_norm": 0.0010176173067989608, "learning_rate": 9.773429003199951e-06, "loss": 0.0, "step": 1314 }, { "epoch": 0.0847457627118644, "grad_norm": 0.00025002265076070745, "learning_rate": 9.77446441157894e-06, "loss": 0.0, "step": 1315 }, { "epoch": 0.08481020815879359, "grad_norm": 0.011615362301719032, "learning_rate": 9.775499032874327e-06, "loss": 0.0, "step": 1316 }, { "epoch": 0.08487465360572276, "grad_norm": 0.8283102080329923, "learning_rate": 9.776532868281832e-06, "loss": 0.0065, "step": 1317 }, { "epoch": 0.08493909905265193, "grad_norm": 0.009607753739860307, "learning_rate": 9.777565918994456e-06, "loss": 0.0, "step": 1318 }, { "epoch": 0.0850035444995811, "grad_norm": 0.0008744673144066277, "learning_rate": 9.778598186202483e-06, "loss": 0.0, "step": 1319 }, { "epoch": 0.08506798994651028, "grad_norm": 0.35578931250669904, "learning_rate": 9.77962967109349e-06, "loss": 0.0015, "step": 1320 }, { "epoch": 0.08513243539343945, "grad_norm": 0.0002869815156587147, "learning_rate": 9.780660374852359e-06, "loss": 0.0, "step": 1321 }, { "epoch": 0.08519688084036862, "grad_norm": 9.750659056478101e-05, "learning_rate": 9.781690298661277e-06, "loss": 0.0, "step": 1322 }, { "epoch": 0.08526132628729781, "grad_norm": 0.0008800151837540015, "learning_rate": 9.782719443699757e-06, "loss": 0.0, "step": 1323 }, { "epoch": 0.08532577173422698, "grad_norm": 0.007283107294294607, "learning_rate": 9.783747811144632e-06, "loss": 0.0, "step": 1324 }, { "epoch": 0.08539021718115615, "grad_norm": 0.00046669696933537125, "learning_rate": 9.784775402170071e-06, "loss": 0.0, "step": 1325 }, { "epoch": 0.08545466262808532, "grad_norm": 0.011137442948644324, "learning_rate": 9.785802217947585e-06, "loss": 0.0, "step": 1326 }, { "epoch": 0.0855191080750145, "grad_norm": 0.00046858581858597265, "learning_rate": 9.786828259646038e-06, "loss": 0.0, "step": 1327 }, { "epoch": 0.08558355352194368, "grad_norm": 0.0011227359990139579, "learning_rate": 9.787853528431651e-06, "loss": 0.0, "step": 1328 }, { "epoch": 0.08564799896887285, "grad_norm": 0.005764189022396666, "learning_rate": 9.788878025468011e-06, "loss": 0.0, "step": 1329 }, { "epoch": 0.08571244441580203, "grad_norm": 0.00036195693870573616, "learning_rate": 9.78990175191608e-06, "loss": 0.0, "step": 1330 }, { "epoch": 0.0857768898627312, "grad_norm": 0.0004031809736862309, "learning_rate": 9.790924708934204e-06, "loss": 0.0, "step": 1331 }, { "epoch": 0.08584133530966037, "grad_norm": 0.12820426942731789, "learning_rate": 9.791946897678112e-06, "loss": 0.0015, "step": 1332 }, { "epoch": 0.08590578075658954, "grad_norm": 0.007201268710137027, "learning_rate": 9.792968319300942e-06, "loss": 0.0, "step": 1333 }, { "epoch": 0.08597022620351873, "grad_norm": 0.29705053711328777, "learning_rate": 9.793988974953227e-06, "loss": 0.0012, "step": 1334 }, { "epoch": 0.0860346716504479, "grad_norm": 0.048011958608229134, "learning_rate": 9.795008865782922e-06, "loss": 0.0003, "step": 1335 }, { "epoch": 0.08609911709737707, "grad_norm": 0.000588550762065899, "learning_rate": 9.796027992935399e-06, "loss": 0.0, "step": 1336 }, { "epoch": 0.08616356254430624, "grad_norm": 0.00403323898251864, "learning_rate": 9.79704635755346e-06, "loss": 0.0, "step": 1337 }, { "epoch": 0.08622800799123542, "grad_norm": 0.0770604017607048, "learning_rate": 9.798063960777342e-06, "loss": 0.0003, "step": 1338 }, { "epoch": 0.08629245343816459, "grad_norm": 0.002188687958222415, "learning_rate": 9.799080803744728e-06, "loss": 0.0, "step": 1339 }, { "epoch": 0.08635689888509376, "grad_norm": 0.0005241857617764945, "learning_rate": 9.800096887590755e-06, "loss": 0.0, "step": 1340 }, { "epoch": 0.08642134433202295, "grad_norm": 0.0007191739447359967, "learning_rate": 9.801112213448018e-06, "loss": 0.0, "step": 1341 }, { "epoch": 0.08648578977895212, "grad_norm": 0.0020778973471756966, "learning_rate": 9.802126782446577e-06, "loss": 0.0, "step": 1342 }, { "epoch": 0.08655023522588129, "grad_norm": 0.023544600776087193, "learning_rate": 9.803140595713964e-06, "loss": 0.0001, "step": 1343 }, { "epoch": 0.08661468067281046, "grad_norm": 0.005317380634932252, "learning_rate": 9.804153654375207e-06, "loss": 0.0, "step": 1344 }, { "epoch": 0.08667912611973964, "grad_norm": 0.001298663308470156, "learning_rate": 9.805165959552807e-06, "loss": 0.0, "step": 1345 }, { "epoch": 0.08674357156666881, "grad_norm": 0.001955103407708176, "learning_rate": 9.806177512366772e-06, "loss": 0.0, "step": 1346 }, { "epoch": 0.08680801701359799, "grad_norm": 0.012739390414210567, "learning_rate": 9.807188313934614e-06, "loss": 0.0, "step": 1347 }, { "epoch": 0.08687246246052717, "grad_norm": 0.0017430904771763144, "learning_rate": 9.808198365371351e-06, "loss": 0.0, "step": 1348 }, { "epoch": 0.08693690790745634, "grad_norm": 0.007015913286311483, "learning_rate": 9.80920766778953e-06, "loss": 0.0, "step": 1349 }, { "epoch": 0.08700135335438551, "grad_norm": 0.0018873731549218875, "learning_rate": 9.810216222299216e-06, "loss": 0.0, "step": 1350 }, { "epoch": 0.08706579880131468, "grad_norm": 0.0003947696754434573, "learning_rate": 9.81122403000801e-06, "loss": 0.0, "step": 1351 }, { "epoch": 0.08713024424824387, "grad_norm": 0.038484140741465445, "learning_rate": 9.81223109202106e-06, "loss": 0.0001, "step": 1352 }, { "epoch": 0.08719468969517304, "grad_norm": 0.11864357590167482, "learning_rate": 9.813237409441055e-06, "loss": 0.0003, "step": 1353 }, { "epoch": 0.0872591351421022, "grad_norm": 0.5198617615258171, "learning_rate": 9.814242983368245e-06, "loss": 0.0043, "step": 1354 }, { "epoch": 0.08732358058903139, "grad_norm": 0.06844873037905837, "learning_rate": 9.815247814900443e-06, "loss": 0.0001, "step": 1355 }, { "epoch": 0.08738802603596056, "grad_norm": 0.00017070857672494202, "learning_rate": 9.816251905133028e-06, "loss": 0.0, "step": 1356 }, { "epoch": 0.08745247148288973, "grad_norm": 0.014964520116337086, "learning_rate": 9.817255255158962e-06, "loss": 0.0001, "step": 1357 }, { "epoch": 0.0875169169298189, "grad_norm": 0.003770806539701692, "learning_rate": 9.818257866068787e-06, "loss": 0.0, "step": 1358 }, { "epoch": 0.08758136237674809, "grad_norm": 0.09473215165314121, "learning_rate": 9.81925973895064e-06, "loss": 0.0005, "step": 1359 }, { "epoch": 0.08764580782367726, "grad_norm": 0.005408603276892646, "learning_rate": 9.820260874890255e-06, "loss": 0.0, "step": 1360 }, { "epoch": 0.08771025327060643, "grad_norm": 0.0015547945886781377, "learning_rate": 9.821261274970973e-06, "loss": 0.0, "step": 1361 }, { "epoch": 0.08777469871753561, "grad_norm": 0.0015138486387163961, "learning_rate": 9.822260940273748e-06, "loss": 0.0, "step": 1362 }, { "epoch": 0.08783914416446478, "grad_norm": 0.00069763105509275, "learning_rate": 9.823259871877153e-06, "loss": 0.0015, "step": 1363 }, { "epoch": 0.08790358961139395, "grad_norm": 0.0025643315514372773, "learning_rate": 9.824258070857386e-06, "loss": 0.0, "step": 1364 }, { "epoch": 0.08796803505832312, "grad_norm": 0.009340093648465875, "learning_rate": 9.825255538288282e-06, "loss": 0.0, "step": 1365 }, { "epoch": 0.08803248050525231, "grad_norm": 0.015881523472943818, "learning_rate": 9.826252275241316e-06, "loss": 0.0001, "step": 1366 }, { "epoch": 0.08809692595218148, "grad_norm": 0.017519239632679023, "learning_rate": 9.827248282785611e-06, "loss": 0.0, "step": 1367 }, { "epoch": 0.08816137139911065, "grad_norm": 0.0016914266291122215, "learning_rate": 9.828243561987945e-06, "loss": 0.0, "step": 1368 }, { "epoch": 0.08822581684603983, "grad_norm": 0.05371591804994142, "learning_rate": 9.829238113912756e-06, "loss": 0.0002, "step": 1369 }, { "epoch": 0.088290262292969, "grad_norm": 0.002753948558404598, "learning_rate": 9.83023193962215e-06, "loss": 0.0, "step": 1370 }, { "epoch": 0.08835470773989818, "grad_norm": 0.002328304292487111, "learning_rate": 9.831225040175908e-06, "loss": 0.0, "step": 1371 }, { "epoch": 0.08841915318682735, "grad_norm": 0.001300685910040194, "learning_rate": 9.832217416631494e-06, "loss": 0.0, "step": 1372 }, { "epoch": 0.08848359863375653, "grad_norm": 0.02016763377567185, "learning_rate": 9.833209070044064e-06, "loss": 0.0, "step": 1373 }, { "epoch": 0.0885480440806857, "grad_norm": 0.0006239190779079104, "learning_rate": 9.834200001466461e-06, "loss": 0.0, "step": 1374 }, { "epoch": 0.08861248952761487, "grad_norm": 0.17247998444561874, "learning_rate": 9.835190211949236e-06, "loss": 0.0006, "step": 1375 }, { "epoch": 0.08867693497454404, "grad_norm": 0.02695488949887911, "learning_rate": 9.83617970254065e-06, "loss": 0.0, "step": 1376 }, { "epoch": 0.08874138042147323, "grad_norm": 0.004205663295906084, "learning_rate": 9.837168474286672e-06, "loss": 0.0, "step": 1377 }, { "epoch": 0.0888058258684024, "grad_norm": 0.23413007177194578, "learning_rate": 9.838156528231003e-06, "loss": 0.002, "step": 1378 }, { "epoch": 0.08887027131533157, "grad_norm": 0.010107797521254137, "learning_rate": 9.839143865415061e-06, "loss": 0.0, "step": 1379 }, { "epoch": 0.08893471676226075, "grad_norm": 0.22197280527991795, "learning_rate": 9.840130486878012e-06, "loss": 0.0034, "step": 1380 }, { "epoch": 0.08899916220918992, "grad_norm": 0.011300355431752012, "learning_rate": 9.841116393656755e-06, "loss": 0.0, "step": 1381 }, { "epoch": 0.08906360765611909, "grad_norm": 0.0006419008143487855, "learning_rate": 9.84210158678594e-06, "loss": 0.0, "step": 1382 }, { "epoch": 0.08912805310304826, "grad_norm": 0.06378269833658995, "learning_rate": 9.843086067297971e-06, "loss": 0.0001, "step": 1383 }, { "epoch": 0.08919249854997745, "grad_norm": 0.01747026315388834, "learning_rate": 9.844069836223013e-06, "loss": 0.0, "step": 1384 }, { "epoch": 0.08925694399690662, "grad_norm": 0.03605956953767996, "learning_rate": 9.845052894589003e-06, "loss": 0.0001, "step": 1385 }, { "epoch": 0.08932138944383579, "grad_norm": 0.008695684207934068, "learning_rate": 9.846035243421644e-06, "loss": 0.0, "step": 1386 }, { "epoch": 0.08938583489076497, "grad_norm": 1.3870611063691909, "learning_rate": 9.847016883744427e-06, "loss": 0.009, "step": 1387 }, { "epoch": 0.08945028033769414, "grad_norm": 0.0004034604019097972, "learning_rate": 9.847997816578631e-06, "loss": 0.0, "step": 1388 }, { "epoch": 0.08951472578462331, "grad_norm": 0.24464011404589933, "learning_rate": 9.848978042943318e-06, "loss": 0.0054, "step": 1389 }, { "epoch": 0.08957917123155248, "grad_norm": 0.006027357184420001, "learning_rate": 9.849957563855364e-06, "loss": 0.0, "step": 1390 }, { "epoch": 0.08964361667848167, "grad_norm": 0.0032428463574663727, "learning_rate": 9.85093638032944e-06, "loss": 0.0, "step": 1391 }, { "epoch": 0.08970806212541084, "grad_norm": 0.18261966601643068, "learning_rate": 9.851914493378031e-06, "loss": 0.0002, "step": 1392 }, { "epoch": 0.08977250757234001, "grad_norm": 0.019904185252961712, "learning_rate": 9.85289190401145e-06, "loss": 0.0, "step": 1393 }, { "epoch": 0.0898369530192692, "grad_norm": 0.006466688261427656, "learning_rate": 9.853868613237822e-06, "loss": 0.0, "step": 1394 }, { "epoch": 0.08990139846619837, "grad_norm": 0.2036738682902856, "learning_rate": 9.854844622063109e-06, "loss": 0.0005, "step": 1395 }, { "epoch": 0.08996584391312754, "grad_norm": 0.12498444221953549, "learning_rate": 9.855819931491116e-06, "loss": 0.0005, "step": 1396 }, { "epoch": 0.0900302893600567, "grad_norm": 0.25550481384225326, "learning_rate": 9.85679454252348e-06, "loss": 0.0002, "step": 1397 }, { "epoch": 0.09009473480698589, "grad_norm": 0.005078154637463445, "learning_rate": 9.857768456159699e-06, "loss": 0.0, "step": 1398 }, { "epoch": 0.09015918025391506, "grad_norm": 0.006486574987916103, "learning_rate": 9.85874167339712e-06, "loss": 0.0, "step": 1399 }, { "epoch": 0.09022362570084423, "grad_norm": 0.1849051672410749, "learning_rate": 9.859714195230952e-06, "loss": 0.0003, "step": 1400 }, { "epoch": 0.09028807114777342, "grad_norm": 1.157685394105611, "learning_rate": 9.860686022654276e-06, "loss": 0.0045, "step": 1401 }, { "epoch": 0.09035251659470259, "grad_norm": 0.05039861841158964, "learning_rate": 9.861657156658047e-06, "loss": 0.0002, "step": 1402 }, { "epoch": 0.09041696204163176, "grad_norm": 0.003640545123375758, "learning_rate": 9.862627598231097e-06, "loss": 0.0, "step": 1403 }, { "epoch": 0.09048140748856093, "grad_norm": 0.18785870532732124, "learning_rate": 9.863597348360145e-06, "loss": 0.0005, "step": 1404 }, { "epoch": 0.09054585293549011, "grad_norm": 0.02162690258325965, "learning_rate": 9.864566408029807e-06, "loss": 0.0, "step": 1405 }, { "epoch": 0.09061029838241928, "grad_norm": 0.3182465991267757, "learning_rate": 9.86553477822259e-06, "loss": 0.0013, "step": 1406 }, { "epoch": 0.09067474382934845, "grad_norm": 0.00506957712645276, "learning_rate": 9.866502459918909e-06, "loss": 0.0, "step": 1407 }, { "epoch": 0.09073918927627764, "grad_norm": 0.0008336997610315851, "learning_rate": 9.867469454097092e-06, "loss": 0.0, "step": 1408 }, { "epoch": 0.09080363472320681, "grad_norm": 0.044061586416188465, "learning_rate": 9.86843576173338e-06, "loss": 0.0004, "step": 1409 }, { "epoch": 0.09086808017013598, "grad_norm": 0.007409890122830501, "learning_rate": 9.869401383801938e-06, "loss": 0.0, "step": 1410 }, { "epoch": 0.09093252561706515, "grad_norm": 0.018156252708728265, "learning_rate": 9.870366321274855e-06, "loss": 0.0, "step": 1411 }, { "epoch": 0.09099697106399433, "grad_norm": 0.0006506659029600226, "learning_rate": 9.871330575122158e-06, "loss": 0.0, "step": 1412 }, { "epoch": 0.0910614165109235, "grad_norm": 0.0008328508705257589, "learning_rate": 9.872294146311813e-06, "loss": 0.0, "step": 1413 }, { "epoch": 0.09112586195785267, "grad_norm": 0.0007773650111156604, "learning_rate": 9.87325703580973e-06, "loss": 0.0, "step": 1414 }, { "epoch": 0.09119030740478186, "grad_norm": 0.0012217894950347168, "learning_rate": 9.874219244579777e-06, "loss": 0.0, "step": 1415 }, { "epoch": 0.09125475285171103, "grad_norm": 0.0480236903713464, "learning_rate": 9.875180773583766e-06, "loss": 0.0001, "step": 1416 }, { "epoch": 0.0913191982986402, "grad_norm": 0.001227121457690193, "learning_rate": 9.876141623781484e-06, "loss": 0.0, "step": 1417 }, { "epoch": 0.09138364374556937, "grad_norm": 0.004099157581286233, "learning_rate": 9.877101796130682e-06, "loss": 0.0, "step": 1418 }, { "epoch": 0.09144808919249856, "grad_norm": 0.001988160165573771, "learning_rate": 9.878061291587085e-06, "loss": 0.0, "step": 1419 }, { "epoch": 0.09151253463942773, "grad_norm": 0.00018577178633780048, "learning_rate": 9.879020111104401e-06, "loss": 0.0, "step": 1420 }, { "epoch": 0.0915769800863569, "grad_norm": 0.22095755874515524, "learning_rate": 9.87997825563432e-06, "loss": 0.0003, "step": 1421 }, { "epoch": 0.09164142553328607, "grad_norm": 2.3769321613123924, "learning_rate": 9.880935726126525e-06, "loss": 0.015, "step": 1422 }, { "epoch": 0.09170587098021525, "grad_norm": 0.018600959082779225, "learning_rate": 9.8818925235287e-06, "loss": 0.0001, "step": 1423 }, { "epoch": 0.09177031642714442, "grad_norm": 0.08022021561964109, "learning_rate": 9.882848648786525e-06, "loss": 0.0001, "step": 1424 }, { "epoch": 0.09183476187407359, "grad_norm": 0.20565971377159456, "learning_rate": 9.883804102843692e-06, "loss": 0.0002, "step": 1425 }, { "epoch": 0.09189920732100278, "grad_norm": 0.9405621745271036, "learning_rate": 9.884758886641908e-06, "loss": 0.0044, "step": 1426 }, { "epoch": 0.09196365276793195, "grad_norm": 0.0004957289890303268, "learning_rate": 9.885713001120897e-06, "loss": 0.0, "step": 1427 }, { "epoch": 0.09202809821486112, "grad_norm": 0.0017034345264256923, "learning_rate": 9.886666447218409e-06, "loss": 0.0, "step": 1428 }, { "epoch": 0.09209254366179029, "grad_norm": 0.0339874361429872, "learning_rate": 9.887619225870228e-06, "loss": 0.0001, "step": 1429 }, { "epoch": 0.09215698910871947, "grad_norm": 0.3589929540229609, "learning_rate": 9.888571338010167e-06, "loss": 0.0032, "step": 1430 }, { "epoch": 0.09222143455564864, "grad_norm": 0.005050188803306223, "learning_rate": 9.889522784570087e-06, "loss": 0.0001, "step": 1431 }, { "epoch": 0.09228588000257781, "grad_norm": 0.15730377931145592, "learning_rate": 9.890473566479896e-06, "loss": 0.0005, "step": 1432 }, { "epoch": 0.092350325449507, "grad_norm": 0.0005229967634722769, "learning_rate": 9.891423684667549e-06, "loss": 0.0, "step": 1433 }, { "epoch": 0.09241477089643617, "grad_norm": 0.005672397433659443, "learning_rate": 9.892373140059061e-06, "loss": 0.0001, "step": 1434 }, { "epoch": 0.09247921634336534, "grad_norm": 0.0002519493904518232, "learning_rate": 9.893321933578516e-06, "loss": 0.0, "step": 1435 }, { "epoch": 0.09254366179029451, "grad_norm": 0.027063703592058647, "learning_rate": 9.894270066148061e-06, "loss": 0.0001, "step": 1436 }, { "epoch": 0.0926081072372237, "grad_norm": 0.026686603489626357, "learning_rate": 9.895217538687919e-06, "loss": 0.0003, "step": 1437 }, { "epoch": 0.09267255268415286, "grad_norm": 0.000908574260126288, "learning_rate": 9.89616435211639e-06, "loss": 0.0, "step": 1438 }, { "epoch": 0.09273699813108204, "grad_norm": 0.02027503235190547, "learning_rate": 9.897110507349863e-06, "loss": 0.0, "step": 1439 }, { "epoch": 0.09280144357801122, "grad_norm": 0.019177680948524713, "learning_rate": 9.898056005302818e-06, "loss": 0.0001, "step": 1440 }, { "epoch": 0.09286588902494039, "grad_norm": 0.010158676693733085, "learning_rate": 9.899000846887821e-06, "loss": 0.0, "step": 1441 }, { "epoch": 0.09293033447186956, "grad_norm": 0.004541012965222062, "learning_rate": 9.899945033015553e-06, "loss": 0.0, "step": 1442 }, { "epoch": 0.09299477991879873, "grad_norm": 0.002712991317341899, "learning_rate": 9.900888564594789e-06, "loss": 0.0, "step": 1443 }, { "epoch": 0.09305922536572792, "grad_norm": 0.020957469761172257, "learning_rate": 9.901831442532423e-06, "loss": 0.0001, "step": 1444 }, { "epoch": 0.09312367081265709, "grad_norm": 0.00033094373180639466, "learning_rate": 9.902773667733459e-06, "loss": 0.0, "step": 1445 }, { "epoch": 0.09318811625958626, "grad_norm": 0.3864754190635272, "learning_rate": 9.90371524110103e-06, "loss": 0.0007, "step": 1446 }, { "epoch": 0.09325256170651544, "grad_norm": 0.01617892127793432, "learning_rate": 9.904656163536388e-06, "loss": 0.0001, "step": 1447 }, { "epoch": 0.09331700715344461, "grad_norm": 0.012048904136151128, "learning_rate": 9.905596435938926e-06, "loss": 0.0, "step": 1448 }, { "epoch": 0.09338145260037378, "grad_norm": 0.0011809616144378202, "learning_rate": 9.906536059206166e-06, "loss": 0.0, "step": 1449 }, { "epoch": 0.09344589804730295, "grad_norm": 0.0017681243992767163, "learning_rate": 9.907475034233778e-06, "loss": 0.0, "step": 1450 }, { "epoch": 0.09351034349423214, "grad_norm": 0.0017163327437415454, "learning_rate": 9.908413361915576e-06, "loss": 0.0, "step": 1451 }, { "epoch": 0.09357478894116131, "grad_norm": 0.01384325150685238, "learning_rate": 9.90935104314353e-06, "loss": 0.0, "step": 1452 }, { "epoch": 0.09363923438809048, "grad_norm": 0.0867655132908843, "learning_rate": 9.910288078807764e-06, "loss": 0.0001, "step": 1453 }, { "epoch": 0.09370367983501966, "grad_norm": 0.7641827369741804, "learning_rate": 9.911224469796568e-06, "loss": 0.0075, "step": 1454 }, { "epoch": 0.09376812528194883, "grad_norm": 0.060422641060364934, "learning_rate": 9.912160216996399e-06, "loss": 0.0002, "step": 1455 }, { "epoch": 0.093832570728878, "grad_norm": 2.010386913125508, "learning_rate": 9.913095321291884e-06, "loss": 0.0106, "step": 1456 }, { "epoch": 0.09389701617580717, "grad_norm": 0.006303394784412231, "learning_rate": 9.914029783565832e-06, "loss": 0.0, "step": 1457 }, { "epoch": 0.09396146162273636, "grad_norm": 0.009684100579074902, "learning_rate": 9.914963604699233e-06, "loss": 0.0, "step": 1458 }, { "epoch": 0.09402590706966553, "grad_norm": 0.008250804826870545, "learning_rate": 9.915896785571264e-06, "loss": 0.0, "step": 1459 }, { "epoch": 0.0940903525165947, "grad_norm": 0.0009466256485816172, "learning_rate": 9.9168293270593e-06, "loss": 0.0, "step": 1460 }, { "epoch": 0.09415479796352387, "grad_norm": 0.048614013439532935, "learning_rate": 9.917761230038905e-06, "loss": 0.0017, "step": 1461 }, { "epoch": 0.09421924341045305, "grad_norm": 0.01888382417348389, "learning_rate": 9.918692495383852e-06, "loss": 0.0, "step": 1462 }, { "epoch": 0.09428368885738223, "grad_norm": 0.05641691448848606, "learning_rate": 9.91962312396612e-06, "loss": 0.0002, "step": 1463 }, { "epoch": 0.0943481343043114, "grad_norm": 0.42407625379866715, "learning_rate": 9.920553116655903e-06, "loss": 0.0027, "step": 1464 }, { "epoch": 0.09441257975124058, "grad_norm": 0.010436752034464663, "learning_rate": 9.921482474321605e-06, "loss": 0.0, "step": 1465 }, { "epoch": 0.09447702519816975, "grad_norm": 0.8174559493256072, "learning_rate": 9.922411197829863e-06, "loss": 0.0033, "step": 1466 }, { "epoch": 0.09454147064509892, "grad_norm": 0.0583152390449536, "learning_rate": 9.923339288045531e-06, "loss": 0.0, "step": 1467 }, { "epoch": 0.09460591609202809, "grad_norm": 0.306076587996211, "learning_rate": 9.924266745831703e-06, "loss": 0.0071, "step": 1468 }, { "epoch": 0.09467036153895728, "grad_norm": 0.23739539088773876, "learning_rate": 9.925193572049705e-06, "loss": 0.0005, "step": 1469 }, { "epoch": 0.09473480698588645, "grad_norm": 0.250875088850879, "learning_rate": 9.926119767559105e-06, "loss": 0.0006, "step": 1470 }, { "epoch": 0.09479925243281562, "grad_norm": 0.042556311330388334, "learning_rate": 9.927045333217721e-06, "loss": 0.0, "step": 1471 }, { "epoch": 0.0948636978797448, "grad_norm": 0.03793214656428974, "learning_rate": 9.927970269881614e-06, "loss": 0.0, "step": 1472 }, { "epoch": 0.09492814332667397, "grad_norm": 0.28249577284916433, "learning_rate": 9.928894578405113e-06, "loss": 0.0012, "step": 1473 }, { "epoch": 0.09499258877360314, "grad_norm": 0.17475368245792194, "learning_rate": 9.929818259640796e-06, "loss": 0.0003, "step": 1474 }, { "epoch": 0.09505703422053231, "grad_norm": 0.1445167057348758, "learning_rate": 9.930741314439511e-06, "loss": 0.0002, "step": 1475 }, { "epoch": 0.0951214796674615, "grad_norm": 0.0236630083799695, "learning_rate": 9.931663743650381e-06, "loss": 0.0, "step": 1476 }, { "epoch": 0.09518592511439067, "grad_norm": 0.029098778789947974, "learning_rate": 9.932585548120796e-06, "loss": 0.0001, "step": 1477 }, { "epoch": 0.09525037056131984, "grad_norm": 0.009179967320519636, "learning_rate": 9.933506728696428e-06, "loss": 0.0001, "step": 1478 }, { "epoch": 0.09531481600824902, "grad_norm": 0.001304011320141148, "learning_rate": 9.934427286221235e-06, "loss": 0.0, "step": 1479 }, { "epoch": 0.0953792614551782, "grad_norm": 0.037167467586508234, "learning_rate": 9.93534722153746e-06, "loss": 0.0001, "step": 1480 }, { "epoch": 0.09544370690210736, "grad_norm": 0.004253996176845752, "learning_rate": 9.936266535485644e-06, "loss": 0.0, "step": 1481 }, { "epoch": 0.09550815234903653, "grad_norm": 0.012284348723612233, "learning_rate": 9.937185228904622e-06, "loss": 0.0, "step": 1482 }, { "epoch": 0.09557259779596572, "grad_norm": 0.23995914820594377, "learning_rate": 9.938103302631535e-06, "loss": 0.0005, "step": 1483 }, { "epoch": 0.09563704324289489, "grad_norm": 0.4843605408510047, "learning_rate": 9.939020757501826e-06, "loss": 0.0029, "step": 1484 }, { "epoch": 0.09570148868982406, "grad_norm": 0.0006049933104365288, "learning_rate": 9.939937594349256e-06, "loss": 0.0, "step": 1485 }, { "epoch": 0.09576593413675324, "grad_norm": 0.007013362420659339, "learning_rate": 9.940853814005894e-06, "loss": 0.0001, "step": 1486 }, { "epoch": 0.09583037958368242, "grad_norm": 0.057614378941506235, "learning_rate": 9.941769417302139e-06, "loss": 0.0001, "step": 1487 }, { "epoch": 0.09589482503061159, "grad_norm": 0.016872804892730364, "learning_rate": 9.942684405066712e-06, "loss": 0.0002, "step": 1488 }, { "epoch": 0.09595927047754076, "grad_norm": 0.02935959737771146, "learning_rate": 9.94359877812666e-06, "loss": 0.0002, "step": 1489 }, { "epoch": 0.09602371592446994, "grad_norm": 0.002290192727872697, "learning_rate": 9.944512537307367e-06, "loss": 0.0, "step": 1490 }, { "epoch": 0.09608816137139911, "grad_norm": 0.022548370604501426, "learning_rate": 9.945425683432555e-06, "loss": 0.0001, "step": 1491 }, { "epoch": 0.09615260681832828, "grad_norm": 0.007626670351049349, "learning_rate": 9.946338217324292e-06, "loss": 0.0, "step": 1492 }, { "epoch": 0.09621705226525747, "grad_norm": 0.006175822573090903, "learning_rate": 9.94725013980299e-06, "loss": 0.0001, "step": 1493 }, { "epoch": 0.09628149771218664, "grad_norm": 0.038742508647196895, "learning_rate": 9.948161451687416e-06, "loss": 0.0004, "step": 1494 }, { "epoch": 0.09634594315911581, "grad_norm": 0.0005905164246859292, "learning_rate": 9.949072153794689e-06, "loss": 0.0, "step": 1495 }, { "epoch": 0.09641038860604498, "grad_norm": 0.131122069226978, "learning_rate": 9.949982246940294e-06, "loss": 0.0005, "step": 1496 }, { "epoch": 0.09647483405297416, "grad_norm": 0.0005022347779479968, "learning_rate": 9.950891731938082e-06, "loss": 0.0, "step": 1497 }, { "epoch": 0.09653927949990333, "grad_norm": 0.0001437653529467265, "learning_rate": 9.951800609600263e-06, "loss": 0.0, "step": 1498 }, { "epoch": 0.0966037249468325, "grad_norm": 0.0010122622831039668, "learning_rate": 9.952708880737434e-06, "loss": 0.0, "step": 1499 }, { "epoch": 0.09666817039376169, "grad_norm": 0.33948919282434475, "learning_rate": 9.953616546158563e-06, "loss": 0.0024, "step": 1500 }, { "epoch": 0.09673261584069086, "grad_norm": 0.003581175820871328, "learning_rate": 9.954523606671003e-06, "loss": 0.0, "step": 1501 }, { "epoch": 0.09679706128762003, "grad_norm": 0.00042302368364188466, "learning_rate": 9.955430063080492e-06, "loss": 0.0, "step": 1502 }, { "epoch": 0.0968615067345492, "grad_norm": 0.0017871230098158303, "learning_rate": 9.956335916191164e-06, "loss": 0.0, "step": 1503 }, { "epoch": 0.09692595218147838, "grad_norm": 0.0777799107536665, "learning_rate": 9.95724116680554e-06, "loss": 0.0005, "step": 1504 }, { "epoch": 0.09699039762840755, "grad_norm": 0.0012527611164778536, "learning_rate": 9.958145815724548e-06, "loss": 0.0, "step": 1505 }, { "epoch": 0.09705484307533672, "grad_norm": 0.24797657097956194, "learning_rate": 9.95904986374752e-06, "loss": 0.0009, "step": 1506 }, { "epoch": 0.0971192885222659, "grad_norm": 0.0014533547584727054, "learning_rate": 9.95995331167219e-06, "loss": 0.0, "step": 1507 }, { "epoch": 0.09718373396919508, "grad_norm": 0.0015967057251301884, "learning_rate": 9.96085616029471e-06, "loss": 0.0, "step": 1508 }, { "epoch": 0.09724817941612425, "grad_norm": 0.004208869780398821, "learning_rate": 9.961758410409647e-06, "loss": 0.0, "step": 1509 }, { "epoch": 0.09731262486305342, "grad_norm": 0.01696486413837203, "learning_rate": 9.96266006280999e-06, "loss": 0.0, "step": 1510 }, { "epoch": 0.0973770703099826, "grad_norm": 0.0018957704558056011, "learning_rate": 9.963561118287149e-06, "loss": 0.0, "step": 1511 }, { "epoch": 0.09744151575691178, "grad_norm": 0.0040607474789808124, "learning_rate": 9.96446157763097e-06, "loss": 0.0, "step": 1512 }, { "epoch": 0.09750596120384095, "grad_norm": 0.0011904989553570007, "learning_rate": 9.965361441629727e-06, "loss": 0.0, "step": 1513 }, { "epoch": 0.09757040665077012, "grad_norm": 0.01142907492530569, "learning_rate": 9.966260711070133e-06, "loss": 0.0001, "step": 1514 }, { "epoch": 0.0976348520976993, "grad_norm": 0.0046347557260846255, "learning_rate": 9.967159386737342e-06, "loss": 0.0, "step": 1515 }, { "epoch": 0.09769929754462847, "grad_norm": 0.0010111396870745295, "learning_rate": 9.968057469414957e-06, "loss": 0.0, "step": 1516 }, { "epoch": 0.09776374299155764, "grad_norm": 0.0036916821478951735, "learning_rate": 9.968954959885024e-06, "loss": 0.0, "step": 1517 }, { "epoch": 0.09782818843848683, "grad_norm": 0.0547688530482111, "learning_rate": 9.969851858928051e-06, "loss": 0.0003, "step": 1518 }, { "epoch": 0.097892633885416, "grad_norm": 0.00326234239409605, "learning_rate": 9.970748167323e-06, "loss": 0.0, "step": 1519 }, { "epoch": 0.09795707933234517, "grad_norm": 0.004320389485947813, "learning_rate": 9.971643885847294e-06, "loss": 0.0, "step": 1520 }, { "epoch": 0.09802152477927434, "grad_norm": 0.002041762479109093, "learning_rate": 9.972539015276823e-06, "loss": 0.0, "step": 1521 }, { "epoch": 0.09808597022620352, "grad_norm": 0.06881354887199352, "learning_rate": 9.97343355638595e-06, "loss": 0.0001, "step": 1522 }, { "epoch": 0.0981504156731327, "grad_norm": 0.06540829750124166, "learning_rate": 9.974327509947506e-06, "loss": 0.0001, "step": 1523 }, { "epoch": 0.09821486112006186, "grad_norm": 0.00757338516581833, "learning_rate": 9.975220876732808e-06, "loss": 0.0, "step": 1524 }, { "epoch": 0.09827930656699105, "grad_norm": 0.008557299181676596, "learning_rate": 9.97611365751165e-06, "loss": 0.0001, "step": 1525 }, { "epoch": 0.09834375201392022, "grad_norm": 0.0004520848970033896, "learning_rate": 9.977005853052309e-06, "loss": 0.0, "step": 1526 }, { "epoch": 0.09840819746084939, "grad_norm": 0.0021838000738325022, "learning_rate": 9.977897464121563e-06, "loss": 0.0, "step": 1527 }, { "epoch": 0.09847264290777856, "grad_norm": 0.003258614152280576, "learning_rate": 9.978788491484673e-06, "loss": 0.0, "step": 1528 }, { "epoch": 0.09853708835470774, "grad_norm": 0.0015698688676021998, "learning_rate": 9.979678935905403e-06, "loss": 0.0, "step": 1529 }, { "epoch": 0.09860153380163691, "grad_norm": 0.01185302928350965, "learning_rate": 9.98056879814602e-06, "loss": 0.0001, "step": 1530 }, { "epoch": 0.09866597924856609, "grad_norm": 0.001710585233948929, "learning_rate": 9.981458078967295e-06, "loss": 0.0, "step": 1531 }, { "epoch": 0.09873042469549527, "grad_norm": 0.0593000578214577, "learning_rate": 9.982346779128506e-06, "loss": 0.0001, "step": 1532 }, { "epoch": 0.09879487014242444, "grad_norm": 0.0037362333247285434, "learning_rate": 9.983234899387453e-06, "loss": 0.0, "step": 1533 }, { "epoch": 0.09885931558935361, "grad_norm": 0.0004543681892485397, "learning_rate": 9.984122440500443e-06, "loss": 0.0, "step": 1534 }, { "epoch": 0.09892376103628278, "grad_norm": 9.713128271191101e-05, "learning_rate": 9.985009403222313e-06, "loss": 0.0, "step": 1535 }, { "epoch": 0.09898820648321197, "grad_norm": 0.038945052790278024, "learning_rate": 9.98589578830642e-06, "loss": 0.0002, "step": 1536 }, { "epoch": 0.09905265193014114, "grad_norm": 0.00021423168057826362, "learning_rate": 9.98678159650465e-06, "loss": 0.0, "step": 1537 }, { "epoch": 0.0991170973770703, "grad_norm": 0.017880657638670156, "learning_rate": 9.987666828567429e-06, "loss": 0.0001, "step": 1538 }, { "epoch": 0.09918154282399949, "grad_norm": 0.01319963089075672, "learning_rate": 9.98855148524371e-06, "loss": 0.0, "step": 1539 }, { "epoch": 0.09924598827092866, "grad_norm": 0.0003586439757768937, "learning_rate": 9.989435567280993e-06, "loss": 0.0, "step": 1540 }, { "epoch": 0.09931043371785783, "grad_norm": 0.36412274922213167, "learning_rate": 9.990319075425318e-06, "loss": 0.0026, "step": 1541 }, { "epoch": 0.099374879164787, "grad_norm": 0.24167075712069122, "learning_rate": 9.991202010421277e-06, "loss": 0.0021, "step": 1542 }, { "epoch": 0.09943932461171619, "grad_norm": 0.008199802468034723, "learning_rate": 9.992084373012014e-06, "loss": 0.0001, "step": 1543 }, { "epoch": 0.09950377005864536, "grad_norm": 0.08797115768979515, "learning_rate": 9.992966163939223e-06, "loss": 0.0006, "step": 1544 }, { "epoch": 0.09956821550557453, "grad_norm": 0.00023174358708018033, "learning_rate": 9.993847383943163e-06, "loss": 0.0, "step": 1545 }, { "epoch": 0.0996326609525037, "grad_norm": 0.11313614101504088, "learning_rate": 9.994728033762655e-06, "loss": 0.0002, "step": 1546 }, { "epoch": 0.09969710639943288, "grad_norm": 0.07457094638349225, "learning_rate": 9.995608114135087e-06, "loss": 0.0002, "step": 1547 }, { "epoch": 0.09976155184636205, "grad_norm": 0.012721014483883464, "learning_rate": 9.996487625796414e-06, "loss": 0.0001, "step": 1548 }, { "epoch": 0.09982599729329122, "grad_norm": 0.0012972554044241161, "learning_rate": 9.997366569481168e-06, "loss": 0.0, "step": 1549 }, { "epoch": 0.09989044274022041, "grad_norm": 0.0043032076280890485, "learning_rate": 9.998244945922459e-06, "loss": 0.0, "step": 1550 }, { "epoch": 0.09995488818714958, "grad_norm": 0.0019208322998490745, "learning_rate": 9.999122755851977e-06, "loss": 0.0, "step": 1551 }, { "epoch": 0.10001933363407875, "grad_norm": 0.0025537528524414454, "learning_rate": 1e-05, "loss": 0.0, "step": 1552 }, { "epoch": 0.10008377908100792, "grad_norm": 0.01819473357393409, "learning_rate": 1e-05, "loss": 0.0001, "step": 1553 }, { "epoch": 0.1001482245279371, "grad_norm": 0.023582341253408183, "learning_rate": 9.999283924095955e-06, "loss": 0.0001, "step": 1554 }, { "epoch": 0.10021266997486628, "grad_norm": 0.0006501829399390779, "learning_rate": 9.99856784819191e-06, "loss": 0.0, "step": 1555 }, { "epoch": 0.10027711542179545, "grad_norm": 0.0045095002079804025, "learning_rate": 9.997851772287864e-06, "loss": 0.0, "step": 1556 }, { "epoch": 0.10034156086872463, "grad_norm": 0.0048714429017706305, "learning_rate": 9.997135696383818e-06, "loss": 0.0, "step": 1557 }, { "epoch": 0.1004060063156538, "grad_norm": 0.06844209026375658, "learning_rate": 9.99641962047977e-06, "loss": 0.0002, "step": 1558 }, { "epoch": 0.10047045176258297, "grad_norm": 0.011684224684139077, "learning_rate": 9.995703544575725e-06, "loss": 0.0, "step": 1559 }, { "epoch": 0.10053489720951214, "grad_norm": 0.02074485054202999, "learning_rate": 9.99498746867168e-06, "loss": 0.0, "step": 1560 }, { "epoch": 0.10059934265644133, "grad_norm": 0.0688557315716873, "learning_rate": 9.994271392767634e-06, "loss": 0.0002, "step": 1561 }, { "epoch": 0.1006637881033705, "grad_norm": 0.04624153556322279, "learning_rate": 9.993555316863588e-06, "loss": 0.0001, "step": 1562 }, { "epoch": 0.10072823355029967, "grad_norm": 0.008817848692303166, "learning_rate": 9.992839240959542e-06, "loss": 0.0, "step": 1563 }, { "epoch": 0.10079267899722885, "grad_norm": 0.41477144476881117, "learning_rate": 9.992123165055496e-06, "loss": 0.0025, "step": 1564 }, { "epoch": 0.10085712444415802, "grad_norm": 0.0028627288818668436, "learning_rate": 9.99140708915145e-06, "loss": 0.0, "step": 1565 }, { "epoch": 0.10092156989108719, "grad_norm": 0.8068578764666766, "learning_rate": 9.990691013247405e-06, "loss": 0.0037, "step": 1566 }, { "epoch": 0.10098601533801636, "grad_norm": 0.00030946291876683197, "learning_rate": 9.98997493734336e-06, "loss": 0.0, "step": 1567 }, { "epoch": 0.10105046078494555, "grad_norm": 0.510590744176873, "learning_rate": 9.989258861439314e-06, "loss": 0.0015, "step": 1568 }, { "epoch": 0.10111490623187472, "grad_norm": 0.005179627251681472, "learning_rate": 9.988542785535268e-06, "loss": 0.0, "step": 1569 }, { "epoch": 0.10117935167880389, "grad_norm": 0.5898086335295064, "learning_rate": 9.987826709631222e-06, "loss": 0.0034, "step": 1570 }, { "epoch": 0.10124379712573307, "grad_norm": 0.06419531237883218, "learning_rate": 9.987110633727177e-06, "loss": 0.0002, "step": 1571 }, { "epoch": 0.10130824257266224, "grad_norm": 0.015524767153565688, "learning_rate": 9.98639455782313e-06, "loss": 0.0001, "step": 1572 }, { "epoch": 0.10137268801959141, "grad_norm": 0.02811805669194153, "learning_rate": 9.985678481919085e-06, "loss": 0.0002, "step": 1573 }, { "epoch": 0.10143713346652058, "grad_norm": 0.06079880538116993, "learning_rate": 9.984962406015038e-06, "loss": 0.0016, "step": 1574 }, { "epoch": 0.10150157891344977, "grad_norm": 0.021396310851056387, "learning_rate": 9.984246330110992e-06, "loss": 0.0001, "step": 1575 }, { "epoch": 0.10156602436037894, "grad_norm": 0.00487446275950087, "learning_rate": 9.983530254206946e-06, "loss": 0.0, "step": 1576 }, { "epoch": 0.10163046980730811, "grad_norm": 0.001861853412206755, "learning_rate": 9.9828141783029e-06, "loss": 0.0, "step": 1577 }, { "epoch": 0.1016949152542373, "grad_norm": 0.13226993615728455, "learning_rate": 9.982098102398855e-06, "loss": 0.0002, "step": 1578 }, { "epoch": 0.10175936070116647, "grad_norm": 0.6347721275583404, "learning_rate": 9.98138202649481e-06, "loss": 0.0029, "step": 1579 }, { "epoch": 0.10182380614809564, "grad_norm": 0.12557575131126444, "learning_rate": 9.980665950590763e-06, "loss": 0.0003, "step": 1580 }, { "epoch": 0.1018882515950248, "grad_norm": 0.15937583048922008, "learning_rate": 9.979949874686718e-06, "loss": 0.0014, "step": 1581 }, { "epoch": 0.10195269704195399, "grad_norm": 0.1975559703975177, "learning_rate": 9.979233798782672e-06, "loss": 0.0008, "step": 1582 }, { "epoch": 0.10201714248888316, "grad_norm": 0.18503193927540512, "learning_rate": 9.978517722878626e-06, "loss": 0.0001, "step": 1583 }, { "epoch": 0.10208158793581233, "grad_norm": 0.018288625469601905, "learning_rate": 9.977801646974579e-06, "loss": 0.0, "step": 1584 }, { "epoch": 0.10214603338274152, "grad_norm": 0.06556653898587625, "learning_rate": 9.977085571070533e-06, "loss": 0.0016, "step": 1585 }, { "epoch": 0.10221047882967069, "grad_norm": 0.12746553448387646, "learning_rate": 9.976369495166488e-06, "loss": 0.0, "step": 1586 }, { "epoch": 0.10227492427659986, "grad_norm": 1.126843214046914, "learning_rate": 9.975653419262442e-06, "loss": 0.0058, "step": 1587 }, { "epoch": 0.10233936972352903, "grad_norm": 0.0009175100005660954, "learning_rate": 9.974937343358396e-06, "loss": 0.0, "step": 1588 }, { "epoch": 0.10240381517045821, "grad_norm": 0.025905761639659113, "learning_rate": 9.974221267454352e-06, "loss": 0.0, "step": 1589 }, { "epoch": 0.10246826061738738, "grad_norm": 0.001112717209841313, "learning_rate": 9.973505191550305e-06, "loss": 0.0, "step": 1590 }, { "epoch": 0.10253270606431655, "grad_norm": 0.007526676600103704, "learning_rate": 9.972789115646259e-06, "loss": 0.0, "step": 1591 }, { "epoch": 0.10259715151124572, "grad_norm": 0.07737540406489828, "learning_rate": 9.972073039742213e-06, "loss": 0.0001, "step": 1592 }, { "epoch": 0.10266159695817491, "grad_norm": 0.15650133541903946, "learning_rate": 9.971356963838168e-06, "loss": 0.0002, "step": 1593 }, { "epoch": 0.10272604240510408, "grad_norm": 0.25322032122670035, "learning_rate": 9.970640887934122e-06, "loss": 0.0025, "step": 1594 }, { "epoch": 0.10279048785203325, "grad_norm": 0.3750155962243933, "learning_rate": 9.969924812030076e-06, "loss": 0.0039, "step": 1595 }, { "epoch": 0.10285493329896243, "grad_norm": 0.01156925484668394, "learning_rate": 9.96920873612603e-06, "loss": 0.0015, "step": 1596 }, { "epoch": 0.1029193787458916, "grad_norm": 0.0010545124652149038, "learning_rate": 9.968492660221985e-06, "loss": 0.0, "step": 1597 }, { "epoch": 0.10298382419282077, "grad_norm": 0.029305482633034747, "learning_rate": 9.96777658431794e-06, "loss": 0.0, "step": 1598 }, { "epoch": 0.10304826963974995, "grad_norm": 0.0017090864848175315, "learning_rate": 9.967060508413893e-06, "loss": 0.0, "step": 1599 }, { "epoch": 0.10311271508667913, "grad_norm": 0.05317997095655382, "learning_rate": 9.966344432509846e-06, "loss": 0.0001, "step": 1600 }, { "epoch": 0.1031771605336083, "grad_norm": 0.22688678027798553, "learning_rate": 9.9656283566058e-06, "loss": 0.0026, "step": 1601 }, { "epoch": 0.10324160598053747, "grad_norm": 0.30919653532738356, "learning_rate": 9.964912280701755e-06, "loss": 0.0004, "step": 1602 }, { "epoch": 0.10330605142746666, "grad_norm": 0.014351292654744227, "learning_rate": 9.964196204797709e-06, "loss": 0.0, "step": 1603 }, { "epoch": 0.10337049687439583, "grad_norm": 0.010907906211386922, "learning_rate": 9.963480128893663e-06, "loss": 0.0, "step": 1604 }, { "epoch": 0.103434942321325, "grad_norm": 0.002965178042445306, "learning_rate": 9.962764052989618e-06, "loss": 0.0, "step": 1605 }, { "epoch": 0.10349938776825417, "grad_norm": 0.003496357209876648, "learning_rate": 9.962047977085572e-06, "loss": 0.0, "step": 1606 }, { "epoch": 0.10356383321518335, "grad_norm": 0.13440704324049624, "learning_rate": 9.961331901181526e-06, "loss": 0.0008, "step": 1607 }, { "epoch": 0.10362827866211252, "grad_norm": 0.013390774687813115, "learning_rate": 9.96061582527748e-06, "loss": 0.0016, "step": 1608 }, { "epoch": 0.10369272410904169, "grad_norm": 0.0007995069776809722, "learning_rate": 9.959899749373435e-06, "loss": 0.0, "step": 1609 }, { "epoch": 0.10375716955597088, "grad_norm": 0.0002824253997624104, "learning_rate": 9.959183673469387e-06, "loss": 0.0, "step": 1610 }, { "epoch": 0.10382161500290005, "grad_norm": 0.019146714592035718, "learning_rate": 9.958467597565342e-06, "loss": 0.0, "step": 1611 }, { "epoch": 0.10388606044982922, "grad_norm": 0.3344483081280959, "learning_rate": 9.957751521661298e-06, "loss": 0.0014, "step": 1612 }, { "epoch": 0.10395050589675839, "grad_norm": 0.007686443658249208, "learning_rate": 9.957035445757252e-06, "loss": 0.0016, "step": 1613 }, { "epoch": 0.10401495134368757, "grad_norm": 0.004736729924681449, "learning_rate": 9.956319369853206e-06, "loss": 0.0001, "step": 1614 }, { "epoch": 0.10407939679061674, "grad_norm": 0.2593175709583272, "learning_rate": 9.95560329394916e-06, "loss": 0.0045, "step": 1615 }, { "epoch": 0.10414384223754591, "grad_norm": 1.196787952016555, "learning_rate": 9.954887218045113e-06, "loss": 0.0024, "step": 1616 }, { "epoch": 0.1042082876844751, "grad_norm": 0.00788326272426621, "learning_rate": 9.954171142141067e-06, "loss": 0.0001, "step": 1617 }, { "epoch": 0.10427273313140427, "grad_norm": 0.004238651176470014, "learning_rate": 9.953455066237022e-06, "loss": 0.0, "step": 1618 }, { "epoch": 0.10433717857833344, "grad_norm": 0.006960126069193266, "learning_rate": 9.952738990332976e-06, "loss": 0.0001, "step": 1619 }, { "epoch": 0.10440162402526261, "grad_norm": 0.3046891750387463, "learning_rate": 9.95202291442893e-06, "loss": 0.0005, "step": 1620 }, { "epoch": 0.1044660694721918, "grad_norm": 0.19090559741277238, "learning_rate": 9.951306838524885e-06, "loss": 0.0003, "step": 1621 }, { "epoch": 0.10453051491912096, "grad_norm": 0.12862948849226613, "learning_rate": 9.950590762620839e-06, "loss": 0.0009, "step": 1622 }, { "epoch": 0.10459496036605014, "grad_norm": 0.000399543694424654, "learning_rate": 9.949874686716793e-06, "loss": 0.0, "step": 1623 }, { "epoch": 0.10465940581297932, "grad_norm": 0.02006041245888554, "learning_rate": 9.949158610812748e-06, "loss": 0.0001, "step": 1624 }, { "epoch": 0.10472385125990849, "grad_norm": 0.031908140114484264, "learning_rate": 9.948442534908702e-06, "loss": 0.0001, "step": 1625 }, { "epoch": 0.10478829670683766, "grad_norm": 0.0009540502284184393, "learning_rate": 9.947726459004654e-06, "loss": 0.0, "step": 1626 }, { "epoch": 0.10485274215376683, "grad_norm": 0.0008386468327602505, "learning_rate": 9.947010383100609e-06, "loss": 0.0, "step": 1627 }, { "epoch": 0.10491718760069602, "grad_norm": 0.00263780282117747, "learning_rate": 9.946294307196563e-06, "loss": 0.0, "step": 1628 }, { "epoch": 0.10498163304762519, "grad_norm": 0.006001984598497469, "learning_rate": 9.945578231292517e-06, "loss": 0.0001, "step": 1629 }, { "epoch": 0.10504607849455436, "grad_norm": 0.00013517572587163083, "learning_rate": 9.944862155388472e-06, "loss": 0.0, "step": 1630 }, { "epoch": 0.10511052394148353, "grad_norm": 0.03848265845278273, "learning_rate": 9.944146079484426e-06, "loss": 0.0001, "step": 1631 }, { "epoch": 0.10517496938841271, "grad_norm": 0.022167288196300997, "learning_rate": 9.94343000358038e-06, "loss": 0.0001, "step": 1632 }, { "epoch": 0.10523941483534188, "grad_norm": 0.002821403357723632, "learning_rate": 9.942713927676335e-06, "loss": 0.0, "step": 1633 }, { "epoch": 0.10530386028227105, "grad_norm": 1.4247583351674593, "learning_rate": 9.941997851772289e-06, "loss": 0.0085, "step": 1634 }, { "epoch": 0.10536830572920024, "grad_norm": 0.21941276183174943, "learning_rate": 9.941281775868243e-06, "loss": 0.0018, "step": 1635 }, { "epoch": 0.10543275117612941, "grad_norm": 0.12458575422455666, "learning_rate": 9.940565699964197e-06, "loss": 0.0025, "step": 1636 }, { "epoch": 0.10549719662305858, "grad_norm": 0.0027435063651743657, "learning_rate": 9.939849624060152e-06, "loss": 0.0, "step": 1637 }, { "epoch": 0.10556164206998775, "grad_norm": 0.6987531166484844, "learning_rate": 9.939133548156106e-06, "loss": 0.0022, "step": 1638 }, { "epoch": 0.10562608751691693, "grad_norm": 0.02080380749067857, "learning_rate": 9.93841747225206e-06, "loss": 0.0001, "step": 1639 }, { "epoch": 0.1056905329638461, "grad_norm": 0.04035038836258241, "learning_rate": 9.937701396348015e-06, "loss": 0.0001, "step": 1640 }, { "epoch": 0.10575497841077527, "grad_norm": 0.003756669122280707, "learning_rate": 9.936985320443969e-06, "loss": 0.0, "step": 1641 }, { "epoch": 0.10581942385770446, "grad_norm": 0.009743641159397206, "learning_rate": 9.936269244539922e-06, "loss": 0.0001, "step": 1642 }, { "epoch": 0.10588386930463363, "grad_norm": 0.06777027930780727, "learning_rate": 9.935553168635876e-06, "loss": 0.0004, "step": 1643 }, { "epoch": 0.1059483147515628, "grad_norm": 0.005226804001337679, "learning_rate": 9.93483709273183e-06, "loss": 0.0, "step": 1644 }, { "epoch": 0.10601276019849197, "grad_norm": 0.015324877669177634, "learning_rate": 9.934121016827784e-06, "loss": 0.0001, "step": 1645 }, { "epoch": 0.10607720564542115, "grad_norm": 0.0344477850831631, "learning_rate": 9.933404940923739e-06, "loss": 0.0003, "step": 1646 }, { "epoch": 0.10614165109235033, "grad_norm": 0.07882127139899352, "learning_rate": 9.932688865019693e-06, "loss": 0.0001, "step": 1647 }, { "epoch": 0.1062060965392795, "grad_norm": 0.053166576465501644, "learning_rate": 9.931972789115647e-06, "loss": 0.0004, "step": 1648 }, { "epoch": 0.10627054198620868, "grad_norm": 0.0001717063849772211, "learning_rate": 9.931256713211602e-06, "loss": 0.0, "step": 1649 }, { "epoch": 0.10633498743313785, "grad_norm": 0.009019181123145955, "learning_rate": 9.930540637307556e-06, "loss": 0.0002, "step": 1650 }, { "epoch": 0.10639943288006702, "grad_norm": 0.04910897784280347, "learning_rate": 9.929824561403509e-06, "loss": 0.0002, "step": 1651 }, { "epoch": 0.10646387832699619, "grad_norm": 0.011014606916717328, "learning_rate": 9.929108485499463e-06, "loss": 0.0001, "step": 1652 }, { "epoch": 0.10652832377392538, "grad_norm": 0.14360992510726237, "learning_rate": 9.928392409595417e-06, "loss": 0.0011, "step": 1653 }, { "epoch": 0.10659276922085455, "grad_norm": 0.49932092507531767, "learning_rate": 9.927676333691371e-06, "loss": 0.0015, "step": 1654 }, { "epoch": 0.10665721466778372, "grad_norm": 0.006795758715444581, "learning_rate": 9.926960257787326e-06, "loss": 0.0001, "step": 1655 }, { "epoch": 0.1067216601147129, "grad_norm": 0.001934329060949019, "learning_rate": 9.92624418188328e-06, "loss": 0.0, "step": 1656 }, { "epoch": 0.10678610556164207, "grad_norm": 0.09470259963083237, "learning_rate": 9.925528105979234e-06, "loss": 0.0003, "step": 1657 }, { "epoch": 0.10685055100857124, "grad_norm": 0.02276080609148665, "learning_rate": 9.924812030075189e-06, "loss": 0.0002, "step": 1658 }, { "epoch": 0.10691499645550041, "grad_norm": 0.004617006557617145, "learning_rate": 9.924095954171143e-06, "loss": 0.0, "step": 1659 }, { "epoch": 0.1069794419024296, "grad_norm": 0.008264148745958573, "learning_rate": 9.923379878267097e-06, "loss": 0.0, "step": 1660 }, { "epoch": 0.10704388734935877, "grad_norm": 0.42388602709453416, "learning_rate": 9.922663802363052e-06, "loss": 0.003, "step": 1661 }, { "epoch": 0.10710833279628794, "grad_norm": 0.022281854083548366, "learning_rate": 9.921947726459006e-06, "loss": 0.0003, "step": 1662 }, { "epoch": 0.10717277824321712, "grad_norm": 0.024515886161469536, "learning_rate": 9.92123165055496e-06, "loss": 0.0001, "step": 1663 }, { "epoch": 0.1072372236901463, "grad_norm": 0.004597568827487063, "learning_rate": 9.920515574650914e-06, "loss": 0.0001, "step": 1664 }, { "epoch": 0.10730166913707546, "grad_norm": 0.020596066669992474, "learning_rate": 9.919799498746869e-06, "loss": 0.0001, "step": 1665 }, { "epoch": 0.10736611458400463, "grad_norm": 0.17759965674602354, "learning_rate": 9.919083422842823e-06, "loss": 0.0013, "step": 1666 }, { "epoch": 0.10743056003093382, "grad_norm": 0.018403341874534827, "learning_rate": 9.918367346938776e-06, "loss": 0.0002, "step": 1667 }, { "epoch": 0.10749500547786299, "grad_norm": 2.1243720529282752, "learning_rate": 9.91765127103473e-06, "loss": 0.011, "step": 1668 }, { "epoch": 0.10755945092479216, "grad_norm": 0.009084960426039212, "learning_rate": 9.916935195130684e-06, "loss": 0.0001, "step": 1669 }, { "epoch": 0.10762389637172134, "grad_norm": 0.0387725830078125, "learning_rate": 9.916219119226639e-06, "loss": 0.0003, "step": 1670 }, { "epoch": 0.10768834181865052, "grad_norm": 0.22734220117519863, "learning_rate": 9.915503043322593e-06, "loss": 0.0006, "step": 1671 }, { "epoch": 0.10775278726557969, "grad_norm": 0.839883847721016, "learning_rate": 9.914786967418547e-06, "loss": 0.0105, "step": 1672 }, { "epoch": 0.10781723271250886, "grad_norm": 0.0015275560043602424, "learning_rate": 9.914070891514501e-06, "loss": 0.0, "step": 1673 }, { "epoch": 0.10788167815943804, "grad_norm": 0.2546175456952681, "learning_rate": 9.913354815610456e-06, "loss": 0.0021, "step": 1674 }, { "epoch": 0.10794612360636721, "grad_norm": 0.27017822568966937, "learning_rate": 9.91263873970641e-06, "loss": 0.0007, "step": 1675 }, { "epoch": 0.10801056905329638, "grad_norm": 0.020131666449685264, "learning_rate": 9.911922663802364e-06, "loss": 0.0001, "step": 1676 }, { "epoch": 0.10807501450022555, "grad_norm": 0.007008979154739355, "learning_rate": 9.911206587898317e-06, "loss": 0.0, "step": 1677 }, { "epoch": 0.10813945994715474, "grad_norm": 0.31403968120823095, "learning_rate": 9.910490511994271e-06, "loss": 0.0008, "step": 1678 }, { "epoch": 0.10820390539408391, "grad_norm": 0.08434008796606712, "learning_rate": 9.909774436090226e-06, "loss": 0.0001, "step": 1679 }, { "epoch": 0.10826835084101308, "grad_norm": 0.21085719946650242, "learning_rate": 9.90905836018618e-06, "loss": 0.0007, "step": 1680 }, { "epoch": 0.10833279628794226, "grad_norm": 0.06126331219652856, "learning_rate": 9.908342284282134e-06, "loss": 0.0001, "step": 1681 }, { "epoch": 0.10839724173487143, "grad_norm": 0.02127553894163218, "learning_rate": 9.90762620837809e-06, "loss": 0.0, "step": 1682 }, { "epoch": 0.1084616871818006, "grad_norm": 0.0020103520188776516, "learning_rate": 9.906910132474043e-06, "loss": 0.0, "step": 1683 }, { "epoch": 0.10852613262872977, "grad_norm": 0.1730377089352632, "learning_rate": 9.906194056569997e-06, "loss": 0.0032, "step": 1684 }, { "epoch": 0.10859057807565896, "grad_norm": 0.40038176736105546, "learning_rate": 9.905477980665951e-06, "loss": 0.0048, "step": 1685 }, { "epoch": 0.10865502352258813, "grad_norm": 0.0349529717470364, "learning_rate": 9.904761904761906e-06, "loss": 0.0001, "step": 1686 }, { "epoch": 0.1087194689695173, "grad_norm": 0.12564037620330987, "learning_rate": 9.90404582885786e-06, "loss": 0.0002, "step": 1687 }, { "epoch": 0.10878391441644648, "grad_norm": 0.02573585559729339, "learning_rate": 9.903329752953814e-06, "loss": 0.0001, "step": 1688 }, { "epoch": 0.10884835986337565, "grad_norm": 0.0182613506637033, "learning_rate": 9.902613677049769e-06, "loss": 0.0001, "step": 1689 }, { "epoch": 0.10891280531030482, "grad_norm": 0.0033500290164589796, "learning_rate": 9.901897601145723e-06, "loss": 0.0, "step": 1690 }, { "epoch": 0.108977250757234, "grad_norm": 0.22283454081114984, "learning_rate": 9.901181525241677e-06, "loss": 0.0009, "step": 1691 }, { "epoch": 0.10904169620416318, "grad_norm": 0.0013649688327389462, "learning_rate": 9.900465449337631e-06, "loss": 0.0, "step": 1692 }, { "epoch": 0.10910614165109235, "grad_norm": 0.0007646449219787592, "learning_rate": 9.899749373433584e-06, "loss": 0.0, "step": 1693 }, { "epoch": 0.10917058709802152, "grad_norm": 0.009881301935125679, "learning_rate": 9.899033297529538e-06, "loss": 0.0001, "step": 1694 }, { "epoch": 0.1092350325449507, "grad_norm": 0.22115118406281953, "learning_rate": 9.898317221625493e-06, "loss": 0.001, "step": 1695 }, { "epoch": 0.10929947799187988, "grad_norm": 0.015784157099274273, "learning_rate": 9.897601145721447e-06, "loss": 0.0001, "step": 1696 }, { "epoch": 0.10936392343880905, "grad_norm": 0.04523709863820392, "learning_rate": 9.896885069817401e-06, "loss": 0.0001, "step": 1697 }, { "epoch": 0.10942836888573822, "grad_norm": 0.028951350795599847, "learning_rate": 9.896168993913355e-06, "loss": 0.0, "step": 1698 }, { "epoch": 0.1094928143326674, "grad_norm": 0.00021010130849105123, "learning_rate": 9.89545291800931e-06, "loss": 0.0, "step": 1699 }, { "epoch": 0.10955725977959657, "grad_norm": 0.01118543396035685, "learning_rate": 9.894736842105264e-06, "loss": 0.0001, "step": 1700 }, { "epoch": 0.10962170522652574, "grad_norm": 0.00016900792184204182, "learning_rate": 9.894020766201218e-06, "loss": 0.0, "step": 1701 }, { "epoch": 0.10968615067345493, "grad_norm": 0.00286496145638672, "learning_rate": 9.893304690297173e-06, "loss": 0.0, "step": 1702 }, { "epoch": 0.1097505961203841, "grad_norm": 0.00041776218172744057, "learning_rate": 9.892588614393125e-06, "loss": 0.0, "step": 1703 }, { "epoch": 0.10981504156731327, "grad_norm": 0.008368331222943564, "learning_rate": 9.89187253848908e-06, "loss": 0.0001, "step": 1704 }, { "epoch": 0.10987948701424244, "grad_norm": 0.041839912630408586, "learning_rate": 9.891156462585036e-06, "loss": 0.0002, "step": 1705 }, { "epoch": 0.10994393246117162, "grad_norm": 0.024497527264033654, "learning_rate": 9.89044038668099e-06, "loss": 0.0001, "step": 1706 }, { "epoch": 0.1100083779081008, "grad_norm": 0.023590454305983673, "learning_rate": 9.889724310776944e-06, "loss": 0.0001, "step": 1707 }, { "epoch": 0.11007282335502996, "grad_norm": 0.012773182132556847, "learning_rate": 9.889008234872898e-06, "loss": 0.0, "step": 1708 }, { "epoch": 0.11013726880195915, "grad_norm": 0.004021249149665355, "learning_rate": 9.888292158968851e-06, "loss": 0.0, "step": 1709 }, { "epoch": 0.11020171424888832, "grad_norm": 0.02710948476336297, "learning_rate": 9.887576083064805e-06, "loss": 0.0003, "step": 1710 }, { "epoch": 0.11026615969581749, "grad_norm": 0.006102303146094259, "learning_rate": 9.88686000716076e-06, "loss": 0.0, "step": 1711 }, { "epoch": 0.11033060514274666, "grad_norm": 0.07333378468360141, "learning_rate": 9.886143931256714e-06, "loss": 0.0004, "step": 1712 }, { "epoch": 0.11039505058967584, "grad_norm": 0.03245848052794509, "learning_rate": 9.885427855352668e-06, "loss": 0.0, "step": 1713 }, { "epoch": 0.11045949603660501, "grad_norm": 0.06893218377444407, "learning_rate": 9.884711779448623e-06, "loss": 0.0001, "step": 1714 }, { "epoch": 0.11052394148353419, "grad_norm": 0.0598030877506716, "learning_rate": 9.883995703544577e-06, "loss": 0.0002, "step": 1715 }, { "epoch": 0.11058838693046336, "grad_norm": 0.0013980899631203523, "learning_rate": 9.883279627640531e-06, "loss": 0.0, "step": 1716 }, { "epoch": 0.11065283237739254, "grad_norm": 0.3934271253107235, "learning_rate": 9.882563551736485e-06, "loss": 0.0009, "step": 1717 }, { "epoch": 0.11071727782432171, "grad_norm": 0.13356755708162912, "learning_rate": 9.88184747583244e-06, "loss": 0.0004, "step": 1718 }, { "epoch": 0.11078172327125088, "grad_norm": 0.009251442707350574, "learning_rate": 9.881131399928392e-06, "loss": 0.0001, "step": 1719 }, { "epoch": 0.11084616871818007, "grad_norm": 0.09078265506655032, "learning_rate": 9.880415324024347e-06, "loss": 0.0002, "step": 1720 }, { "epoch": 0.11091061416510924, "grad_norm": 0.45446654387990904, "learning_rate": 9.879699248120301e-06, "loss": 0.0018, "step": 1721 }, { "epoch": 0.11097505961203841, "grad_norm": 0.03796955507606215, "learning_rate": 9.878983172216255e-06, "loss": 0.0002, "step": 1722 }, { "epoch": 0.11103950505896758, "grad_norm": 0.33390644346596626, "learning_rate": 9.87826709631221e-06, "loss": 0.0004, "step": 1723 }, { "epoch": 0.11110395050589676, "grad_norm": 0.0017985687644987878, "learning_rate": 9.877551020408164e-06, "loss": 0.0, "step": 1724 }, { "epoch": 0.11116839595282593, "grad_norm": 0.00012855284184480202, "learning_rate": 9.876834944504118e-06, "loss": 0.0, "step": 1725 }, { "epoch": 0.1112328413997551, "grad_norm": 0.002766077239669994, "learning_rate": 9.876118868600072e-06, "loss": 0.0, "step": 1726 }, { "epoch": 0.11129728684668429, "grad_norm": 0.007371952801886638, "learning_rate": 9.875402792696027e-06, "loss": 0.0, "step": 1727 }, { "epoch": 0.11136173229361346, "grad_norm": 0.004698404644119558, "learning_rate": 9.87468671679198e-06, "loss": 0.0, "step": 1728 }, { "epoch": 0.11142617774054263, "grad_norm": 0.003165305540191389, "learning_rate": 9.873970640887935e-06, "loss": 0.0, "step": 1729 }, { "epoch": 0.1114906231874718, "grad_norm": 0.002938261901882307, "learning_rate": 9.87325456498389e-06, "loss": 0.0, "step": 1730 }, { "epoch": 0.11155506863440098, "grad_norm": 0.004821551934840438, "learning_rate": 9.872538489079844e-06, "loss": 0.0001, "step": 1731 }, { "epoch": 0.11161951408133015, "grad_norm": 4.844431380060839, "learning_rate": 9.871822413175798e-06, "loss": 0.0238, "step": 1732 }, { "epoch": 0.11168395952825932, "grad_norm": 0.023224661434122617, "learning_rate": 9.871106337271753e-06, "loss": 0.0001, "step": 1733 }, { "epoch": 0.11174840497518851, "grad_norm": 0.31371399393588917, "learning_rate": 9.870390261367707e-06, "loss": 0.0055, "step": 1734 }, { "epoch": 0.11181285042211768, "grad_norm": 0.06859223690514762, "learning_rate": 9.86967418546366e-06, "loss": 0.0004, "step": 1735 }, { "epoch": 0.11187729586904685, "grad_norm": 0.00032449914922864464, "learning_rate": 9.868958109559614e-06, "loss": 0.0, "step": 1736 }, { "epoch": 0.11194174131597602, "grad_norm": 0.000419428479045938, "learning_rate": 9.868242033655568e-06, "loss": 0.0, "step": 1737 }, { "epoch": 0.1120061867629052, "grad_norm": 0.00641962832500581, "learning_rate": 9.867525957751522e-06, "loss": 0.0, "step": 1738 }, { "epoch": 0.11207063220983438, "grad_norm": 0.007188855193993879, "learning_rate": 9.866809881847477e-06, "loss": 0.0, "step": 1739 }, { "epoch": 0.11213507765676355, "grad_norm": 0.5678866540904066, "learning_rate": 9.866093805943431e-06, "loss": 0.0022, "step": 1740 }, { "epoch": 0.11219952310369273, "grad_norm": 0.05270199532192997, "learning_rate": 9.865377730039385e-06, "loss": 0.0002, "step": 1741 }, { "epoch": 0.1122639685506219, "grad_norm": 0.0034274067808784683, "learning_rate": 9.86466165413534e-06, "loss": 0.0, "step": 1742 }, { "epoch": 0.11232841399755107, "grad_norm": 0.0012332182509001428, "learning_rate": 9.863945578231294e-06, "loss": 0.0, "step": 1743 }, { "epoch": 0.11239285944448024, "grad_norm": 0.019587984236910893, "learning_rate": 9.863229502327246e-06, "loss": 0.0001, "step": 1744 }, { "epoch": 0.11245730489140943, "grad_norm": 0.0001058282062590549, "learning_rate": 9.8625134264232e-06, "loss": 0.0, "step": 1745 }, { "epoch": 0.1125217503383386, "grad_norm": 0.0019646040441125955, "learning_rate": 9.861797350519155e-06, "loss": 0.0, "step": 1746 }, { "epoch": 0.11258619578526777, "grad_norm": 0.0024159480603714364, "learning_rate": 9.86108127461511e-06, "loss": 0.0, "step": 1747 }, { "epoch": 0.11265064123219695, "grad_norm": 0.03507614114998864, "learning_rate": 9.860365198711064e-06, "loss": 0.0002, "step": 1748 }, { "epoch": 0.11271508667912612, "grad_norm": 0.02787627501469119, "learning_rate": 9.859649122807018e-06, "loss": 0.0001, "step": 1749 }, { "epoch": 0.11277953212605529, "grad_norm": 0.008317143690736094, "learning_rate": 9.858933046902972e-06, "loss": 0.0, "step": 1750 }, { "epoch": 0.11284397757298446, "grad_norm": 0.8252554064657068, "learning_rate": 9.858216970998927e-06, "loss": 0.0044, "step": 1751 }, { "epoch": 0.11290842301991365, "grad_norm": 1.160233260658864, "learning_rate": 9.857500895094881e-06, "loss": 0.0055, "step": 1752 }, { "epoch": 0.11297286846684282, "grad_norm": 0.007301981019052537, "learning_rate": 9.856784819190835e-06, "loss": 0.0, "step": 1753 }, { "epoch": 0.11303731391377199, "grad_norm": 0.0021348737129215627, "learning_rate": 9.85606874328679e-06, "loss": 0.0, "step": 1754 }, { "epoch": 0.11310175936070116, "grad_norm": 0.1850555238494288, "learning_rate": 9.855352667382744e-06, "loss": 0.0002, "step": 1755 }, { "epoch": 0.11316620480763034, "grad_norm": 0.003926293491443531, "learning_rate": 9.854636591478698e-06, "loss": 0.0, "step": 1756 }, { "epoch": 0.11323065025455951, "grad_norm": 0.03075867687050704, "learning_rate": 9.853920515574652e-06, "loss": 0.0, "step": 1757 }, { "epoch": 0.11329509570148869, "grad_norm": 0.0006125017611817344, "learning_rate": 9.853204439670607e-06, "loss": 0.0, "step": 1758 }, { "epoch": 0.11335954114841787, "grad_norm": 0.0019315106515052796, "learning_rate": 9.852488363766561e-06, "loss": 0.0, "step": 1759 }, { "epoch": 0.11342398659534704, "grad_norm": 0.14089677959341357, "learning_rate": 9.851772287862514e-06, "loss": 0.0002, "step": 1760 }, { "epoch": 0.11348843204227621, "grad_norm": 0.0015930161051359712, "learning_rate": 9.851056211958468e-06, "loss": 0.0, "step": 1761 }, { "epoch": 0.11355287748920538, "grad_norm": 0.2209100850050719, "learning_rate": 9.850340136054422e-06, "loss": 0.0021, "step": 1762 }, { "epoch": 0.11361732293613457, "grad_norm": 0.11348926830889935, "learning_rate": 9.849624060150376e-06, "loss": 0.0004, "step": 1763 }, { "epoch": 0.11368176838306374, "grad_norm": 0.05025630142028179, "learning_rate": 9.84890798424633e-06, "loss": 0.0002, "step": 1764 }, { "epoch": 0.1137462138299929, "grad_norm": 0.004523011762179798, "learning_rate": 9.848191908342285e-06, "loss": 0.0, "step": 1765 }, { "epoch": 0.11381065927692209, "grad_norm": 0.03315617945049665, "learning_rate": 9.84747583243824e-06, "loss": 0.0004, "step": 1766 }, { "epoch": 0.11387510472385126, "grad_norm": 0.004434986096414539, "learning_rate": 9.846759756534194e-06, "loss": 0.0, "step": 1767 }, { "epoch": 0.11393955017078043, "grad_norm": 0.23389844924721334, "learning_rate": 9.846043680630148e-06, "loss": 0.0008, "step": 1768 }, { "epoch": 0.1140039956177096, "grad_norm": 0.12071385320497696, "learning_rate": 9.845327604726102e-06, "loss": 0.0004, "step": 1769 }, { "epoch": 0.11406844106463879, "grad_norm": 0.027456958188735676, "learning_rate": 9.844611528822055e-06, "loss": 0.0, "step": 1770 }, { "epoch": 0.11413288651156796, "grad_norm": 0.2016661137400553, "learning_rate": 9.843895452918009e-06, "loss": 0.0026, "step": 1771 }, { "epoch": 0.11419733195849713, "grad_norm": 0.0006652434216453054, "learning_rate": 9.843179377013963e-06, "loss": 0.0, "step": 1772 }, { "epoch": 0.11426177740542631, "grad_norm": 0.06822878762253942, "learning_rate": 9.842463301109918e-06, "loss": 0.001, "step": 1773 }, { "epoch": 0.11432622285235548, "grad_norm": 0.19988441517336292, "learning_rate": 9.841747225205872e-06, "loss": 0.0003, "step": 1774 }, { "epoch": 0.11439066829928465, "grad_norm": 0.05110567110947775, "learning_rate": 9.841031149301828e-06, "loss": 0.0001, "step": 1775 }, { "epoch": 0.11445511374621382, "grad_norm": 0.007954576835518657, "learning_rate": 9.84031507339778e-06, "loss": 0.0, "step": 1776 }, { "epoch": 0.11451955919314301, "grad_norm": 0.0010392938517551102, "learning_rate": 9.839598997493735e-06, "loss": 0.0, "step": 1777 }, { "epoch": 0.11458400464007218, "grad_norm": 0.0056564445665283, "learning_rate": 9.83888292158969e-06, "loss": 0.0, "step": 1778 }, { "epoch": 0.11464845008700135, "grad_norm": 0.0014838747156959813, "learning_rate": 9.838166845685644e-06, "loss": 0.0, "step": 1779 }, { "epoch": 0.11471289553393053, "grad_norm": 0.0030324228042027165, "learning_rate": 9.837450769781598e-06, "loss": 0.0, "step": 1780 }, { "epoch": 0.1147773409808597, "grad_norm": 0.007153180921713235, "learning_rate": 9.836734693877552e-06, "loss": 0.0, "step": 1781 }, { "epoch": 0.11484178642778888, "grad_norm": 0.0014775236203942192, "learning_rate": 9.836018617973506e-06, "loss": 0.0, "step": 1782 }, { "epoch": 0.11490623187471805, "grad_norm": 0.042548194151211846, "learning_rate": 9.83530254206946e-06, "loss": 0.0001, "step": 1783 }, { "epoch": 0.11497067732164723, "grad_norm": 0.012568469437944967, "learning_rate": 9.834586466165415e-06, "loss": 0.0, "step": 1784 }, { "epoch": 0.1150351227685764, "grad_norm": 0.03218187318560303, "learning_rate": 9.83387039026137e-06, "loss": 0.0, "step": 1785 }, { "epoch": 0.11509956821550557, "grad_norm": 0.007692717030373828, "learning_rate": 9.833154314357322e-06, "loss": 0.0001, "step": 1786 }, { "epoch": 0.11516401366243476, "grad_norm": 0.012189991502221508, "learning_rate": 9.832438238453276e-06, "loss": 0.0, "step": 1787 }, { "epoch": 0.11522845910936393, "grad_norm": 0.04904071543282355, "learning_rate": 9.83172216254923e-06, "loss": 0.0022, "step": 1788 }, { "epoch": 0.1152929045562931, "grad_norm": 0.7873672933629802, "learning_rate": 9.831006086645185e-06, "loss": 0.0021, "step": 1789 }, { "epoch": 0.11535735000322227, "grad_norm": 0.03984673321766176, "learning_rate": 9.830290010741139e-06, "loss": 0.0002, "step": 1790 }, { "epoch": 0.11542179545015145, "grad_norm": 0.04043361873909708, "learning_rate": 9.829573934837093e-06, "loss": 0.0001, "step": 1791 }, { "epoch": 0.11548624089708062, "grad_norm": 0.00460612867531268, "learning_rate": 9.828857858933048e-06, "loss": 0.0, "step": 1792 }, { "epoch": 0.11555068634400979, "grad_norm": 0.006081790106934461, "learning_rate": 9.828141783029002e-06, "loss": 0.0, "step": 1793 }, { "epoch": 0.11561513179093898, "grad_norm": 0.06486523973034726, "learning_rate": 9.827425707124956e-06, "loss": 0.0001, "step": 1794 }, { "epoch": 0.11567957723786815, "grad_norm": 3.2129624564538615, "learning_rate": 9.82670963122091e-06, "loss": 0.03, "step": 1795 }, { "epoch": 0.11574402268479732, "grad_norm": 0.0007317990986965962, "learning_rate": 9.825993555316863e-06, "loss": 0.0, "step": 1796 }, { "epoch": 0.11580846813172649, "grad_norm": 0.007586954091167557, "learning_rate": 9.825277479412818e-06, "loss": 0.0, "step": 1797 }, { "epoch": 0.11587291357865567, "grad_norm": 0.004224269569500741, "learning_rate": 9.824561403508772e-06, "loss": 0.0, "step": 1798 }, { "epoch": 0.11593735902558484, "grad_norm": 0.0027508155825239165, "learning_rate": 9.823845327604728e-06, "loss": 0.0, "step": 1799 }, { "epoch": 0.11600180447251401, "grad_norm": 0.0005093112787406999, "learning_rate": 9.823129251700682e-06, "loss": 0.0, "step": 1800 }, { "epoch": 0.11606624991944318, "grad_norm": 0.2830816863572351, "learning_rate": 9.822413175796636e-06, "loss": 0.0006, "step": 1801 }, { "epoch": 0.11613069536637237, "grad_norm": 0.008831027053601843, "learning_rate": 9.821697099892589e-06, "loss": 0.0001, "step": 1802 }, { "epoch": 0.11619514081330154, "grad_norm": 0.041514996841974086, "learning_rate": 9.820981023988543e-06, "loss": 0.0003, "step": 1803 }, { "epoch": 0.11625958626023071, "grad_norm": 0.0021810375241418326, "learning_rate": 9.820264948084498e-06, "loss": 0.0, "step": 1804 }, { "epoch": 0.1163240317071599, "grad_norm": 0.004574236003985102, "learning_rate": 9.819548872180452e-06, "loss": 0.0, "step": 1805 }, { "epoch": 0.11638847715408907, "grad_norm": 0.001797843287860481, "learning_rate": 9.818832796276406e-06, "loss": 0.0, "step": 1806 }, { "epoch": 0.11645292260101824, "grad_norm": 0.03992228954057036, "learning_rate": 9.81811672037236e-06, "loss": 0.0003, "step": 1807 }, { "epoch": 0.1165173680479474, "grad_norm": 2.652800530496413, "learning_rate": 9.817400644468315e-06, "loss": 0.0095, "step": 1808 }, { "epoch": 0.11658181349487659, "grad_norm": 0.005735454108690199, "learning_rate": 9.816684568564269e-06, "loss": 0.0, "step": 1809 }, { "epoch": 0.11664625894180576, "grad_norm": 0.46595162514166677, "learning_rate": 9.815968492660223e-06, "loss": 0.0022, "step": 1810 }, { "epoch": 0.11671070438873493, "grad_norm": 0.008367182582977108, "learning_rate": 9.815252416756178e-06, "loss": 0.0001, "step": 1811 }, { "epoch": 0.11677514983566412, "grad_norm": 0.010296350222608969, "learning_rate": 9.81453634085213e-06, "loss": 0.0001, "step": 1812 }, { "epoch": 0.11683959528259329, "grad_norm": 0.002394567203813334, "learning_rate": 9.813820264948085e-06, "loss": 0.0, "step": 1813 }, { "epoch": 0.11690404072952246, "grad_norm": 0.013784040528215559, "learning_rate": 9.813104189044039e-06, "loss": 0.0, "step": 1814 }, { "epoch": 0.11696848617645163, "grad_norm": 0.003288869045197393, "learning_rate": 9.812388113139993e-06, "loss": 0.0, "step": 1815 }, { "epoch": 0.11703293162338081, "grad_norm": 0.003724212986621817, "learning_rate": 9.811672037235947e-06, "loss": 0.0, "step": 1816 }, { "epoch": 0.11709737707030998, "grad_norm": 0.0013027106074673877, "learning_rate": 9.810955961331902e-06, "loss": 0.0, "step": 1817 }, { "epoch": 0.11716182251723915, "grad_norm": 0.5599757351897601, "learning_rate": 9.810239885427856e-06, "loss": 0.0036, "step": 1818 }, { "epoch": 0.11722626796416834, "grad_norm": 0.012470146755273252, "learning_rate": 9.80952380952381e-06, "loss": 0.0, "step": 1819 }, { "epoch": 0.11729071341109751, "grad_norm": 0.31943589725439364, "learning_rate": 9.808807733619765e-06, "loss": 0.0017, "step": 1820 }, { "epoch": 0.11735515885802668, "grad_norm": 0.1360801427960747, "learning_rate": 9.808091657715717e-06, "loss": 0.0003, "step": 1821 }, { "epoch": 0.11741960430495585, "grad_norm": 1.11044941842528, "learning_rate": 9.807375581811673e-06, "loss": 0.0037, "step": 1822 }, { "epoch": 0.11748404975188503, "grad_norm": 0.046221214479398, "learning_rate": 9.806659505907628e-06, "loss": 0.0001, "step": 1823 }, { "epoch": 0.1175484951988142, "grad_norm": 0.014703095802911305, "learning_rate": 9.805943430003582e-06, "loss": 0.0001, "step": 1824 }, { "epoch": 0.11761294064574337, "grad_norm": 0.09168547607566063, "learning_rate": 9.805227354099536e-06, "loss": 0.0002, "step": 1825 }, { "epoch": 0.11767738609267256, "grad_norm": 0.04476822615850487, "learning_rate": 9.80451127819549e-06, "loss": 0.0002, "step": 1826 }, { "epoch": 0.11774183153960173, "grad_norm": 0.23735031819500035, "learning_rate": 9.803795202291445e-06, "loss": 0.0001, "step": 1827 }, { "epoch": 0.1178062769865309, "grad_norm": 0.019883241428665027, "learning_rate": 9.803079126387397e-06, "loss": 0.0, "step": 1828 }, { "epoch": 0.11787072243346007, "grad_norm": 0.021027037639753155, "learning_rate": 9.802363050483352e-06, "loss": 0.0002, "step": 1829 }, { "epoch": 0.11793516788038926, "grad_norm": 0.10222835241112932, "learning_rate": 9.801646974579306e-06, "loss": 0.0009, "step": 1830 }, { "epoch": 0.11799961332731843, "grad_norm": 0.019721556166337365, "learning_rate": 9.80093089867526e-06, "loss": 0.0001, "step": 1831 }, { "epoch": 0.1180640587742476, "grad_norm": 0.011796718616704312, "learning_rate": 9.800214822771215e-06, "loss": 0.0001, "step": 1832 }, { "epoch": 0.11812850422117678, "grad_norm": 1.1332119467653918, "learning_rate": 9.799498746867169e-06, "loss": 0.0074, "step": 1833 }, { "epoch": 0.11819294966810595, "grad_norm": 0.04183516698015525, "learning_rate": 9.798782670963123e-06, "loss": 0.0001, "step": 1834 }, { "epoch": 0.11825739511503512, "grad_norm": 0.020117152431605615, "learning_rate": 9.798066595059077e-06, "loss": 0.0, "step": 1835 }, { "epoch": 0.11832184056196429, "grad_norm": 0.005855164840714329, "learning_rate": 9.797350519155032e-06, "loss": 0.0, "step": 1836 }, { "epoch": 0.11838628600889348, "grad_norm": 1.0156371482709448, "learning_rate": 9.796634443250986e-06, "loss": 0.0036, "step": 1837 }, { "epoch": 0.11845073145582265, "grad_norm": 0.14426022971488647, "learning_rate": 9.795918367346939e-06, "loss": 0.0004, "step": 1838 }, { "epoch": 0.11851517690275182, "grad_norm": 0.059642671585571926, "learning_rate": 9.795202291442893e-06, "loss": 0.0002, "step": 1839 }, { "epoch": 0.11857962234968099, "grad_norm": 0.01622077639010314, "learning_rate": 9.794486215538847e-06, "loss": 0.0, "step": 1840 }, { "epoch": 0.11864406779661017, "grad_norm": 0.00355795310393443, "learning_rate": 9.793770139634802e-06, "loss": 0.0, "step": 1841 }, { "epoch": 0.11870851324353934, "grad_norm": 0.22162360267499878, "learning_rate": 9.793054063730756e-06, "loss": 0.0016, "step": 1842 }, { "epoch": 0.11877295869046851, "grad_norm": 0.03565730231915416, "learning_rate": 9.79233798782671e-06, "loss": 0.0, "step": 1843 }, { "epoch": 0.1188374041373977, "grad_norm": 0.04782365939084201, "learning_rate": 9.791621911922664e-06, "loss": 0.0, "step": 1844 }, { "epoch": 0.11890184958432687, "grad_norm": 0.007612927308443903, "learning_rate": 9.790905836018619e-06, "loss": 0.0, "step": 1845 }, { "epoch": 0.11896629503125604, "grad_norm": 0.010512277541804959, "learning_rate": 9.790189760114573e-06, "loss": 0.0001, "step": 1846 }, { "epoch": 0.11903074047818521, "grad_norm": 0.01802613225231296, "learning_rate": 9.789473684210527e-06, "loss": 0.0001, "step": 1847 }, { "epoch": 0.1190951859251144, "grad_norm": 0.002060216204765636, "learning_rate": 9.788757608306482e-06, "loss": 0.0, "step": 1848 }, { "epoch": 0.11915963137204356, "grad_norm": 0.01830879044055786, "learning_rate": 9.788041532402436e-06, "loss": 0.0001, "step": 1849 }, { "epoch": 0.11922407681897274, "grad_norm": 0.1223366844699642, "learning_rate": 9.78732545649839e-06, "loss": 0.002, "step": 1850 }, { "epoch": 0.11928852226590192, "grad_norm": 0.2997939887607467, "learning_rate": 9.786609380594345e-06, "loss": 0.0004, "step": 1851 }, { "epoch": 0.11935296771283109, "grad_norm": 0.10824277371729986, "learning_rate": 9.785893304690299e-06, "loss": 0.0003, "step": 1852 }, { "epoch": 0.11941741315976026, "grad_norm": 0.0021208647960036446, "learning_rate": 9.785177228786253e-06, "loss": 0.0, "step": 1853 }, { "epoch": 0.11948185860668943, "grad_norm": 0.29415234850131244, "learning_rate": 9.784461152882206e-06, "loss": 0.0007, "step": 1854 }, { "epoch": 0.11954630405361862, "grad_norm": 0.9598414365134162, "learning_rate": 9.78374507697816e-06, "loss": 0.0098, "step": 1855 }, { "epoch": 0.11961074950054779, "grad_norm": 0.0016752527904032157, "learning_rate": 9.783029001074114e-06, "loss": 0.0, "step": 1856 }, { "epoch": 0.11967519494747696, "grad_norm": 0.0483249174057824, "learning_rate": 9.782312925170069e-06, "loss": 0.0001, "step": 1857 }, { "epoch": 0.11973964039440614, "grad_norm": 0.03142188899828553, "learning_rate": 9.781596849266023e-06, "loss": 0.0003, "step": 1858 }, { "epoch": 0.11980408584133531, "grad_norm": 0.2874638451772468, "learning_rate": 9.780880773361977e-06, "loss": 0.0006, "step": 1859 }, { "epoch": 0.11986853128826448, "grad_norm": 0.005053723657460722, "learning_rate": 9.780164697457932e-06, "loss": 0.0, "step": 1860 }, { "epoch": 0.11993297673519365, "grad_norm": 0.02870584304045045, "learning_rate": 9.779448621553886e-06, "loss": 0.0001, "step": 1861 }, { "epoch": 0.11999742218212284, "grad_norm": 0.2748382656294078, "learning_rate": 9.77873254564984e-06, "loss": 0.0004, "step": 1862 }, { "epoch": 0.12006186762905201, "grad_norm": 0.0008884232880952846, "learning_rate": 9.778016469745793e-06, "loss": 0.0, "step": 1863 }, { "epoch": 0.12012631307598118, "grad_norm": 0.17966616545364283, "learning_rate": 9.777300393841747e-06, "loss": 0.0022, "step": 1864 }, { "epoch": 0.12019075852291036, "grad_norm": 0.004776275188142645, "learning_rate": 9.776584317937701e-06, "loss": 0.0, "step": 1865 }, { "epoch": 0.12025520396983953, "grad_norm": 0.006079849315427602, "learning_rate": 9.775868242033656e-06, "loss": 0.0, "step": 1866 }, { "epoch": 0.1203196494167687, "grad_norm": 0.2494943765783425, "learning_rate": 9.77515216612961e-06, "loss": 0.0005, "step": 1867 }, { "epoch": 0.12038409486369787, "grad_norm": 0.011233876711569475, "learning_rate": 9.774436090225564e-06, "loss": 0.0001, "step": 1868 }, { "epoch": 0.12044854031062706, "grad_norm": 0.22918849086157178, "learning_rate": 9.77372001432152e-06, "loss": 0.0007, "step": 1869 }, { "epoch": 0.12051298575755623, "grad_norm": 0.003232146333357443, "learning_rate": 9.773003938417473e-06, "loss": 0.0, "step": 1870 }, { "epoch": 0.1205774312044854, "grad_norm": 0.09946201213585422, "learning_rate": 9.772287862513427e-06, "loss": 0.0011, "step": 1871 }, { "epoch": 0.12064187665141458, "grad_norm": 0.02818442187642415, "learning_rate": 9.771571786609381e-06, "loss": 0.0002, "step": 1872 }, { "epoch": 0.12070632209834375, "grad_norm": 0.010201301486194175, "learning_rate": 9.770855710705336e-06, "loss": 0.0, "step": 1873 }, { "epoch": 0.12077076754527293, "grad_norm": 0.6826322714867648, "learning_rate": 9.77013963480129e-06, "loss": 0.0058, "step": 1874 }, { "epoch": 0.1208352129922021, "grad_norm": 7.93762296335806e-05, "learning_rate": 9.769423558897244e-06, "loss": 0.0, "step": 1875 }, { "epoch": 0.12089965843913128, "grad_norm": 0.03652186301096464, "learning_rate": 9.768707482993199e-06, "loss": 0.0004, "step": 1876 }, { "epoch": 0.12096410388606045, "grad_norm": 0.06363982698805158, "learning_rate": 9.767991407089153e-06, "loss": 0.0002, "step": 1877 }, { "epoch": 0.12102854933298962, "grad_norm": 0.0020954344021100178, "learning_rate": 9.767275331185107e-06, "loss": 0.0, "step": 1878 }, { "epoch": 0.1210929947799188, "grad_norm": 0.17501861119930448, "learning_rate": 9.76655925528106e-06, "loss": 0.0009, "step": 1879 }, { "epoch": 0.12115744022684798, "grad_norm": 0.004144092077334751, "learning_rate": 9.765843179377014e-06, "loss": 0.0, "step": 1880 }, { "epoch": 0.12122188567377715, "grad_norm": 0.0020211306433941265, "learning_rate": 9.765127103472968e-06, "loss": 0.0, "step": 1881 }, { "epoch": 0.12128633112070632, "grad_norm": 0.02949696401101104, "learning_rate": 9.764411027568923e-06, "loss": 0.0002, "step": 1882 }, { "epoch": 0.1213507765676355, "grad_norm": 0.016619784508678393, "learning_rate": 9.763694951664877e-06, "loss": 0.0001, "step": 1883 }, { "epoch": 0.12141522201456467, "grad_norm": 0.3003303690624513, "learning_rate": 9.762978875760831e-06, "loss": 0.0015, "step": 1884 }, { "epoch": 0.12147966746149384, "grad_norm": 0.0013416382681100284, "learning_rate": 9.762262799856786e-06, "loss": 0.0, "step": 1885 }, { "epoch": 0.12154411290842301, "grad_norm": 0.00362328786595532, "learning_rate": 9.76154672395274e-06, "loss": 0.0, "step": 1886 }, { "epoch": 0.1216085583553522, "grad_norm": 0.20061469461587267, "learning_rate": 9.760830648048694e-06, "loss": 0.0008, "step": 1887 }, { "epoch": 0.12167300380228137, "grad_norm": 0.1765857512431687, "learning_rate": 9.760114572144649e-06, "loss": 0.0007, "step": 1888 }, { "epoch": 0.12173744924921054, "grad_norm": 0.009202730838186858, "learning_rate": 9.759398496240601e-06, "loss": 0.0001, "step": 1889 }, { "epoch": 0.12180189469613972, "grad_norm": 0.024221015879938333, "learning_rate": 9.758682420336555e-06, "loss": 0.0001, "step": 1890 }, { "epoch": 0.1218663401430689, "grad_norm": 0.0032962728768922566, "learning_rate": 9.75796634443251e-06, "loss": 0.0, "step": 1891 }, { "epoch": 0.12193078558999806, "grad_norm": 0.0017656631596894676, "learning_rate": 9.757250268528466e-06, "loss": 0.0, "step": 1892 }, { "epoch": 0.12199523103692723, "grad_norm": 0.29931956751586863, "learning_rate": 9.75653419262442e-06, "loss": 0.001, "step": 1893 }, { "epoch": 0.12205967648385642, "grad_norm": 0.005788445500950225, "learning_rate": 9.755818116720374e-06, "loss": 0.0, "step": 1894 }, { "epoch": 0.12212412193078559, "grad_norm": 0.0004621153606314104, "learning_rate": 9.755102040816327e-06, "loss": 0.0, "step": 1895 }, { "epoch": 0.12218856737771476, "grad_norm": 0.0032908027163354404, "learning_rate": 9.754385964912281e-06, "loss": 0.0, "step": 1896 }, { "epoch": 0.12225301282464394, "grad_norm": 0.011861358115373295, "learning_rate": 9.753669889008236e-06, "loss": 0.0, "step": 1897 }, { "epoch": 0.12231745827157312, "grad_norm": 0.28728746872163846, "learning_rate": 9.75295381310419e-06, "loss": 0.001, "step": 1898 }, { "epoch": 0.12238190371850229, "grad_norm": 0.05119925405332734, "learning_rate": 9.752237737200144e-06, "loss": 0.0001, "step": 1899 }, { "epoch": 0.12244634916543146, "grad_norm": 0.027689745751122395, "learning_rate": 9.751521661296098e-06, "loss": 0.0001, "step": 1900 }, { "epoch": 0.12251079461236064, "grad_norm": 0.013967656639211944, "learning_rate": 9.750805585392053e-06, "loss": 0.0001, "step": 1901 }, { "epoch": 0.12257524005928981, "grad_norm": 0.006609898455584625, "learning_rate": 9.750089509488007e-06, "loss": 0.0, "step": 1902 }, { "epoch": 0.12263968550621898, "grad_norm": 0.0014454099959909776, "learning_rate": 9.749373433583961e-06, "loss": 0.0, "step": 1903 }, { "epoch": 0.12270413095314817, "grad_norm": 0.00026203836987910003, "learning_rate": 9.748657357679916e-06, "loss": 0.0, "step": 1904 }, { "epoch": 0.12276857640007734, "grad_norm": 0.00271610028264705, "learning_rate": 9.747941281775868e-06, "loss": 0.0, "step": 1905 }, { "epoch": 0.12283302184700651, "grad_norm": 0.0016812886013362202, "learning_rate": 9.747225205871823e-06, "loss": 0.0, "step": 1906 }, { "epoch": 0.12289746729393568, "grad_norm": 0.061560284842779436, "learning_rate": 9.746509129967777e-06, "loss": 0.0002, "step": 1907 }, { "epoch": 0.12296191274086486, "grad_norm": 0.003250485974073988, "learning_rate": 9.745793054063731e-06, "loss": 0.0, "step": 1908 }, { "epoch": 0.12302635818779403, "grad_norm": 0.0006700169758031093, "learning_rate": 9.745076978159685e-06, "loss": 0.0, "step": 1909 }, { "epoch": 0.1230908036347232, "grad_norm": 0.005113968592353677, "learning_rate": 9.74436090225564e-06, "loss": 0.0001, "step": 1910 }, { "epoch": 0.12315524908165239, "grad_norm": 0.41192248477347315, "learning_rate": 9.743644826351594e-06, "loss": 0.0015, "step": 1911 }, { "epoch": 0.12321969452858156, "grad_norm": 0.0045803669893567924, "learning_rate": 9.742928750447548e-06, "loss": 0.0, "step": 1912 }, { "epoch": 0.12328413997551073, "grad_norm": 0.17900576010418873, "learning_rate": 9.742212674543503e-06, "loss": 0.0022, "step": 1913 }, { "epoch": 0.1233485854224399, "grad_norm": 0.04272674418974187, "learning_rate": 9.741496598639457e-06, "loss": 0.0001, "step": 1914 }, { "epoch": 0.12341303086936908, "grad_norm": 0.021601893328035547, "learning_rate": 9.740780522735411e-06, "loss": 0.0, "step": 1915 }, { "epoch": 0.12347747631629825, "grad_norm": 0.13924543879510617, "learning_rate": 9.740064446831366e-06, "loss": 0.001, "step": 1916 }, { "epoch": 0.12354192176322742, "grad_norm": 0.028301274749120395, "learning_rate": 9.73934837092732e-06, "loss": 0.0001, "step": 1917 }, { "epoch": 0.12360636721015661, "grad_norm": 0.009873854772588045, "learning_rate": 9.738632295023274e-06, "loss": 0.0001, "step": 1918 }, { "epoch": 0.12367081265708578, "grad_norm": 0.16220766557710695, "learning_rate": 9.737916219119228e-06, "loss": 0.0007, "step": 1919 }, { "epoch": 0.12373525810401495, "grad_norm": 0.022465447563904575, "learning_rate": 9.737200143215183e-06, "loss": 0.0001, "step": 1920 }, { "epoch": 0.12379970355094412, "grad_norm": 0.009806271616587365, "learning_rate": 9.736484067311135e-06, "loss": 0.0, "step": 1921 }, { "epoch": 0.1238641489978733, "grad_norm": 0.05567401244931437, "learning_rate": 9.73576799140709e-06, "loss": 0.0002, "step": 1922 }, { "epoch": 0.12392859444480248, "grad_norm": 0.052939701804537706, "learning_rate": 9.735051915503044e-06, "loss": 0.0002, "step": 1923 }, { "epoch": 0.12399303989173165, "grad_norm": 0.001525405124781685, "learning_rate": 9.734335839598998e-06, "loss": 0.0, "step": 1924 }, { "epoch": 0.12405748533866082, "grad_norm": 0.37714290288877417, "learning_rate": 9.733619763694953e-06, "loss": 0.0011, "step": 1925 }, { "epoch": 0.12412193078559, "grad_norm": 0.001764385053678129, "learning_rate": 9.732903687790907e-06, "loss": 0.0, "step": 1926 }, { "epoch": 0.12418637623251917, "grad_norm": 0.019501183135983383, "learning_rate": 9.732187611886861e-06, "loss": 0.0002, "step": 1927 }, { "epoch": 0.12425082167944834, "grad_norm": 0.018751990192806996, "learning_rate": 9.731471535982815e-06, "loss": 0.0001, "step": 1928 }, { "epoch": 0.12431526712637753, "grad_norm": 0.004955295051767424, "learning_rate": 9.73075546007877e-06, "loss": 0.0, "step": 1929 }, { "epoch": 0.1243797125733067, "grad_norm": 0.061112311234522064, "learning_rate": 9.730039384174724e-06, "loss": 0.0002, "step": 1930 }, { "epoch": 0.12444415802023587, "grad_norm": 0.004004600983625894, "learning_rate": 9.729323308270677e-06, "loss": 0.0, "step": 1931 }, { "epoch": 0.12450860346716504, "grad_norm": 0.0003309675091962049, "learning_rate": 9.728607232366631e-06, "loss": 0.0, "step": 1932 }, { "epoch": 0.12457304891409422, "grad_norm": 0.15170736542210217, "learning_rate": 9.727891156462585e-06, "loss": 0.002, "step": 1933 }, { "epoch": 0.1246374943610234, "grad_norm": 1.4362513882968126, "learning_rate": 9.72717508055854e-06, "loss": 0.0162, "step": 1934 }, { "epoch": 0.12470193980795256, "grad_norm": 0.014300368475316579, "learning_rate": 9.726459004654494e-06, "loss": 0.0001, "step": 1935 }, { "epoch": 0.12476638525488175, "grad_norm": 0.004234241794280436, "learning_rate": 9.725742928750448e-06, "loss": 0.0001, "step": 1936 }, { "epoch": 0.12483083070181092, "grad_norm": 0.015785880095632743, "learning_rate": 9.725026852846402e-06, "loss": 0.0, "step": 1937 }, { "epoch": 0.12489527614874009, "grad_norm": 0.16290879479847703, "learning_rate": 9.724310776942357e-06, "loss": 0.0019, "step": 1938 }, { "epoch": 0.12495972159566926, "grad_norm": 0.0871295187201113, "learning_rate": 9.723594701038311e-06, "loss": 0.0017, "step": 1939 }, { "epoch": 0.12502416704259844, "grad_norm": 0.04819015484545837, "learning_rate": 9.722878625134265e-06, "loss": 0.0001, "step": 1940 }, { "epoch": 0.12508861248952763, "grad_norm": 0.043515775119977985, "learning_rate": 9.72216254923022e-06, "loss": 0.0006, "step": 1941 }, { "epoch": 0.12515305793645679, "grad_norm": 0.019120648631456776, "learning_rate": 9.721446473326174e-06, "loss": 0.0002, "step": 1942 }, { "epoch": 0.12521750338338597, "grad_norm": 0.06639534145403339, "learning_rate": 9.720730397422128e-06, "loss": 0.0002, "step": 1943 }, { "epoch": 0.12528194883031513, "grad_norm": 0.32793971915972786, "learning_rate": 9.720014321518082e-06, "loss": 0.0003, "step": 1944 }, { "epoch": 0.1253463942772443, "grad_norm": 0.23585167454332714, "learning_rate": 9.719298245614037e-06, "loss": 0.0004, "step": 1945 }, { "epoch": 0.1254108397241735, "grad_norm": 0.08747161219538542, "learning_rate": 9.718582169709991e-06, "loss": 0.0003, "step": 1946 }, { "epoch": 0.12547528517110265, "grad_norm": 0.08837184298199861, "learning_rate": 9.717866093805944e-06, "loss": 0.0001, "step": 1947 }, { "epoch": 0.12553973061803184, "grad_norm": 0.048392791842107145, "learning_rate": 9.717150017901898e-06, "loss": 0.0001, "step": 1948 }, { "epoch": 0.12560417606496102, "grad_norm": 0.9592803684743428, "learning_rate": 9.716433941997852e-06, "loss": 0.0082, "step": 1949 }, { "epoch": 0.12566862151189018, "grad_norm": 0.2817725519397007, "learning_rate": 9.715717866093807e-06, "loss": 0.0003, "step": 1950 }, { "epoch": 0.12573306695881936, "grad_norm": 0.012663876833443062, "learning_rate": 9.715001790189761e-06, "loss": 0.0, "step": 1951 }, { "epoch": 0.12579751240574855, "grad_norm": 0.05103851906930751, "learning_rate": 9.714285714285715e-06, "loss": 0.0016, "step": 1952 }, { "epoch": 0.1258619578526777, "grad_norm": 0.07201781829296526, "learning_rate": 9.71356963838167e-06, "loss": 0.0003, "step": 1953 }, { "epoch": 0.1259264032996069, "grad_norm": 0.0009689525530489202, "learning_rate": 9.712853562477624e-06, "loss": 0.0, "step": 1954 }, { "epoch": 0.12599084874653604, "grad_norm": 0.0145900601196395, "learning_rate": 9.712137486573578e-06, "loss": 0.0, "step": 1955 }, { "epoch": 0.12605529419346523, "grad_norm": 0.14103773266063874, "learning_rate": 9.71142141066953e-06, "loss": 0.0005, "step": 1956 }, { "epoch": 0.1261197396403944, "grad_norm": 0.007242582761787885, "learning_rate": 9.710705334765485e-06, "loss": 0.0, "step": 1957 }, { "epoch": 0.12618418508732357, "grad_norm": 2.3089804801462774, "learning_rate": 9.70998925886144e-06, "loss": 0.018, "step": 1958 }, { "epoch": 0.12624863053425275, "grad_norm": 0.001684282938530845, "learning_rate": 9.709273182957394e-06, "loss": 0.0, "step": 1959 }, { "epoch": 0.12631307598118194, "grad_norm": 0.008110944387793166, "learning_rate": 9.708557107053348e-06, "loss": 0.0, "step": 1960 }, { "epoch": 0.1263775214281111, "grad_norm": 0.0012180645839539397, "learning_rate": 9.707841031149302e-06, "loss": 0.0, "step": 1961 }, { "epoch": 0.12644196687504028, "grad_norm": 0.0032079103488066195, "learning_rate": 9.707124955245258e-06, "loss": 0.0, "step": 1962 }, { "epoch": 0.12650641232196946, "grad_norm": 0.0015223787814444203, "learning_rate": 9.70640887934121e-06, "loss": 0.0, "step": 1963 }, { "epoch": 0.12657085776889862, "grad_norm": 0.002616986152407543, "learning_rate": 9.705692803437165e-06, "loss": 0.0, "step": 1964 }, { "epoch": 0.1266353032158278, "grad_norm": 0.45123696786549056, "learning_rate": 9.70497672753312e-06, "loss": 0.0022, "step": 1965 }, { "epoch": 0.126699748662757, "grad_norm": 0.5187885166714238, "learning_rate": 9.704260651629074e-06, "loss": 0.0025, "step": 1966 }, { "epoch": 0.12676419410968615, "grad_norm": 0.002532257006504168, "learning_rate": 9.703544575725028e-06, "loss": 0.0, "step": 1967 }, { "epoch": 0.12682863955661533, "grad_norm": 0.0024266808451328583, "learning_rate": 9.702828499820982e-06, "loss": 0.0, "step": 1968 }, { "epoch": 0.1268930850035445, "grad_norm": 0.000678778908986063, "learning_rate": 9.702112423916937e-06, "loss": 0.0, "step": 1969 }, { "epoch": 0.12695753045047367, "grad_norm": 0.2541039650735298, "learning_rate": 9.701396348012891e-06, "loss": 0.002, "step": 1970 }, { "epoch": 0.12702197589740286, "grad_norm": 0.05886083764835366, "learning_rate": 9.700680272108845e-06, "loss": 0.0005, "step": 1971 }, { "epoch": 0.127086421344332, "grad_norm": 0.01571323952426794, "learning_rate": 9.699964196204798e-06, "loss": 0.0001, "step": 1972 }, { "epoch": 0.1271508667912612, "grad_norm": 0.003961847090334322, "learning_rate": 9.699248120300752e-06, "loss": 0.0, "step": 1973 }, { "epoch": 0.12721531223819038, "grad_norm": 0.027488595208986197, "learning_rate": 9.698532044396706e-06, "loss": 0.0, "step": 1974 }, { "epoch": 0.12727975768511954, "grad_norm": 0.011537951536441094, "learning_rate": 9.69781596849266e-06, "loss": 0.0, "step": 1975 }, { "epoch": 0.12734420313204872, "grad_norm": 0.06099060148991771, "learning_rate": 9.697099892588615e-06, "loss": 0.0001, "step": 1976 }, { "epoch": 0.1274086485789779, "grad_norm": 0.0044298324954258495, "learning_rate": 9.69638381668457e-06, "loss": 0.0, "step": 1977 }, { "epoch": 0.12747309402590706, "grad_norm": 0.03492540826460728, "learning_rate": 9.695667740780524e-06, "loss": 0.0001, "step": 1978 }, { "epoch": 0.12753753947283625, "grad_norm": 0.01319119997833318, "learning_rate": 9.694951664876478e-06, "loss": 0.0, "step": 1979 }, { "epoch": 0.12760198491976543, "grad_norm": 0.007561792083493985, "learning_rate": 9.694235588972432e-06, "loss": 0.0, "step": 1980 }, { "epoch": 0.1276664303666946, "grad_norm": 0.04029629379038578, "learning_rate": 9.693519513068386e-06, "loss": 0.0001, "step": 1981 }, { "epoch": 0.12773087581362377, "grad_norm": 0.0435667968924196, "learning_rate": 9.692803437164339e-06, "loss": 0.0002, "step": 1982 }, { "epoch": 0.12779532126055293, "grad_norm": 0.010267197277295997, "learning_rate": 9.692087361260293e-06, "loss": 0.0, "step": 1983 }, { "epoch": 0.12785976670748211, "grad_norm": 0.11801684485489015, "learning_rate": 9.691371285356248e-06, "loss": 0.0023, "step": 1984 }, { "epoch": 0.1279242121544113, "grad_norm": 0.02908363021812742, "learning_rate": 9.690655209452202e-06, "loss": 0.0001, "step": 1985 }, { "epoch": 0.12798865760134046, "grad_norm": 0.0020590699766240743, "learning_rate": 9.689939133548158e-06, "loss": 0.0, "step": 1986 }, { "epoch": 0.12805310304826964, "grad_norm": 0.18389281811274666, "learning_rate": 9.689223057644112e-06, "loss": 0.0002, "step": 1987 }, { "epoch": 0.12811754849519882, "grad_norm": 0.09317822238790244, "learning_rate": 9.688506981740065e-06, "loss": 0.002, "step": 1988 }, { "epoch": 0.12818199394212798, "grad_norm": 0.2188158447077549, "learning_rate": 9.687790905836019e-06, "loss": 0.001, "step": 1989 }, { "epoch": 0.12824643938905717, "grad_norm": 0.0652653566331668, "learning_rate": 9.687074829931973e-06, "loss": 0.0001, "step": 1990 }, { "epoch": 0.12831088483598635, "grad_norm": 0.08241202614232288, "learning_rate": 9.686358754027928e-06, "loss": 0.0001, "step": 1991 }, { "epoch": 0.1283753302829155, "grad_norm": 0.006286295574519581, "learning_rate": 9.685642678123882e-06, "loss": 0.0, "step": 1992 }, { "epoch": 0.1284397757298447, "grad_norm": 0.003714464869485905, "learning_rate": 9.684926602219836e-06, "loss": 0.0, "step": 1993 }, { "epoch": 0.12850422117677385, "grad_norm": 0.005759371136794642, "learning_rate": 9.68421052631579e-06, "loss": 0.0, "step": 1994 }, { "epoch": 0.12856866662370303, "grad_norm": 0.005675051030252726, "learning_rate": 9.683494450411745e-06, "loss": 0.0001, "step": 1995 }, { "epoch": 0.12863311207063222, "grad_norm": 0.0024021179085006966, "learning_rate": 9.6827783745077e-06, "loss": 0.0, "step": 1996 }, { "epoch": 0.12869755751756137, "grad_norm": 0.0003785387330054233, "learning_rate": 9.682062298603654e-06, "loss": 0.0, "step": 1997 }, { "epoch": 0.12876200296449056, "grad_norm": 0.029128929405783818, "learning_rate": 9.681346222699606e-06, "loss": 0.0003, "step": 1998 }, { "epoch": 0.12882644841141974, "grad_norm": 0.00965094471811793, "learning_rate": 9.68063014679556e-06, "loss": 0.0001, "step": 1999 }, { "epoch": 0.1288908938583489, "grad_norm": 0.1631243083836886, "learning_rate": 9.679914070891515e-06, "loss": 0.0004, "step": 2000 }, { "epoch": 0.12895533930527808, "grad_norm": 0.0008463747064084646, "learning_rate": 9.679197994987469e-06, "loss": 0.0, "step": 2001 }, { "epoch": 0.12901978475220727, "grad_norm": 0.16633632762422967, "learning_rate": 9.678481919083423e-06, "loss": 0.0017, "step": 2002 }, { "epoch": 0.12908423019913642, "grad_norm": 0.010527829476286266, "learning_rate": 9.677765843179378e-06, "loss": 0.0, "step": 2003 }, { "epoch": 0.1291486756460656, "grad_norm": 0.0005172988190096706, "learning_rate": 9.677049767275332e-06, "loss": 0.0, "step": 2004 }, { "epoch": 0.1292131210929948, "grad_norm": 0.5475268974403349, "learning_rate": 9.676333691371286e-06, "loss": 0.0049, "step": 2005 }, { "epoch": 0.12927756653992395, "grad_norm": 0.3089875231942968, "learning_rate": 9.67561761546724e-06, "loss": 0.0015, "step": 2006 }, { "epoch": 0.12934201198685313, "grad_norm": 0.4836295297972049, "learning_rate": 9.674901539563195e-06, "loss": 0.0014, "step": 2007 }, { "epoch": 0.1294064574337823, "grad_norm": 0.027169375141539498, "learning_rate": 9.674185463659147e-06, "loss": 0.0001, "step": 2008 }, { "epoch": 0.12947090288071147, "grad_norm": 0.021970903803559733, "learning_rate": 9.673469387755103e-06, "loss": 0.0001, "step": 2009 }, { "epoch": 0.12953534832764066, "grad_norm": 0.009162414864943546, "learning_rate": 9.672753311851058e-06, "loss": 0.0, "step": 2010 }, { "epoch": 0.12959979377456982, "grad_norm": 0.11454378921360125, "learning_rate": 9.672037235947012e-06, "loss": 0.0001, "step": 2011 }, { "epoch": 0.129664239221499, "grad_norm": 0.029784219172073287, "learning_rate": 9.671321160042966e-06, "loss": 0.0, "step": 2012 }, { "epoch": 0.12972868466842818, "grad_norm": 0.023949376703751586, "learning_rate": 9.67060508413892e-06, "loss": 0.0001, "step": 2013 }, { "epoch": 0.12979313011535734, "grad_norm": 0.002974615832058479, "learning_rate": 9.669889008234873e-06, "loss": 0.0, "step": 2014 }, { "epoch": 0.12985757556228653, "grad_norm": 0.0378054906125581, "learning_rate": 9.669172932330828e-06, "loss": 0.0002, "step": 2015 }, { "epoch": 0.1299220210092157, "grad_norm": 0.9699590275654907, "learning_rate": 9.668456856426782e-06, "loss": 0.0052, "step": 2016 }, { "epoch": 0.12998646645614487, "grad_norm": 0.04729061845179302, "learning_rate": 9.667740780522736e-06, "loss": 0.0002, "step": 2017 }, { "epoch": 0.13005091190307405, "grad_norm": 0.0011672871369493613, "learning_rate": 9.66702470461869e-06, "loss": 0.0, "step": 2018 }, { "epoch": 0.13011535735000324, "grad_norm": 0.16779780560629792, "learning_rate": 9.666308628714645e-06, "loss": 0.0013, "step": 2019 }, { "epoch": 0.1301798027969324, "grad_norm": 0.009769002881575378, "learning_rate": 9.665592552810599e-06, "loss": 0.0, "step": 2020 }, { "epoch": 0.13024424824386158, "grad_norm": 0.06925341571206166, "learning_rate": 9.664876476906553e-06, "loss": 0.0016, "step": 2021 }, { "epoch": 0.13030869369079073, "grad_norm": 0.028873011933198297, "learning_rate": 9.664160401002508e-06, "loss": 0.0002, "step": 2022 }, { "epoch": 0.13037313913771992, "grad_norm": 0.030437929164553016, "learning_rate": 9.663444325098462e-06, "loss": 0.0003, "step": 2023 }, { "epoch": 0.1304375845846491, "grad_norm": 0.04157236987937166, "learning_rate": 9.662728249194415e-06, "loss": 0.0, "step": 2024 }, { "epoch": 0.13050203003157826, "grad_norm": 0.002376335667268474, "learning_rate": 9.662012173290369e-06, "loss": 0.0, "step": 2025 }, { "epoch": 0.13056647547850744, "grad_norm": 0.4035950618520248, "learning_rate": 9.661296097386323e-06, "loss": 0.0029, "step": 2026 }, { "epoch": 0.13063092092543663, "grad_norm": 0.027696990872225418, "learning_rate": 9.660580021482277e-06, "loss": 0.0, "step": 2027 }, { "epoch": 0.13069536637236578, "grad_norm": 0.04223727765421684, "learning_rate": 9.659863945578232e-06, "loss": 0.0001, "step": 2028 }, { "epoch": 0.13075981181929497, "grad_norm": 0.07407476864774445, "learning_rate": 9.659147869674186e-06, "loss": 0.0001, "step": 2029 }, { "epoch": 0.13082425726622415, "grad_norm": 0.06935889911574292, "learning_rate": 9.65843179377014e-06, "loss": 0.0001, "step": 2030 }, { "epoch": 0.1308887027131533, "grad_norm": 0.048697351332985216, "learning_rate": 9.657715717866095e-06, "loss": 0.0003, "step": 2031 }, { "epoch": 0.1309531481600825, "grad_norm": 0.005297919283381317, "learning_rate": 9.656999641962049e-06, "loss": 0.0, "step": 2032 }, { "epoch": 0.13101759360701165, "grad_norm": 0.026038987175857987, "learning_rate": 9.656283566058003e-06, "loss": 0.0, "step": 2033 }, { "epoch": 0.13108203905394084, "grad_norm": 0.031187994357722387, "learning_rate": 9.655567490153958e-06, "loss": 0.0, "step": 2034 }, { "epoch": 0.13114648450087002, "grad_norm": 0.0026731337132794813, "learning_rate": 9.654851414249912e-06, "loss": 0.0, "step": 2035 }, { "epoch": 0.13121092994779918, "grad_norm": 0.031899373503944894, "learning_rate": 9.654135338345866e-06, "loss": 0.0, "step": 2036 }, { "epoch": 0.13127537539472836, "grad_norm": 0.19949602017699414, "learning_rate": 9.65341926244182e-06, "loss": 0.0018, "step": 2037 }, { "epoch": 0.13133982084165755, "grad_norm": 0.08790331325073492, "learning_rate": 9.652703186537775e-06, "loss": 0.0001, "step": 2038 }, { "epoch": 0.1314042662885867, "grad_norm": 0.017596495982531747, "learning_rate": 9.651987110633729e-06, "loss": 0.0001, "step": 2039 }, { "epoch": 0.1314687117355159, "grad_norm": 0.019189778384638312, "learning_rate": 9.651271034729682e-06, "loss": 0.0002, "step": 2040 }, { "epoch": 0.13153315718244507, "grad_norm": 0.06314370843579555, "learning_rate": 9.650554958825636e-06, "loss": 0.0003, "step": 2041 }, { "epoch": 0.13159760262937423, "grad_norm": 0.0006894959370470676, "learning_rate": 9.64983888292159e-06, "loss": 0.0, "step": 2042 }, { "epoch": 0.1316620480763034, "grad_norm": 0.37931082451387943, "learning_rate": 9.649122807017545e-06, "loss": 0.0007, "step": 2043 }, { "epoch": 0.1317264935232326, "grad_norm": 0.0030844319112572124, "learning_rate": 9.648406731113499e-06, "loss": 0.0, "step": 2044 }, { "epoch": 0.13179093897016175, "grad_norm": 0.08177165554426978, "learning_rate": 9.647690655209453e-06, "loss": 0.0004, "step": 2045 }, { "epoch": 0.13185538441709094, "grad_norm": 0.43350847797617365, "learning_rate": 9.646974579305407e-06, "loss": 0.0023, "step": 2046 }, { "epoch": 0.1319198298640201, "grad_norm": 0.017477741474904874, "learning_rate": 9.646258503401362e-06, "loss": 0.0, "step": 2047 }, { "epoch": 0.13198427531094928, "grad_norm": 0.002203850556521162, "learning_rate": 9.645542427497316e-06, "loss": 0.0, "step": 2048 }, { "epoch": 0.13204872075787846, "grad_norm": 0.31950717965940695, "learning_rate": 9.644826351593269e-06, "loss": 0.0006, "step": 2049 }, { "epoch": 0.13211316620480762, "grad_norm": 0.2171599295152496, "learning_rate": 9.644110275689223e-06, "loss": 0.0012, "step": 2050 }, { "epoch": 0.1321776116517368, "grad_norm": 0.006761549558222412, "learning_rate": 9.643394199785177e-06, "loss": 0.0001, "step": 2051 }, { "epoch": 0.132242057098666, "grad_norm": 0.21121347000074073, "learning_rate": 9.642678123881131e-06, "loss": 0.0016, "step": 2052 }, { "epoch": 0.13230650254559514, "grad_norm": 0.00621073689854682, "learning_rate": 9.641962047977086e-06, "loss": 0.0001, "step": 2053 }, { "epoch": 0.13237094799252433, "grad_norm": 0.32037854095344764, "learning_rate": 9.64124597207304e-06, "loss": 0.0011, "step": 2054 }, { "epoch": 0.1324353934394535, "grad_norm": 0.009164569823797965, "learning_rate": 9.640529896168994e-06, "loss": 0.0, "step": 2055 }, { "epoch": 0.13249983888638267, "grad_norm": 0.02221471650999578, "learning_rate": 9.639813820264949e-06, "loss": 0.0002, "step": 2056 }, { "epoch": 0.13256428433331185, "grad_norm": 0.000575011763455413, "learning_rate": 9.639097744360903e-06, "loss": 0.0, "step": 2057 }, { "epoch": 0.13262872978024104, "grad_norm": 0.010129757102013255, "learning_rate": 9.638381668456857e-06, "loss": 0.0, "step": 2058 }, { "epoch": 0.1326931752271702, "grad_norm": 0.1409973473472684, "learning_rate": 9.637665592552812e-06, "loss": 0.0013, "step": 2059 }, { "epoch": 0.13275762067409938, "grad_norm": 0.005983310707828044, "learning_rate": 9.636949516648766e-06, "loss": 0.0, "step": 2060 }, { "epoch": 0.13282206612102854, "grad_norm": 0.0009344883651601945, "learning_rate": 9.63623344074472e-06, "loss": 0.0, "step": 2061 }, { "epoch": 0.13288651156795772, "grad_norm": 0.003960576942543866, "learning_rate": 9.635517364840674e-06, "loss": 0.0, "step": 2062 }, { "epoch": 0.1329509570148869, "grad_norm": 0.5569627866898234, "learning_rate": 9.634801288936629e-06, "loss": 0.0028, "step": 2063 }, { "epoch": 0.13301540246181606, "grad_norm": 0.0047079077069493705, "learning_rate": 9.634085213032583e-06, "loss": 0.0, "step": 2064 }, { "epoch": 0.13307984790874525, "grad_norm": 0.003906507967835101, "learning_rate": 9.633369137128536e-06, "loss": 0.0, "step": 2065 }, { "epoch": 0.13314429335567443, "grad_norm": 0.11023906442304471, "learning_rate": 9.63265306122449e-06, "loss": 0.0003, "step": 2066 }, { "epoch": 0.1332087388026036, "grad_norm": 0.0004294632581472271, "learning_rate": 9.631936985320444e-06, "loss": 0.0, "step": 2067 }, { "epoch": 0.13327318424953277, "grad_norm": 0.11307068278807467, "learning_rate": 9.631220909416399e-06, "loss": 0.0021, "step": 2068 }, { "epoch": 0.13333762969646196, "grad_norm": 0.0006063610247870296, "learning_rate": 9.630504833512353e-06, "loss": 0.0, "step": 2069 }, { "epoch": 0.1334020751433911, "grad_norm": 0.0014840911368628803, "learning_rate": 9.629788757608307e-06, "loss": 0.0, "step": 2070 }, { "epoch": 0.1334665205903203, "grad_norm": 0.6738283019134732, "learning_rate": 9.629072681704261e-06, "loss": 0.0026, "step": 2071 }, { "epoch": 0.13353096603724945, "grad_norm": 0.0011470253664121449, "learning_rate": 9.628356605800216e-06, "loss": 0.0, "step": 2072 }, { "epoch": 0.13359541148417864, "grad_norm": 0.0026127623637299136, "learning_rate": 9.62764052989617e-06, "loss": 0.0, "step": 2073 }, { "epoch": 0.13365985693110782, "grad_norm": 0.0012740458006002587, "learning_rate": 9.626924453992124e-06, "loss": 0.0, "step": 2074 }, { "epoch": 0.13372430237803698, "grad_norm": 0.01763503065907178, "learning_rate": 9.626208378088077e-06, "loss": 0.0001, "step": 2075 }, { "epoch": 0.13378874782496616, "grad_norm": 0.023128139248136018, "learning_rate": 9.625492302184031e-06, "loss": 0.0003, "step": 2076 }, { "epoch": 0.13385319327189535, "grad_norm": 0.027094712760084454, "learning_rate": 9.624776226279986e-06, "loss": 0.0, "step": 2077 }, { "epoch": 0.1339176387188245, "grad_norm": 0.006780221983481411, "learning_rate": 9.62406015037594e-06, "loss": 0.0, "step": 2078 }, { "epoch": 0.1339820841657537, "grad_norm": 0.03709572145604888, "learning_rate": 9.623344074471896e-06, "loss": 0.0, "step": 2079 }, { "epoch": 0.13404652961268287, "grad_norm": 0.013222114372806895, "learning_rate": 9.62262799856785e-06, "loss": 0.0, "step": 2080 }, { "epoch": 0.13411097505961203, "grad_norm": 2.5401217554602096, "learning_rate": 9.621911922663803e-06, "loss": 0.0209, "step": 2081 }, { "epoch": 0.13417542050654122, "grad_norm": 1.0853759081919199, "learning_rate": 9.621195846759757e-06, "loss": 0.004, "step": 2082 }, { "epoch": 0.1342398659534704, "grad_norm": 0.0014960894831067125, "learning_rate": 9.620479770855711e-06, "loss": 0.0, "step": 2083 }, { "epoch": 0.13430431140039956, "grad_norm": 0.0010822836961904576, "learning_rate": 9.619763694951666e-06, "loss": 0.0, "step": 2084 }, { "epoch": 0.13436875684732874, "grad_norm": 0.0020887246339200602, "learning_rate": 9.61904761904762e-06, "loss": 0.0, "step": 2085 }, { "epoch": 0.1344332022942579, "grad_norm": 0.0006119661795616889, "learning_rate": 9.618331543143574e-06, "loss": 0.0, "step": 2086 }, { "epoch": 0.13449764774118708, "grad_norm": 0.0343665176866361, "learning_rate": 9.617615467239529e-06, "loss": 0.0001, "step": 2087 }, { "epoch": 0.13456209318811627, "grad_norm": 0.013401859452242893, "learning_rate": 9.616899391335483e-06, "loss": 0.0, "step": 2088 }, { "epoch": 0.13462653863504542, "grad_norm": 0.012735135863470763, "learning_rate": 9.616183315431437e-06, "loss": 0.0, "step": 2089 }, { "epoch": 0.1346909840819746, "grad_norm": 0.00041242381199152895, "learning_rate": 9.615467239527391e-06, "loss": 0.0, "step": 2090 }, { "epoch": 0.1347554295289038, "grad_norm": 0.0021930814514787705, "learning_rate": 9.614751163623344e-06, "loss": 0.0, "step": 2091 }, { "epoch": 0.13481987497583295, "grad_norm": 0.010113932262379114, "learning_rate": 9.614035087719298e-06, "loss": 0.0, "step": 2092 }, { "epoch": 0.13488432042276213, "grad_norm": 0.002889841846447938, "learning_rate": 9.613319011815253e-06, "loss": 0.0, "step": 2093 }, { "epoch": 0.13494876586969132, "grad_norm": 0.10021186453941479, "learning_rate": 9.612602935911207e-06, "loss": 0.0017, "step": 2094 }, { "epoch": 0.13501321131662047, "grad_norm": 0.1701542427445515, "learning_rate": 9.611886860007161e-06, "loss": 0.0005, "step": 2095 }, { "epoch": 0.13507765676354966, "grad_norm": 0.2863434546765848, "learning_rate": 9.611170784103116e-06, "loss": 0.0003, "step": 2096 }, { "epoch": 0.13514210221047884, "grad_norm": 0.030204838470596046, "learning_rate": 9.61045470819907e-06, "loss": 0.0001, "step": 2097 }, { "epoch": 0.135206547657408, "grad_norm": 0.0006739822506401262, "learning_rate": 9.609738632295024e-06, "loss": 0.0, "step": 2098 }, { "epoch": 0.13527099310433718, "grad_norm": 0.011925152450066019, "learning_rate": 9.609022556390978e-06, "loss": 0.0, "step": 2099 }, { "epoch": 0.13533543855126634, "grad_norm": 0.000670276235356618, "learning_rate": 9.608306480486933e-06, "loss": 0.0, "step": 2100 }, { "epoch": 0.13539988399819552, "grad_norm": 0.0018891649124682022, "learning_rate": 9.607590404582885e-06, "loss": 0.0, "step": 2101 }, { "epoch": 0.1354643294451247, "grad_norm": 0.0008358051574337161, "learning_rate": 9.606874328678841e-06, "loss": 0.0, "step": 2102 }, { "epoch": 0.13552877489205387, "grad_norm": 0.17796909637015565, "learning_rate": 9.606158252774796e-06, "loss": 0.0005, "step": 2103 }, { "epoch": 0.13559322033898305, "grad_norm": 0.0029781535719557023, "learning_rate": 9.60544217687075e-06, "loss": 0.0, "step": 2104 }, { "epoch": 0.13565766578591223, "grad_norm": 0.0007821579397496209, "learning_rate": 9.604726100966704e-06, "loss": 0.0, "step": 2105 }, { "epoch": 0.1357221112328414, "grad_norm": 0.013895512731706612, "learning_rate": 9.604010025062659e-06, "loss": 0.0001, "step": 2106 }, { "epoch": 0.13578655667977058, "grad_norm": 0.0006325714668676574, "learning_rate": 9.603293949158611e-06, "loss": 0.0, "step": 2107 }, { "epoch": 0.13585100212669976, "grad_norm": 0.0036075466188459538, "learning_rate": 9.602577873254565e-06, "loss": 0.0, "step": 2108 }, { "epoch": 0.13591544757362892, "grad_norm": 0.00022546715718579325, "learning_rate": 9.60186179735052e-06, "loss": 0.0, "step": 2109 }, { "epoch": 0.1359798930205581, "grad_norm": 0.0019036321791445633, "learning_rate": 9.601145721446474e-06, "loss": 0.0, "step": 2110 }, { "epoch": 0.13604433846748729, "grad_norm": 0.0013767631086754703, "learning_rate": 9.600429645542428e-06, "loss": 0.0, "step": 2111 }, { "epoch": 0.13610878391441644, "grad_norm": 0.00033106270899061087, "learning_rate": 9.599713569638383e-06, "loss": 0.0, "step": 2112 }, { "epoch": 0.13617322936134563, "grad_norm": 0.0004757141807601016, "learning_rate": 9.598997493734337e-06, "loss": 0.0, "step": 2113 }, { "epoch": 0.13623767480827478, "grad_norm": 0.0012730227358612794, "learning_rate": 9.598281417830291e-06, "loss": 0.0, "step": 2114 }, { "epoch": 0.13630212025520397, "grad_norm": 0.006321125741840267, "learning_rate": 9.597565341926246e-06, "loss": 0.0001, "step": 2115 }, { "epoch": 0.13636656570213315, "grad_norm": 0.007981766124111247, "learning_rate": 9.5968492660222e-06, "loss": 0.0001, "step": 2116 }, { "epoch": 0.1364310111490623, "grad_norm": 0.0013743153367953734, "learning_rate": 9.596133190118152e-06, "loss": 0.0, "step": 2117 }, { "epoch": 0.1364954565959915, "grad_norm": 0.009479398745795132, "learning_rate": 9.595417114214107e-06, "loss": 0.0, "step": 2118 }, { "epoch": 0.13655990204292068, "grad_norm": 0.0025998528844112315, "learning_rate": 9.594701038310061e-06, "loss": 0.0, "step": 2119 }, { "epoch": 0.13662434748984983, "grad_norm": 0.0014551499795753477, "learning_rate": 9.593984962406015e-06, "loss": 0.0, "step": 2120 }, { "epoch": 0.13668879293677902, "grad_norm": 0.00018595876394929454, "learning_rate": 9.59326888650197e-06, "loss": 0.0, "step": 2121 }, { "epoch": 0.1367532383837082, "grad_norm": 0.16831879403294095, "learning_rate": 9.592552810597924e-06, "loss": 0.0001, "step": 2122 }, { "epoch": 0.13681768383063736, "grad_norm": 0.006065252590868405, "learning_rate": 9.591836734693878e-06, "loss": 0.0001, "step": 2123 }, { "epoch": 0.13688212927756654, "grad_norm": 0.0008141178296848007, "learning_rate": 9.591120658789833e-06, "loss": 0.0, "step": 2124 }, { "epoch": 0.1369465747244957, "grad_norm": 0.010431545831266202, "learning_rate": 9.590404582885787e-06, "loss": 0.0, "step": 2125 }, { "epoch": 0.13701102017142489, "grad_norm": 0.00022965953498677982, "learning_rate": 9.589688506981741e-06, "loss": 0.0, "step": 2126 }, { "epoch": 0.13707546561835407, "grad_norm": 0.03739440462698081, "learning_rate": 9.588972431077695e-06, "loss": 0.0006, "step": 2127 }, { "epoch": 0.13713991106528323, "grad_norm": 0.0012059992764552644, "learning_rate": 9.58825635517365e-06, "loss": 0.0, "step": 2128 }, { "epoch": 0.1372043565122124, "grad_norm": 0.003077806228310975, "learning_rate": 9.587540279269604e-06, "loss": 0.0, "step": 2129 }, { "epoch": 0.1372688019591416, "grad_norm": 0.0038935344462661513, "learning_rate": 9.586824203365558e-06, "loss": 0.0001, "step": 2130 }, { "epoch": 0.13733324740607075, "grad_norm": 0.3498434423366078, "learning_rate": 9.586108127461513e-06, "loss": 0.0024, "step": 2131 }, { "epoch": 0.13739769285299994, "grad_norm": 0.0030082327199185107, "learning_rate": 9.585392051557467e-06, "loss": 0.0, "step": 2132 }, { "epoch": 0.13746213829992912, "grad_norm": 0.001989814893872896, "learning_rate": 9.58467597565342e-06, "loss": 0.0, "step": 2133 }, { "epoch": 0.13752658374685828, "grad_norm": 0.2112891392170814, "learning_rate": 9.583959899749374e-06, "loss": 0.0004, "step": 2134 }, { "epoch": 0.13759102919378746, "grad_norm": 0.007960537454691344, "learning_rate": 9.583243823845328e-06, "loss": 0.0001, "step": 2135 }, { "epoch": 0.13765547464071665, "grad_norm": 0.14093893824348863, "learning_rate": 9.582527747941282e-06, "loss": 0.0002, "step": 2136 }, { "epoch": 0.1377199200876458, "grad_norm": 0.926997838025005, "learning_rate": 9.581811672037237e-06, "loss": 0.0055, "step": 2137 }, { "epoch": 0.137784365534575, "grad_norm": 0.0007762803125597636, "learning_rate": 9.581095596133191e-06, "loss": 0.0, "step": 2138 }, { "epoch": 0.13784881098150414, "grad_norm": 0.011034256644735584, "learning_rate": 9.580379520229145e-06, "loss": 0.0001, "step": 2139 }, { "epoch": 0.13791325642843333, "grad_norm": 0.005050776046657039, "learning_rate": 9.5796634443251e-06, "loss": 0.0, "step": 2140 }, { "epoch": 0.1379777018753625, "grad_norm": 0.0058777191061129405, "learning_rate": 9.578947368421054e-06, "loss": 0.0, "step": 2141 }, { "epoch": 0.13804214732229167, "grad_norm": 0.00010401014356011279, "learning_rate": 9.578231292517007e-06, "loss": 0.0, "step": 2142 }, { "epoch": 0.13810659276922085, "grad_norm": 0.010355138189235565, "learning_rate": 9.57751521661296e-06, "loss": 0.0, "step": 2143 }, { "epoch": 0.13817103821615004, "grad_norm": 0.0004001475512964918, "learning_rate": 9.576799140708915e-06, "loss": 0.0, "step": 2144 }, { "epoch": 0.1382354836630792, "grad_norm": 0.004463203340507725, "learning_rate": 9.57608306480487e-06, "loss": 0.0, "step": 2145 }, { "epoch": 0.13829992911000838, "grad_norm": 0.08375990333559151, "learning_rate": 9.575366988900824e-06, "loss": 0.0013, "step": 2146 }, { "epoch": 0.13836437455693756, "grad_norm": 0.001991547368177983, "learning_rate": 9.574650912996778e-06, "loss": 0.0, "step": 2147 }, { "epoch": 0.13842882000386672, "grad_norm": 0.00025540746774049394, "learning_rate": 9.573934837092732e-06, "loss": 0.0, "step": 2148 }, { "epoch": 0.1384932654507959, "grad_norm": 0.00028334922210477735, "learning_rate": 9.573218761188687e-06, "loss": 0.0, "step": 2149 }, { "epoch": 0.1385577108977251, "grad_norm": 0.0002980895207097618, "learning_rate": 9.572502685284641e-06, "loss": 0.0, "step": 2150 }, { "epoch": 0.13862215634465425, "grad_norm": 0.06858295254675982, "learning_rate": 9.571786609380595e-06, "loss": 0.0007, "step": 2151 }, { "epoch": 0.13868660179158343, "grad_norm": 0.0037622125428383424, "learning_rate": 9.57107053347655e-06, "loss": 0.0, "step": 2152 }, { "epoch": 0.1387510472385126, "grad_norm": 0.002992232614199599, "learning_rate": 9.570354457572504e-06, "loss": 0.0, "step": 2153 }, { "epoch": 0.13881549268544177, "grad_norm": 0.0015270018171054526, "learning_rate": 9.569638381668458e-06, "loss": 0.0, "step": 2154 }, { "epoch": 0.13887993813237096, "grad_norm": 0.00048099787247835995, "learning_rate": 9.568922305764412e-06, "loss": 0.0, "step": 2155 }, { "epoch": 0.1389443835793001, "grad_norm": 0.06242042743630073, "learning_rate": 9.568206229860367e-06, "loss": 0.0008, "step": 2156 }, { "epoch": 0.1390088290262293, "grad_norm": 0.010198910788331253, "learning_rate": 9.567490153956321e-06, "loss": 0.0, "step": 2157 }, { "epoch": 0.13907327447315848, "grad_norm": 0.02498183349411272, "learning_rate": 9.566774078052274e-06, "loss": 0.0002, "step": 2158 }, { "epoch": 0.13913771992008764, "grad_norm": 0.02871299698962203, "learning_rate": 9.566058002148228e-06, "loss": 0.0002, "step": 2159 }, { "epoch": 0.13920216536701682, "grad_norm": 0.0027904745167431897, "learning_rate": 9.565341926244182e-06, "loss": 0.0, "step": 2160 }, { "epoch": 0.139266610813946, "grad_norm": 0.0017900588858772886, "learning_rate": 9.564625850340137e-06, "loss": 0.0, "step": 2161 }, { "epoch": 0.13933105626087516, "grad_norm": 0.34400681092622376, "learning_rate": 9.56390977443609e-06, "loss": 0.0009, "step": 2162 }, { "epoch": 0.13939550170780435, "grad_norm": 0.0004665528583749067, "learning_rate": 9.563193698532045e-06, "loss": 0.0, "step": 2163 }, { "epoch": 0.1394599471547335, "grad_norm": 0.0022528881939270448, "learning_rate": 9.562477622628e-06, "loss": 0.0, "step": 2164 }, { "epoch": 0.1395243926016627, "grad_norm": 0.0747928905148953, "learning_rate": 9.561761546723954e-06, "loss": 0.0008, "step": 2165 }, { "epoch": 0.13958883804859187, "grad_norm": 0.15587924242670487, "learning_rate": 9.561045470819908e-06, "loss": 0.0015, "step": 2166 }, { "epoch": 0.13965328349552103, "grad_norm": 0.010952113106743318, "learning_rate": 9.560329394915862e-06, "loss": 0.0001, "step": 2167 }, { "epoch": 0.13971772894245021, "grad_norm": 0.0052290386293551245, "learning_rate": 9.559613319011815e-06, "loss": 0.0, "step": 2168 }, { "epoch": 0.1397821743893794, "grad_norm": 0.0009271938531524405, "learning_rate": 9.55889724310777e-06, "loss": 0.0, "step": 2169 }, { "epoch": 0.13984661983630856, "grad_norm": 0.0012394748834099222, "learning_rate": 9.558181167203723e-06, "loss": 0.0, "step": 2170 }, { "epoch": 0.13991106528323774, "grad_norm": 0.058789488562754094, "learning_rate": 9.557465091299678e-06, "loss": 0.0001, "step": 2171 }, { "epoch": 0.13997551073016692, "grad_norm": 0.0008058793089597991, "learning_rate": 9.556749015395634e-06, "loss": 0.0, "step": 2172 }, { "epoch": 0.14003995617709608, "grad_norm": 0.010173480068610822, "learning_rate": 9.556032939491588e-06, "loss": 0.0001, "step": 2173 }, { "epoch": 0.14010440162402527, "grad_norm": 0.015670921228439428, "learning_rate": 9.55531686358754e-06, "loss": 0.0001, "step": 2174 }, { "epoch": 0.14016884707095445, "grad_norm": 0.03238827058306233, "learning_rate": 9.554600787683495e-06, "loss": 0.0001, "step": 2175 }, { "epoch": 0.1402332925178836, "grad_norm": 0.001430104595176344, "learning_rate": 9.55388471177945e-06, "loss": 0.0, "step": 2176 }, { "epoch": 0.1402977379648128, "grad_norm": 0.22314276162412025, "learning_rate": 9.553168635875404e-06, "loss": 0.0009, "step": 2177 }, { "epoch": 0.14036218341174195, "grad_norm": 0.003981575309131401, "learning_rate": 9.552452559971358e-06, "loss": 0.0, "step": 2178 }, { "epoch": 0.14042662885867113, "grad_norm": 0.022705913856466316, "learning_rate": 9.551736484067312e-06, "loss": 0.0, "step": 2179 }, { "epoch": 0.14049107430560032, "grad_norm": 0.02675406863132256, "learning_rate": 9.551020408163266e-06, "loss": 0.0004, "step": 2180 }, { "epoch": 0.14055551975252947, "grad_norm": 0.0002129580119729682, "learning_rate": 9.55030433225922e-06, "loss": 0.0, "step": 2181 }, { "epoch": 0.14061996519945866, "grad_norm": 0.4309991929595454, "learning_rate": 9.549588256355175e-06, "loss": 0.001, "step": 2182 }, { "epoch": 0.14068441064638784, "grad_norm": 0.027090977677346518, "learning_rate": 9.54887218045113e-06, "loss": 0.0002, "step": 2183 }, { "epoch": 0.140748856093317, "grad_norm": 0.015662161214944636, "learning_rate": 9.548156104547082e-06, "loss": 0.0002, "step": 2184 }, { "epoch": 0.14081330154024618, "grad_norm": 0.2672868382237579, "learning_rate": 9.547440028643036e-06, "loss": 0.0017, "step": 2185 }, { "epoch": 0.14087774698717537, "grad_norm": 0.42117580116917186, "learning_rate": 9.54672395273899e-06, "loss": 0.0019, "step": 2186 }, { "epoch": 0.14094219243410452, "grad_norm": 0.0010824486543928827, "learning_rate": 9.546007876834945e-06, "loss": 0.0, "step": 2187 }, { "epoch": 0.1410066378810337, "grad_norm": 0.002333835033309739, "learning_rate": 9.5452918009309e-06, "loss": 0.0, "step": 2188 }, { "epoch": 0.1410710833279629, "grad_norm": 0.01005892812360604, "learning_rate": 9.544575725026853e-06, "loss": 0.0001, "step": 2189 }, { "epoch": 0.14113552877489205, "grad_norm": 0.04455364954326522, "learning_rate": 9.543859649122808e-06, "loss": 0.0001, "step": 2190 }, { "epoch": 0.14119997422182123, "grad_norm": 0.011728766129579472, "learning_rate": 9.543143573218762e-06, "loss": 0.0, "step": 2191 }, { "epoch": 0.1412644196687504, "grad_norm": 0.04750135049264757, "learning_rate": 9.542427497314716e-06, "loss": 0.0001, "step": 2192 }, { "epoch": 0.14132886511567957, "grad_norm": 0.10700909026667592, "learning_rate": 9.54171142141067e-06, "loss": 0.0001, "step": 2193 }, { "epoch": 0.14139331056260876, "grad_norm": 0.0018928932543482945, "learning_rate": 9.540995345506623e-06, "loss": 0.0, "step": 2194 }, { "epoch": 0.14145775600953792, "grad_norm": 0.00035078335624548354, "learning_rate": 9.540279269602578e-06, "loss": 0.0, "step": 2195 }, { "epoch": 0.1415222014564671, "grad_norm": 0.003913708593869081, "learning_rate": 9.539563193698534e-06, "loss": 0.0, "step": 2196 }, { "epoch": 0.14158664690339628, "grad_norm": 0.32054424044004987, "learning_rate": 9.538847117794488e-06, "loss": 0.0025, "step": 2197 }, { "epoch": 0.14165109235032544, "grad_norm": 0.00016171468966313804, "learning_rate": 9.538131041890442e-06, "loss": 0.0, "step": 2198 }, { "epoch": 0.14171553779725463, "grad_norm": 0.03721486801308035, "learning_rate": 9.537414965986396e-06, "loss": 0.0003, "step": 2199 }, { "epoch": 0.1417799832441838, "grad_norm": 0.009083953664856316, "learning_rate": 9.536698890082349e-06, "loss": 0.0, "step": 2200 }, { "epoch": 0.14184442869111297, "grad_norm": 0.005521489335318659, "learning_rate": 9.535982814178303e-06, "loss": 0.0, "step": 2201 }, { "epoch": 0.14190887413804215, "grad_norm": 0.0010786474428082949, "learning_rate": 9.535266738274258e-06, "loss": 0.0, "step": 2202 }, { "epoch": 0.1419733195849713, "grad_norm": 0.0008330341741979345, "learning_rate": 9.534550662370212e-06, "loss": 0.0, "step": 2203 }, { "epoch": 0.1420377650319005, "grad_norm": 0.004503439156467866, "learning_rate": 9.533834586466166e-06, "loss": 0.0, "step": 2204 }, { "epoch": 0.14210221047882968, "grad_norm": 0.1447155653550225, "learning_rate": 9.53311851056212e-06, "loss": 0.0003, "step": 2205 }, { "epoch": 0.14216665592575883, "grad_norm": 0.0006731134985055816, "learning_rate": 9.532402434658075e-06, "loss": 0.0, "step": 2206 }, { "epoch": 0.14223110137268802, "grad_norm": 0.000276659143474887, "learning_rate": 9.53168635875403e-06, "loss": 0.0, "step": 2207 }, { "epoch": 0.1422955468196172, "grad_norm": 0.0007131897573369064, "learning_rate": 9.530970282849983e-06, "loss": 0.0, "step": 2208 }, { "epoch": 0.14235999226654636, "grad_norm": 0.007994417837624694, "learning_rate": 9.530254206945938e-06, "loss": 0.0, "step": 2209 }, { "epoch": 0.14242443771347554, "grad_norm": 0.181136117707174, "learning_rate": 9.52953813104189e-06, "loss": 0.0005, "step": 2210 }, { "epoch": 0.14248888316040473, "grad_norm": 0.15367599099740947, "learning_rate": 9.528822055137845e-06, "loss": 0.0005, "step": 2211 }, { "epoch": 0.14255332860733388, "grad_norm": 0.15235054783448743, "learning_rate": 9.528105979233799e-06, "loss": 0.0007, "step": 2212 }, { "epoch": 0.14261777405426307, "grad_norm": 0.004490466231885834, "learning_rate": 9.527389903329753e-06, "loss": 0.0, "step": 2213 }, { "epoch": 0.14268221950119225, "grad_norm": 0.1511576771537659, "learning_rate": 9.526673827425708e-06, "loss": 0.0017, "step": 2214 }, { "epoch": 0.1427466649481214, "grad_norm": 0.013676781391319589, "learning_rate": 9.525957751521662e-06, "loss": 0.0001, "step": 2215 }, { "epoch": 0.1428111103950506, "grad_norm": 0.06545610037074183, "learning_rate": 9.525241675617616e-06, "loss": 0.0005, "step": 2216 }, { "epoch": 0.14287555584197975, "grad_norm": 0.9775896697611458, "learning_rate": 9.52452559971357e-06, "loss": 0.0047, "step": 2217 }, { "epoch": 0.14294000128890894, "grad_norm": 0.00031478760827874825, "learning_rate": 9.523809523809525e-06, "loss": 0.0, "step": 2218 }, { "epoch": 0.14300444673583812, "grad_norm": 0.030597664652693826, "learning_rate": 9.523093447905479e-06, "loss": 0.0002, "step": 2219 }, { "epoch": 0.14306889218276728, "grad_norm": 1.7293522593468222, "learning_rate": 9.522377372001433e-06, "loss": 0.0008, "step": 2220 }, { "epoch": 0.14313333762969646, "grad_norm": 0.01601536033865269, "learning_rate": 9.521661296097388e-06, "loss": 0.0, "step": 2221 }, { "epoch": 0.14319778307662565, "grad_norm": 0.0006310703572563955, "learning_rate": 9.520945220193342e-06, "loss": 0.0, "step": 2222 }, { "epoch": 0.1432622285235548, "grad_norm": 0.0005210939538044766, "learning_rate": 9.520229144289296e-06, "loss": 0.0, "step": 2223 }, { "epoch": 0.143326673970484, "grad_norm": 0.018941119371520383, "learning_rate": 9.51951306838525e-06, "loss": 0.0002, "step": 2224 }, { "epoch": 0.14339111941741317, "grad_norm": 0.05937819495828138, "learning_rate": 9.518796992481205e-06, "loss": 0.0001, "step": 2225 }, { "epoch": 0.14345556486434233, "grad_norm": 0.018362872540066737, "learning_rate": 9.518080916577157e-06, "loss": 0.0001, "step": 2226 }, { "epoch": 0.1435200103112715, "grad_norm": 0.01831749307485675, "learning_rate": 9.517364840673112e-06, "loss": 0.0001, "step": 2227 }, { "epoch": 0.1435844557582007, "grad_norm": 0.006862687030695175, "learning_rate": 9.516648764769066e-06, "loss": 0.0, "step": 2228 }, { "epoch": 0.14364890120512985, "grad_norm": 7.471194955517663e-05, "learning_rate": 9.51593268886502e-06, "loss": 0.0, "step": 2229 }, { "epoch": 0.14371334665205904, "grad_norm": 0.003645056892950574, "learning_rate": 9.515216612960975e-06, "loss": 0.0, "step": 2230 }, { "epoch": 0.1437777920989882, "grad_norm": 0.016415402507622183, "learning_rate": 9.514500537056929e-06, "loss": 0.0001, "step": 2231 }, { "epoch": 0.14384223754591738, "grad_norm": 0.0016457805006108836, "learning_rate": 9.513784461152883e-06, "loss": 0.0, "step": 2232 }, { "epoch": 0.14390668299284656, "grad_norm": 0.026616648264700046, "learning_rate": 9.513068385248838e-06, "loss": 0.0001, "step": 2233 }, { "epoch": 0.14397112843977572, "grad_norm": 0.5394570109498802, "learning_rate": 9.512352309344792e-06, "loss": 0.0036, "step": 2234 }, { "epoch": 0.1440355738867049, "grad_norm": 0.006396931689943378, "learning_rate": 9.511636233440744e-06, "loss": 0.0, "step": 2235 }, { "epoch": 0.1441000193336341, "grad_norm": 0.012580438401703474, "learning_rate": 9.510920157536699e-06, "loss": 0.0, "step": 2236 }, { "epoch": 0.14416446478056325, "grad_norm": 0.01920853147496425, "learning_rate": 9.510204081632653e-06, "loss": 0.0001, "step": 2237 }, { "epoch": 0.14422891022749243, "grad_norm": 0.002245388255484627, "learning_rate": 9.509488005728607e-06, "loss": 0.0, "step": 2238 }, { "epoch": 0.14429335567442161, "grad_norm": 0.579263982784339, "learning_rate": 9.508771929824562e-06, "loss": 0.0168, "step": 2239 }, { "epoch": 0.14435780112135077, "grad_norm": 0.00028066203840411235, "learning_rate": 9.508055853920516e-06, "loss": 0.0, "step": 2240 }, { "epoch": 0.14442224656827995, "grad_norm": 0.021214743917220127, "learning_rate": 9.50733977801647e-06, "loss": 0.0001, "step": 2241 }, { "epoch": 0.1444866920152091, "grad_norm": 0.00032764501369088723, "learning_rate": 9.506623702112425e-06, "loss": 0.0, "step": 2242 }, { "epoch": 0.1445511374621383, "grad_norm": 0.0058459261688533605, "learning_rate": 9.505907626208379e-06, "loss": 0.0, "step": 2243 }, { "epoch": 0.14461558290906748, "grad_norm": 0.051741941735831465, "learning_rate": 9.505191550304333e-06, "loss": 0.0, "step": 2244 }, { "epoch": 0.14468002835599664, "grad_norm": 0.32622149655365945, "learning_rate": 9.504475474400287e-06, "loss": 0.0003, "step": 2245 }, { "epoch": 0.14474447380292582, "grad_norm": 0.055378686377618995, "learning_rate": 9.503759398496242e-06, "loss": 0.0001, "step": 2246 }, { "epoch": 0.144808919249855, "grad_norm": 0.02396991245736209, "learning_rate": 9.503043322592196e-06, "loss": 0.0001, "step": 2247 }, { "epoch": 0.14487336469678416, "grad_norm": 0.03345312463997792, "learning_rate": 9.50232724668815e-06, "loss": 0.0015, "step": 2248 }, { "epoch": 0.14493781014371335, "grad_norm": 0.15419614497691245, "learning_rate": 9.501611170784105e-06, "loss": 0.002, "step": 2249 }, { "epoch": 0.14500225559064253, "grad_norm": 0.25495189154503606, "learning_rate": 9.500895094880059e-06, "loss": 0.001, "step": 2250 }, { "epoch": 0.1450667010375717, "grad_norm": 0.06784105322185756, "learning_rate": 9.500179018976012e-06, "loss": 0.0004, "step": 2251 }, { "epoch": 0.14513114648450087, "grad_norm": 0.0008401214103608888, "learning_rate": 9.499462943071966e-06, "loss": 0.0, "step": 2252 }, { "epoch": 0.14519559193143006, "grad_norm": 0.7134616185045286, "learning_rate": 9.49874686716792e-06, "loss": 0.0016, "step": 2253 }, { "epoch": 0.1452600373783592, "grad_norm": 0.005298159616167539, "learning_rate": 9.498030791263874e-06, "loss": 0.0, "step": 2254 }, { "epoch": 0.1453244828252884, "grad_norm": 0.24674281553149796, "learning_rate": 9.497314715359829e-06, "loss": 0.0005, "step": 2255 }, { "epoch": 0.14538892827221755, "grad_norm": 0.015423563114479719, "learning_rate": 9.496598639455783e-06, "loss": 0.0001, "step": 2256 }, { "epoch": 0.14545337371914674, "grad_norm": 0.014220395291028429, "learning_rate": 9.495882563551737e-06, "loss": 0.0001, "step": 2257 }, { "epoch": 0.14551781916607592, "grad_norm": 0.0020908002801696174, "learning_rate": 9.495166487647692e-06, "loss": 0.0, "step": 2258 }, { "epoch": 0.14558226461300508, "grad_norm": 0.004630563261824797, "learning_rate": 9.494450411743646e-06, "loss": 0.0, "step": 2259 }, { "epoch": 0.14564671005993426, "grad_norm": 0.0005906543258460047, "learning_rate": 9.4937343358396e-06, "loss": 0.0, "step": 2260 }, { "epoch": 0.14571115550686345, "grad_norm": 0.00957402495164633, "learning_rate": 9.493018259935553e-06, "loss": 0.0, "step": 2261 }, { "epoch": 0.1457756009537926, "grad_norm": 0.014087719869458042, "learning_rate": 9.492302184031507e-06, "loss": 0.0, "step": 2262 }, { "epoch": 0.1458400464007218, "grad_norm": 0.0029143890782602393, "learning_rate": 9.491586108127461e-06, "loss": 0.0, "step": 2263 }, { "epoch": 0.14590449184765097, "grad_norm": 0.03503983481883921, "learning_rate": 9.490870032223416e-06, "loss": 0.0004, "step": 2264 }, { "epoch": 0.14596893729458013, "grad_norm": 0.013767286296889813, "learning_rate": 9.49015395631937e-06, "loss": 0.0, "step": 2265 }, { "epoch": 0.14603338274150932, "grad_norm": 0.25470119158679877, "learning_rate": 9.489437880415326e-06, "loss": 0.0005, "step": 2266 }, { "epoch": 0.1460978281884385, "grad_norm": 0.007543495843438581, "learning_rate": 9.488721804511279e-06, "loss": 0.0, "step": 2267 }, { "epoch": 0.14616227363536766, "grad_norm": 0.0020839204456079203, "learning_rate": 9.488005728607233e-06, "loss": 0.0, "step": 2268 }, { "epoch": 0.14622671908229684, "grad_norm": 0.06732914003313965, "learning_rate": 9.487289652703187e-06, "loss": 0.0003, "step": 2269 }, { "epoch": 0.146291164529226, "grad_norm": 0.0016212466885061447, "learning_rate": 9.486573576799142e-06, "loss": 0.0, "step": 2270 }, { "epoch": 0.14635560997615518, "grad_norm": 0.3236607034805371, "learning_rate": 9.485857500895096e-06, "loss": 0.0045, "step": 2271 }, { "epoch": 0.14642005542308437, "grad_norm": 0.004995574742021267, "learning_rate": 9.48514142499105e-06, "loss": 0.0, "step": 2272 }, { "epoch": 0.14648450087001352, "grad_norm": 0.00117536297122479, "learning_rate": 9.484425349087004e-06, "loss": 0.0, "step": 2273 }, { "epoch": 0.1465489463169427, "grad_norm": 0.0005086225519439412, "learning_rate": 9.483709273182959e-06, "loss": 0.0, "step": 2274 }, { "epoch": 0.1466133917638719, "grad_norm": 0.0030946496454177484, "learning_rate": 9.482993197278913e-06, "loss": 0.0, "step": 2275 }, { "epoch": 0.14667783721080105, "grad_norm": 0.0010062472442629099, "learning_rate": 9.482277121374867e-06, "loss": 0.0, "step": 2276 }, { "epoch": 0.14674228265773023, "grad_norm": 0.03180384283377841, "learning_rate": 9.48156104547082e-06, "loss": 0.0002, "step": 2277 }, { "epoch": 0.14680672810465942, "grad_norm": 0.21418460822901342, "learning_rate": 9.480844969566774e-06, "loss": 0.0015, "step": 2278 }, { "epoch": 0.14687117355158857, "grad_norm": 0.010110387519656235, "learning_rate": 9.480128893662729e-06, "loss": 0.0, "step": 2279 }, { "epoch": 0.14693561899851776, "grad_norm": 0.0013718162505616696, "learning_rate": 9.479412817758683e-06, "loss": 0.0, "step": 2280 }, { "epoch": 0.14700006444544694, "grad_norm": 0.001147183938640018, "learning_rate": 9.478696741854637e-06, "loss": 0.0, "step": 2281 }, { "epoch": 0.1470645098923761, "grad_norm": 0.0038179217062423933, "learning_rate": 9.477980665950591e-06, "loss": 0.0, "step": 2282 }, { "epoch": 0.14712895533930528, "grad_norm": 0.16823613707940346, "learning_rate": 9.477264590046546e-06, "loss": 0.0001, "step": 2283 }, { "epoch": 0.14719340078623444, "grad_norm": 0.0007041555804207547, "learning_rate": 9.4765485141425e-06, "loss": 0.0, "step": 2284 }, { "epoch": 0.14725784623316363, "grad_norm": 0.17469604142040665, "learning_rate": 9.475832438238454e-06, "loss": 0.0006, "step": 2285 }, { "epoch": 0.1473222916800928, "grad_norm": 0.07642142365060478, "learning_rate": 9.475116362334409e-06, "loss": 0.0002, "step": 2286 }, { "epoch": 0.14738673712702197, "grad_norm": 0.026083890142809724, "learning_rate": 9.474400286430361e-06, "loss": 0.0003, "step": 2287 }, { "epoch": 0.14745118257395115, "grad_norm": 0.0013347215920775026, "learning_rate": 9.473684210526315e-06, "loss": 0.0, "step": 2288 }, { "epoch": 0.14751562802088033, "grad_norm": 0.0656069517862333, "learning_rate": 9.472968134622271e-06, "loss": 0.0001, "step": 2289 }, { "epoch": 0.1475800734678095, "grad_norm": 0.0008470576602853886, "learning_rate": 9.472252058718226e-06, "loss": 0.0, "step": 2290 }, { "epoch": 0.14764451891473868, "grad_norm": 0.16749845691760115, "learning_rate": 9.47153598281418e-06, "loss": 0.003, "step": 2291 }, { "epoch": 0.14770896436166786, "grad_norm": 0.0016121071686281076, "learning_rate": 9.470819906910134e-06, "loss": 0.0, "step": 2292 }, { "epoch": 0.14777340980859702, "grad_norm": 0.006441744083881955, "learning_rate": 9.470103831006087e-06, "loss": 0.0, "step": 2293 }, { "epoch": 0.1478378552555262, "grad_norm": 0.016728643202810173, "learning_rate": 9.469387755102041e-06, "loss": 0.0001, "step": 2294 }, { "epoch": 0.14790230070245536, "grad_norm": 0.005027691402007882, "learning_rate": 9.468671679197996e-06, "loss": 0.0, "step": 2295 }, { "epoch": 0.14796674614938454, "grad_norm": 0.001770350858388834, "learning_rate": 9.46795560329395e-06, "loss": 0.0, "step": 2296 }, { "epoch": 0.14803119159631373, "grad_norm": 0.00011038801131939273, "learning_rate": 9.467239527389904e-06, "loss": 0.0, "step": 2297 }, { "epoch": 0.14809563704324288, "grad_norm": 0.30113663671343743, "learning_rate": 9.466523451485858e-06, "loss": 0.0011, "step": 2298 }, { "epoch": 0.14816008249017207, "grad_norm": 0.0003501895384979152, "learning_rate": 9.465807375581813e-06, "loss": 0.0, "step": 2299 }, { "epoch": 0.14822452793710125, "grad_norm": 0.001076580935332914, "learning_rate": 9.465091299677767e-06, "loss": 0.0, "step": 2300 }, { "epoch": 0.1482889733840304, "grad_norm": 0.880960361418177, "learning_rate": 9.464375223773721e-06, "loss": 0.0051, "step": 2301 }, { "epoch": 0.1483534188309596, "grad_norm": 0.03526416745467504, "learning_rate": 9.463659147869676e-06, "loss": 0.0002, "step": 2302 }, { "epoch": 0.14841786427788878, "grad_norm": 0.0025750307660047872, "learning_rate": 9.462943071965628e-06, "loss": 0.0, "step": 2303 }, { "epoch": 0.14848230972481793, "grad_norm": 0.004023243719837641, "learning_rate": 9.462226996061583e-06, "loss": 0.0, "step": 2304 }, { "epoch": 0.14854675517174712, "grad_norm": 0.9786404284533876, "learning_rate": 9.461510920157537e-06, "loss": 0.0027, "step": 2305 }, { "epoch": 0.1486112006186763, "grad_norm": 0.13937884764855718, "learning_rate": 9.460794844253491e-06, "loss": 0.0018, "step": 2306 }, { "epoch": 0.14867564606560546, "grad_norm": 0.004105793329801603, "learning_rate": 9.460078768349445e-06, "loss": 0.0, "step": 2307 }, { "epoch": 0.14874009151253464, "grad_norm": 0.0005642601189968466, "learning_rate": 9.4593626924454e-06, "loss": 0.0, "step": 2308 }, { "epoch": 0.1488045369594638, "grad_norm": 0.041690372482371, "learning_rate": 9.458646616541354e-06, "loss": 0.0001, "step": 2309 }, { "epoch": 0.14886898240639299, "grad_norm": 0.0004829795518174653, "learning_rate": 9.457930540637308e-06, "loss": 0.0, "step": 2310 }, { "epoch": 0.14893342785332217, "grad_norm": 0.0004682333152799085, "learning_rate": 9.457214464733263e-06, "loss": 0.0, "step": 2311 }, { "epoch": 0.14899787330025133, "grad_norm": 0.016004459520340634, "learning_rate": 9.456498388829217e-06, "loss": 0.0002, "step": 2312 }, { "epoch": 0.1490623187471805, "grad_norm": 0.6353641212082908, "learning_rate": 9.455782312925171e-06, "loss": 0.0041, "step": 2313 }, { "epoch": 0.1491267641941097, "grad_norm": 0.0006008079600321058, "learning_rate": 9.455066237021126e-06, "loss": 0.0, "step": 2314 }, { "epoch": 0.14919120964103885, "grad_norm": 0.002256655415846708, "learning_rate": 9.45435016111708e-06, "loss": 0.0, "step": 2315 }, { "epoch": 0.14925565508796804, "grad_norm": 0.00014758511457507473, "learning_rate": 9.453634085213034e-06, "loss": 0.0, "step": 2316 }, { "epoch": 0.14932010053489722, "grad_norm": 0.006368467140729355, "learning_rate": 9.452918009308988e-06, "loss": 0.0, "step": 2317 }, { "epoch": 0.14938454598182638, "grad_norm": 0.0032855975036902166, "learning_rate": 9.452201933404943e-06, "loss": 0.0, "step": 2318 }, { "epoch": 0.14944899142875556, "grad_norm": 0.6131575149796049, "learning_rate": 9.451485857500895e-06, "loss": 0.003, "step": 2319 }, { "epoch": 0.14951343687568475, "grad_norm": 0.275169933440258, "learning_rate": 9.45076978159685e-06, "loss": 0.0007, "step": 2320 }, { "epoch": 0.1495778823226139, "grad_norm": 0.0023692457357848964, "learning_rate": 9.450053705692804e-06, "loss": 0.0, "step": 2321 }, { "epoch": 0.1496423277695431, "grad_norm": 0.0025468429265798303, "learning_rate": 9.449337629788758e-06, "loss": 0.0, "step": 2322 }, { "epoch": 0.14970677321647224, "grad_norm": 0.6888988525450286, "learning_rate": 9.448621553884713e-06, "loss": 0.0026, "step": 2323 }, { "epoch": 0.14977121866340143, "grad_norm": 0.000541195102515391, "learning_rate": 9.447905477980667e-06, "loss": 0.0, "step": 2324 }, { "epoch": 0.1498356641103306, "grad_norm": 0.04337376750890723, "learning_rate": 9.447189402076621e-06, "loss": 0.0001, "step": 2325 }, { "epoch": 0.14990010955725977, "grad_norm": 0.05995612202794425, "learning_rate": 9.446473326172575e-06, "loss": 0.0002, "step": 2326 }, { "epoch": 0.14996455500418895, "grad_norm": 0.014729528683343611, "learning_rate": 9.44575725026853e-06, "loss": 0.0001, "step": 2327 }, { "epoch": 0.15002900045111814, "grad_norm": 0.015224141839539764, "learning_rate": 9.445041174364482e-06, "loss": 0.0, "step": 2328 }, { "epoch": 0.1500934458980473, "grad_norm": 0.0020922033316262286, "learning_rate": 9.444325098460437e-06, "loss": 0.0, "step": 2329 }, { "epoch": 0.15015789134497648, "grad_norm": 0.00976562052965062, "learning_rate": 9.443609022556391e-06, "loss": 0.0, "step": 2330 }, { "epoch": 0.15022233679190566, "grad_norm": 0.003141049531346141, "learning_rate": 9.442892946652345e-06, "loss": 0.0, "step": 2331 }, { "epoch": 0.15028678223883482, "grad_norm": 0.2375710487754487, "learning_rate": 9.4421768707483e-06, "loss": 0.0007, "step": 2332 }, { "epoch": 0.150351227685764, "grad_norm": 0.10324509375902188, "learning_rate": 9.441460794844254e-06, "loss": 0.0003, "step": 2333 }, { "epoch": 0.15041567313269316, "grad_norm": 0.011364228373043003, "learning_rate": 9.440744718940208e-06, "loss": 0.0, "step": 2334 }, { "epoch": 0.15048011857962235, "grad_norm": 0.033959039268354946, "learning_rate": 9.440028643036162e-06, "loss": 0.0001, "step": 2335 }, { "epoch": 0.15054456402655153, "grad_norm": 0.0010414789426565346, "learning_rate": 9.439312567132117e-06, "loss": 0.0, "step": 2336 }, { "epoch": 0.1506090094734807, "grad_norm": 0.011555880377593744, "learning_rate": 9.438596491228071e-06, "loss": 0.0001, "step": 2337 }, { "epoch": 0.15067345492040987, "grad_norm": 0.13693876274950872, "learning_rate": 9.437880415324025e-06, "loss": 0.0005, "step": 2338 }, { "epoch": 0.15073790036733906, "grad_norm": 0.046467830614872876, "learning_rate": 9.43716433941998e-06, "loss": 0.0002, "step": 2339 }, { "epoch": 0.1508023458142682, "grad_norm": 0.28248522299966256, "learning_rate": 9.436448263515934e-06, "loss": 0.0005, "step": 2340 }, { "epoch": 0.1508667912611974, "grad_norm": 0.010665106642022313, "learning_rate": 9.435732187611888e-06, "loss": 0.0001, "step": 2341 }, { "epoch": 0.15093123670812658, "grad_norm": 0.01410729256099583, "learning_rate": 9.435016111707843e-06, "loss": 0.0001, "step": 2342 }, { "epoch": 0.15099568215505574, "grad_norm": 0.006377867812248754, "learning_rate": 9.434300035803797e-06, "loss": 0.0, "step": 2343 }, { "epoch": 0.15106012760198492, "grad_norm": 0.3001965872068028, "learning_rate": 9.43358395989975e-06, "loss": 0.0005, "step": 2344 }, { "epoch": 0.1511245730489141, "grad_norm": 0.0054143970512442385, "learning_rate": 9.432867883995704e-06, "loss": 0.0, "step": 2345 }, { "epoch": 0.15118901849584326, "grad_norm": 0.003879576657613195, "learning_rate": 9.432151808091658e-06, "loss": 0.0, "step": 2346 }, { "epoch": 0.15125346394277245, "grad_norm": 0.017826363177415604, "learning_rate": 9.431435732187612e-06, "loss": 0.0002, "step": 2347 }, { "epoch": 0.1513179093897016, "grad_norm": 0.14413647606933994, "learning_rate": 9.430719656283567e-06, "loss": 0.0004, "step": 2348 }, { "epoch": 0.1513823548366308, "grad_norm": 0.0006606705735611096, "learning_rate": 9.430003580379521e-06, "loss": 0.0, "step": 2349 }, { "epoch": 0.15144680028355997, "grad_norm": 0.001281065290532309, "learning_rate": 9.429287504475475e-06, "loss": 0.0, "step": 2350 }, { "epoch": 0.15151124573048913, "grad_norm": 0.002033793603108425, "learning_rate": 9.42857142857143e-06, "loss": 0.0, "step": 2351 }, { "epoch": 0.15157569117741831, "grad_norm": 0.006509579136491605, "learning_rate": 9.427855352667384e-06, "loss": 0.0, "step": 2352 }, { "epoch": 0.1516401366243475, "grad_norm": 0.015385489864271826, "learning_rate": 9.427139276763338e-06, "loss": 0.0001, "step": 2353 }, { "epoch": 0.15170458207127666, "grad_norm": 0.00034424994771698977, "learning_rate": 9.42642320085929e-06, "loss": 0.0, "step": 2354 }, { "epoch": 0.15176902751820584, "grad_norm": 0.0008804023377011956, "learning_rate": 9.425707124955245e-06, "loss": 0.0, "step": 2355 }, { "epoch": 0.15183347296513502, "grad_norm": 0.547700177660738, "learning_rate": 9.4249910490512e-06, "loss": 0.0022, "step": 2356 }, { "epoch": 0.15189791841206418, "grad_norm": 0.0032957801919053283, "learning_rate": 9.424274973147154e-06, "loss": 0.0, "step": 2357 }, { "epoch": 0.15196236385899337, "grad_norm": 0.0027416642189764164, "learning_rate": 9.423558897243108e-06, "loss": 0.0, "step": 2358 }, { "epoch": 0.15202680930592255, "grad_norm": 0.001389136393539497, "learning_rate": 9.422842821339064e-06, "loss": 0.0, "step": 2359 }, { "epoch": 0.1520912547528517, "grad_norm": 0.0016453047623652915, "learning_rate": 9.422126745435017e-06, "loss": 0.0, "step": 2360 }, { "epoch": 0.1521557001997809, "grad_norm": 0.003564651534562986, "learning_rate": 9.42141066953097e-06, "loss": 0.0, "step": 2361 }, { "epoch": 0.15222014564671005, "grad_norm": 0.001581220919916646, "learning_rate": 9.420694593626925e-06, "loss": 0.0, "step": 2362 }, { "epoch": 0.15228459109363923, "grad_norm": 1.166109780962107, "learning_rate": 9.41997851772288e-06, "loss": 0.0068, "step": 2363 }, { "epoch": 0.15234903654056842, "grad_norm": 0.001095153365991885, "learning_rate": 9.419262441818834e-06, "loss": 0.0, "step": 2364 }, { "epoch": 0.15241348198749757, "grad_norm": 0.00018614029556956357, "learning_rate": 9.418546365914788e-06, "loss": 0.0, "step": 2365 }, { "epoch": 0.15247792743442676, "grad_norm": 0.1916720432757476, "learning_rate": 9.417830290010742e-06, "loss": 0.0003, "step": 2366 }, { "epoch": 0.15254237288135594, "grad_norm": 0.17782862752708178, "learning_rate": 9.417114214106697e-06, "loss": 0.0004, "step": 2367 }, { "epoch": 0.1526068183282851, "grad_norm": 0.01575983901171653, "learning_rate": 9.416398138202651e-06, "loss": 0.0, "step": 2368 }, { "epoch": 0.15267126377521428, "grad_norm": 0.0023644075189573855, "learning_rate": 9.415682062298605e-06, "loss": 0.0, "step": 2369 }, { "epoch": 0.15273570922214347, "grad_norm": 0.03693351516886743, "learning_rate": 9.414965986394558e-06, "loss": 0.0001, "step": 2370 }, { "epoch": 0.15280015466907262, "grad_norm": 0.0012187265154810075, "learning_rate": 9.414249910490512e-06, "loss": 0.0, "step": 2371 }, { "epoch": 0.1528646001160018, "grad_norm": 0.024398932537146707, "learning_rate": 9.413533834586466e-06, "loss": 0.0001, "step": 2372 }, { "epoch": 0.15292904556293097, "grad_norm": 0.038167990425758816, "learning_rate": 9.41281775868242e-06, "loss": 0.0, "step": 2373 }, { "epoch": 0.15299349100986015, "grad_norm": 0.0026435490808041713, "learning_rate": 9.412101682778375e-06, "loss": 0.0, "step": 2374 }, { "epoch": 0.15305793645678933, "grad_norm": 0.06960011244006364, "learning_rate": 9.41138560687433e-06, "loss": 0.0001, "step": 2375 }, { "epoch": 0.1531223819037185, "grad_norm": 0.0007201561743522177, "learning_rate": 9.410669530970284e-06, "loss": 0.0, "step": 2376 }, { "epoch": 0.15318682735064768, "grad_norm": 0.003125982065041039, "learning_rate": 9.409953455066238e-06, "loss": 0.0, "step": 2377 }, { "epoch": 0.15325127279757686, "grad_norm": 0.0042100056123061, "learning_rate": 9.409237379162192e-06, "loss": 0.0, "step": 2378 }, { "epoch": 0.15331571824450602, "grad_norm": 0.004587416136859462, "learning_rate": 9.408521303258147e-06, "loss": 0.0, "step": 2379 }, { "epoch": 0.1533801636914352, "grad_norm": 0.04717204972693149, "learning_rate": 9.407805227354099e-06, "loss": 0.0, "step": 2380 }, { "epoch": 0.15344460913836439, "grad_norm": 0.04171196733747788, "learning_rate": 9.407089151450053e-06, "loss": 0.0001, "step": 2381 }, { "epoch": 0.15350905458529354, "grad_norm": 0.17253016101069935, "learning_rate": 9.406373075546008e-06, "loss": 0.0005, "step": 2382 }, { "epoch": 0.15357350003222273, "grad_norm": 0.03761734075604174, "learning_rate": 9.405656999641964e-06, "loss": 0.0, "step": 2383 }, { "epoch": 0.1536379454791519, "grad_norm": 0.011965065388645795, "learning_rate": 9.404940923737918e-06, "loss": 0.0, "step": 2384 }, { "epoch": 0.15370239092608107, "grad_norm": 0.04018939250144867, "learning_rate": 9.404224847833872e-06, "loss": 0.0, "step": 2385 }, { "epoch": 0.15376683637301025, "grad_norm": 0.3395482345981454, "learning_rate": 9.403508771929825e-06, "loss": 0.0008, "step": 2386 }, { "epoch": 0.1538312818199394, "grad_norm": 0.03146444998541529, "learning_rate": 9.40279269602578e-06, "loss": 0.0, "step": 2387 }, { "epoch": 0.1538957272668686, "grad_norm": 0.01010570402105599, "learning_rate": 9.402076620121734e-06, "loss": 0.0, "step": 2388 }, { "epoch": 0.15396017271379778, "grad_norm": 0.006316466860131675, "learning_rate": 9.401360544217688e-06, "loss": 0.0, "step": 2389 }, { "epoch": 0.15402461816072693, "grad_norm": 0.004210654737544568, "learning_rate": 9.400644468313642e-06, "loss": 0.0, "step": 2390 }, { "epoch": 0.15408906360765612, "grad_norm": 0.1352283370985708, "learning_rate": 9.399928392409596e-06, "loss": 0.0004, "step": 2391 }, { "epoch": 0.1541535090545853, "grad_norm": 0.02761470179688728, "learning_rate": 9.39921231650555e-06, "loss": 0.0001, "step": 2392 }, { "epoch": 0.15421795450151446, "grad_norm": 0.0005356983755468877, "learning_rate": 9.398496240601505e-06, "loss": 0.0, "step": 2393 }, { "epoch": 0.15428239994844364, "grad_norm": 0.11308521952425876, "learning_rate": 9.39778016469746e-06, "loss": 0.0004, "step": 2394 }, { "epoch": 0.15434684539537283, "grad_norm": 0.0003210339035201644, "learning_rate": 9.397064088793414e-06, "loss": 0.0, "step": 2395 }, { "epoch": 0.15441129084230198, "grad_norm": 0.36937508378132805, "learning_rate": 9.396348012889366e-06, "loss": 0.0027, "step": 2396 }, { "epoch": 0.15447573628923117, "grad_norm": 0.03054608444162845, "learning_rate": 9.39563193698532e-06, "loss": 0.0001, "step": 2397 }, { "epoch": 0.15454018173616035, "grad_norm": 0.49067405346123016, "learning_rate": 9.394915861081275e-06, "loss": 0.0011, "step": 2398 }, { "epoch": 0.1546046271830895, "grad_norm": 0.0164183152203303, "learning_rate": 9.394199785177229e-06, "loss": 0.0001, "step": 2399 }, { "epoch": 0.1546690726300187, "grad_norm": 0.0048164873590324534, "learning_rate": 9.393483709273183e-06, "loss": 0.0, "step": 2400 }, { "epoch": 0.15473351807694785, "grad_norm": 0.003437130484469497, "learning_rate": 9.392767633369138e-06, "loss": 0.0, "step": 2401 }, { "epoch": 0.15479796352387704, "grad_norm": 1.1498831648148473, "learning_rate": 9.392051557465092e-06, "loss": 0.0075, "step": 2402 }, { "epoch": 0.15486240897080622, "grad_norm": 0.5590278432647267, "learning_rate": 9.391335481561046e-06, "loss": 0.0039, "step": 2403 }, { "epoch": 0.15492685441773538, "grad_norm": 0.006440467338636328, "learning_rate": 9.390619405657e-06, "loss": 0.0, "step": 2404 }, { "epoch": 0.15499129986466456, "grad_norm": 0.005089887857211715, "learning_rate": 9.389903329752955e-06, "loss": 0.0, "step": 2405 }, { "epoch": 0.15505574531159375, "grad_norm": 0.03226839406679151, "learning_rate": 9.38918725384891e-06, "loss": 0.0002, "step": 2406 }, { "epoch": 0.1551201907585229, "grad_norm": 0.005760741049848393, "learning_rate": 9.388471177944863e-06, "loss": 0.0, "step": 2407 }, { "epoch": 0.1551846362054521, "grad_norm": 0.002409811586602318, "learning_rate": 9.387755102040818e-06, "loss": 0.0, "step": 2408 }, { "epoch": 0.15524908165238127, "grad_norm": 0.0002274626718670526, "learning_rate": 9.387039026136772e-06, "loss": 0.0, "step": 2409 }, { "epoch": 0.15531352709931043, "grad_norm": 0.009568083954336773, "learning_rate": 9.386322950232726e-06, "loss": 0.0, "step": 2410 }, { "epoch": 0.1553779725462396, "grad_norm": 0.0010595213282637791, "learning_rate": 9.38560687432868e-06, "loss": 0.0, "step": 2411 }, { "epoch": 0.15544241799316877, "grad_norm": 0.0015494971480794625, "learning_rate": 9.384890798424633e-06, "loss": 0.0, "step": 2412 }, { "epoch": 0.15550686344009795, "grad_norm": 0.027000724745569585, "learning_rate": 9.384174722520588e-06, "loss": 0.0001, "step": 2413 }, { "epoch": 0.15557130888702714, "grad_norm": 0.00021975748346756332, "learning_rate": 9.383458646616542e-06, "loss": 0.0, "step": 2414 }, { "epoch": 0.1556357543339563, "grad_norm": 0.22442388753739564, "learning_rate": 9.382742570712496e-06, "loss": 0.001, "step": 2415 }, { "epoch": 0.15570019978088548, "grad_norm": 0.058095847062474275, "learning_rate": 9.38202649480845e-06, "loss": 0.0002, "step": 2416 }, { "epoch": 0.15576464522781466, "grad_norm": 0.004947700956219011, "learning_rate": 9.381310418904405e-06, "loss": 0.0, "step": 2417 }, { "epoch": 0.15582909067474382, "grad_norm": 0.013964975233892866, "learning_rate": 9.380594343000359e-06, "loss": 0.0, "step": 2418 }, { "epoch": 0.155893536121673, "grad_norm": 0.00045088995588918313, "learning_rate": 9.379878267096313e-06, "loss": 0.0, "step": 2419 }, { "epoch": 0.1559579815686022, "grad_norm": 0.009997687306932127, "learning_rate": 9.379162191192268e-06, "loss": 0.0, "step": 2420 }, { "epoch": 0.15602242701553135, "grad_norm": 0.013743444217281438, "learning_rate": 9.378446115288222e-06, "loss": 0.0001, "step": 2421 }, { "epoch": 0.15608687246246053, "grad_norm": 0.0011079386020701972, "learning_rate": 9.377730039384175e-06, "loss": 0.0, "step": 2422 }, { "epoch": 0.15615131790938971, "grad_norm": 0.0005776427554553396, "learning_rate": 9.377013963480129e-06, "loss": 0.0, "step": 2423 }, { "epoch": 0.15621576335631887, "grad_norm": 0.02744104327440318, "learning_rate": 9.376297887576083e-06, "loss": 0.0001, "step": 2424 }, { "epoch": 0.15628020880324806, "grad_norm": 0.0002999203618423623, "learning_rate": 9.375581811672037e-06, "loss": 0.0, "step": 2425 }, { "epoch": 0.1563446542501772, "grad_norm": 0.0001923817727009459, "learning_rate": 9.374865735767992e-06, "loss": 0.0, "step": 2426 }, { "epoch": 0.1564090996971064, "grad_norm": 0.15707550729938025, "learning_rate": 9.374149659863946e-06, "loss": 0.0003, "step": 2427 }, { "epoch": 0.15647354514403558, "grad_norm": 0.0005202558905786767, "learning_rate": 9.3734335839599e-06, "loss": 0.0, "step": 2428 }, { "epoch": 0.15653799059096474, "grad_norm": 0.1341350078461987, "learning_rate": 9.372717508055855e-06, "loss": 0.0005, "step": 2429 }, { "epoch": 0.15660243603789392, "grad_norm": 0.03295119511432381, "learning_rate": 9.372001432151809e-06, "loss": 0.0001, "step": 2430 }, { "epoch": 0.1566668814848231, "grad_norm": 0.008991741543612479, "learning_rate": 9.371285356247763e-06, "loss": 0.0001, "step": 2431 }, { "epoch": 0.15673132693175226, "grad_norm": 0.005882962626867871, "learning_rate": 9.370569280343718e-06, "loss": 0.0, "step": 2432 }, { "epoch": 0.15679577237868145, "grad_norm": 0.14532828296553948, "learning_rate": 9.369853204439672e-06, "loss": 0.0001, "step": 2433 }, { "epoch": 0.15686021782561063, "grad_norm": 0.001661131622482854, "learning_rate": 9.369137128535626e-06, "loss": 0.0, "step": 2434 }, { "epoch": 0.1569246632725398, "grad_norm": 0.0013453870636462811, "learning_rate": 9.36842105263158e-06, "loss": 0.0, "step": 2435 }, { "epoch": 0.15698910871946897, "grad_norm": 0.0007773121412426143, "learning_rate": 9.367704976727535e-06, "loss": 0.0, "step": 2436 }, { "epoch": 0.15705355416639816, "grad_norm": 0.002945040172602348, "learning_rate": 9.366988900823489e-06, "loss": 0.0, "step": 2437 }, { "epoch": 0.1571179996133273, "grad_norm": 0.007691798343803482, "learning_rate": 9.366272824919442e-06, "loss": 0.0, "step": 2438 }, { "epoch": 0.1571824450602565, "grad_norm": 0.001754172241999642, "learning_rate": 9.365556749015396e-06, "loss": 0.0, "step": 2439 }, { "epoch": 0.15724689050718565, "grad_norm": 0.039875313526018905, "learning_rate": 9.36484067311135e-06, "loss": 0.0004, "step": 2440 }, { "epoch": 0.15731133595411484, "grad_norm": 0.15183961423677111, "learning_rate": 9.364124597207305e-06, "loss": 0.0005, "step": 2441 }, { "epoch": 0.15737578140104402, "grad_norm": 0.32147288064350765, "learning_rate": 9.363408521303259e-06, "loss": 0.0019, "step": 2442 }, { "epoch": 0.15744022684797318, "grad_norm": 0.015395113417899842, "learning_rate": 9.362692445399213e-06, "loss": 0.0, "step": 2443 }, { "epoch": 0.15750467229490236, "grad_norm": 0.007149663212477922, "learning_rate": 9.361976369495167e-06, "loss": 0.0, "step": 2444 }, { "epoch": 0.15756911774183155, "grad_norm": 0.006734754031812976, "learning_rate": 9.361260293591122e-06, "loss": 0.0, "step": 2445 }, { "epoch": 0.1576335631887607, "grad_norm": 0.0014427696968621231, "learning_rate": 9.360544217687076e-06, "loss": 0.0, "step": 2446 }, { "epoch": 0.1576980086356899, "grad_norm": 0.0067118994039908, "learning_rate": 9.359828141783029e-06, "loss": 0.0, "step": 2447 }, { "epoch": 0.15776245408261907, "grad_norm": 0.003678528787548895, "learning_rate": 9.359112065878983e-06, "loss": 0.0, "step": 2448 }, { "epoch": 0.15782689952954823, "grad_norm": 0.0029926002214066875, "learning_rate": 9.358395989974937e-06, "loss": 0.0, "step": 2449 }, { "epoch": 0.15789134497647742, "grad_norm": 0.0005757762038000473, "learning_rate": 9.357679914070892e-06, "loss": 0.0, "step": 2450 }, { "epoch": 0.1579557904234066, "grad_norm": 0.16343268918931092, "learning_rate": 9.356963838166846e-06, "loss": 0.0018, "step": 2451 }, { "epoch": 0.15802023587033576, "grad_norm": 0.011417497251374545, "learning_rate": 9.3562477622628e-06, "loss": 0.0001, "step": 2452 }, { "epoch": 0.15808468131726494, "grad_norm": 0.00034124654959428265, "learning_rate": 9.355531686358756e-06, "loss": 0.0, "step": 2453 }, { "epoch": 0.1581491267641941, "grad_norm": 0.009350126488007813, "learning_rate": 9.354815610454709e-06, "loss": 0.0, "step": 2454 }, { "epoch": 0.15821357221112328, "grad_norm": 0.012536311772587556, "learning_rate": 9.354099534550663e-06, "loss": 0.0, "step": 2455 }, { "epoch": 0.15827801765805247, "grad_norm": 0.0006414442846759456, "learning_rate": 9.353383458646617e-06, "loss": 0.0, "step": 2456 }, { "epoch": 0.15834246310498162, "grad_norm": 0.0014955118526876147, "learning_rate": 9.352667382742572e-06, "loss": 0.0, "step": 2457 }, { "epoch": 0.1584069085519108, "grad_norm": 0.012644152882045408, "learning_rate": 9.351951306838526e-06, "loss": 0.0, "step": 2458 }, { "epoch": 0.15847135399884, "grad_norm": 0.0022283322330553974, "learning_rate": 9.35123523093448e-06, "loss": 0.0, "step": 2459 }, { "epoch": 0.15853579944576915, "grad_norm": 0.00216890313118297, "learning_rate": 9.350519155030435e-06, "loss": 0.0, "step": 2460 }, { "epoch": 0.15860024489269833, "grad_norm": 4.773645306590904e-05, "learning_rate": 9.349803079126389e-06, "loss": 0.0, "step": 2461 }, { "epoch": 0.15866469033962752, "grad_norm": 0.0026673034609326894, "learning_rate": 9.349087003222343e-06, "loss": 0.0, "step": 2462 }, { "epoch": 0.15872913578655667, "grad_norm": 0.00012853098665182515, "learning_rate": 9.348370927318296e-06, "loss": 0.0, "step": 2463 }, { "epoch": 0.15879358123348586, "grad_norm": 0.023990334793908007, "learning_rate": 9.34765485141425e-06, "loss": 0.0002, "step": 2464 }, { "epoch": 0.15885802668041502, "grad_norm": 0.0015653462913017993, "learning_rate": 9.346938775510204e-06, "loss": 0.0, "step": 2465 }, { "epoch": 0.1589224721273442, "grad_norm": 0.0017677698246296816, "learning_rate": 9.346222699606159e-06, "loss": 0.0, "step": 2466 }, { "epoch": 0.15898691757427338, "grad_norm": 0.19936779468096666, "learning_rate": 9.345506623702113e-06, "loss": 0.0053, "step": 2467 }, { "epoch": 0.15905136302120254, "grad_norm": 0.010805132866747108, "learning_rate": 9.344790547798067e-06, "loss": 0.0001, "step": 2468 }, { "epoch": 0.15911580846813173, "grad_norm": 0.014871396083823452, "learning_rate": 9.344074471894022e-06, "loss": 0.0002, "step": 2469 }, { "epoch": 0.1591802539150609, "grad_norm": 0.003370385419454828, "learning_rate": 9.343358395989976e-06, "loss": 0.0, "step": 2470 }, { "epoch": 0.15924469936199007, "grad_norm": 0.0023136418019346424, "learning_rate": 9.34264232008593e-06, "loss": 0.0, "step": 2471 }, { "epoch": 0.15930914480891925, "grad_norm": 0.006850854848157116, "learning_rate": 9.341926244181884e-06, "loss": 0.0001, "step": 2472 }, { "epoch": 0.15937359025584844, "grad_norm": 0.011098353460658784, "learning_rate": 9.341210168277837e-06, "loss": 0.0, "step": 2473 }, { "epoch": 0.1594380357027776, "grad_norm": 0.0040408311809586265, "learning_rate": 9.340494092373791e-06, "loss": 0.0, "step": 2474 }, { "epoch": 0.15950248114970678, "grad_norm": 0.0017714230238855028, "learning_rate": 9.339778016469746e-06, "loss": 0.0, "step": 2475 }, { "epoch": 0.15956692659663596, "grad_norm": 0.0035697117594498335, "learning_rate": 9.339061940565702e-06, "loss": 0.0, "step": 2476 }, { "epoch": 0.15963137204356512, "grad_norm": 0.0014668561974660174, "learning_rate": 9.338345864661656e-06, "loss": 0.0, "step": 2477 }, { "epoch": 0.1596958174904943, "grad_norm": 1.4915300288922788, "learning_rate": 9.33762978875761e-06, "loss": 0.008, "step": 2478 }, { "epoch": 0.15976026293742346, "grad_norm": 0.0040878630786619, "learning_rate": 9.336913712853563e-06, "loss": 0.0, "step": 2479 }, { "epoch": 0.15982470838435264, "grad_norm": 0.0013943551804445802, "learning_rate": 9.336197636949517e-06, "loss": 0.0, "step": 2480 }, { "epoch": 0.15988915383128183, "grad_norm": 0.006055781747204741, "learning_rate": 9.335481561045471e-06, "loss": 0.0, "step": 2481 }, { "epoch": 0.15995359927821098, "grad_norm": 0.02593562146286775, "learning_rate": 9.334765485141426e-06, "loss": 0.0001, "step": 2482 }, { "epoch": 0.16001804472514017, "grad_norm": 0.00026559044513447465, "learning_rate": 9.33404940923738e-06, "loss": 0.0, "step": 2483 }, { "epoch": 0.16008249017206935, "grad_norm": 0.0022454719980279157, "learning_rate": 9.333333333333334e-06, "loss": 0.0, "step": 2484 }, { "epoch": 0.1601469356189985, "grad_norm": 0.019448823897386905, "learning_rate": 9.332617257429289e-06, "loss": 0.0001, "step": 2485 }, { "epoch": 0.1602113810659277, "grad_norm": 0.002218050878402869, "learning_rate": 9.331901181525243e-06, "loss": 0.0, "step": 2486 }, { "epoch": 0.16027582651285688, "grad_norm": 0.3381958283192187, "learning_rate": 9.331185105621197e-06, "loss": 0.0011, "step": 2487 }, { "epoch": 0.16034027195978603, "grad_norm": 0.00022144595144902289, "learning_rate": 9.330469029717152e-06, "loss": 0.0, "step": 2488 }, { "epoch": 0.16040471740671522, "grad_norm": 0.016463564310275253, "learning_rate": 9.329752953813104e-06, "loss": 0.0001, "step": 2489 }, { "epoch": 0.1604691628536444, "grad_norm": 0.0029835897225247788, "learning_rate": 9.329036877909058e-06, "loss": 0.0, "step": 2490 }, { "epoch": 0.16053360830057356, "grad_norm": 0.18282084405229299, "learning_rate": 9.328320802005013e-06, "loss": 0.0017, "step": 2491 }, { "epoch": 0.16059805374750274, "grad_norm": 0.011583676702772308, "learning_rate": 9.327604726100967e-06, "loss": 0.0001, "step": 2492 }, { "epoch": 0.1606624991944319, "grad_norm": 0.0034710967822639046, "learning_rate": 9.326888650196921e-06, "loss": 0.0, "step": 2493 }, { "epoch": 0.16072694464136109, "grad_norm": 0.0011947749959884256, "learning_rate": 9.326172574292876e-06, "loss": 0.0, "step": 2494 }, { "epoch": 0.16079139008829027, "grad_norm": 0.3944731943453859, "learning_rate": 9.32545649838883e-06, "loss": 0.0024, "step": 2495 }, { "epoch": 0.16085583553521943, "grad_norm": 0.0018753792654615559, "learning_rate": 9.324740422484784e-06, "loss": 0.0, "step": 2496 }, { "epoch": 0.1609202809821486, "grad_norm": 0.00033919158385829816, "learning_rate": 9.324024346580739e-06, "loss": 0.0, "step": 2497 }, { "epoch": 0.1609847264290778, "grad_norm": 0.008896867719477038, "learning_rate": 9.323308270676693e-06, "loss": 0.0001, "step": 2498 }, { "epoch": 0.16104917187600695, "grad_norm": 0.0010966606224628967, "learning_rate": 9.322592194772647e-06, "loss": 0.0, "step": 2499 }, { "epoch": 0.16111361732293614, "grad_norm": 6.286197510243029e-05, "learning_rate": 9.321876118868601e-06, "loss": 0.0, "step": 2500 }, { "epoch": 0.16117806276986532, "grad_norm": 0.012560768212650635, "learning_rate": 9.321160042964556e-06, "loss": 0.0001, "step": 2501 }, { "epoch": 0.16124250821679448, "grad_norm": 0.01912257629322411, "learning_rate": 9.32044396706051e-06, "loss": 0.0002, "step": 2502 }, { "epoch": 0.16130695366372366, "grad_norm": 0.016693878853747644, "learning_rate": 9.319727891156464e-06, "loss": 0.0001, "step": 2503 }, { "epoch": 0.16137139911065282, "grad_norm": 0.07219035829834394, "learning_rate": 9.319011815252419e-06, "loss": 0.0002, "step": 2504 }, { "epoch": 0.161435844557582, "grad_norm": 0.0012386043214729372, "learning_rate": 9.318295739348371e-06, "loss": 0.0, "step": 2505 }, { "epoch": 0.1615002900045112, "grad_norm": 0.016528571301615474, "learning_rate": 9.317579663444326e-06, "loss": 0.0001, "step": 2506 }, { "epoch": 0.16156473545144034, "grad_norm": 0.001469249498037701, "learning_rate": 9.31686358754028e-06, "loss": 0.0, "step": 2507 }, { "epoch": 0.16162918089836953, "grad_norm": 0.034161891732316735, "learning_rate": 9.316147511636234e-06, "loss": 0.0001, "step": 2508 }, { "epoch": 0.1616936263452987, "grad_norm": 0.4474646446177699, "learning_rate": 9.315431435732188e-06, "loss": 0.0014, "step": 2509 }, { "epoch": 0.16175807179222787, "grad_norm": 0.00017682347512700295, "learning_rate": 9.314715359828143e-06, "loss": 0.0, "step": 2510 }, { "epoch": 0.16182251723915705, "grad_norm": 0.0003216794260017332, "learning_rate": 9.313999283924097e-06, "loss": 0.0, "step": 2511 }, { "epoch": 0.16188696268608624, "grad_norm": 0.14043387697255008, "learning_rate": 9.313283208020051e-06, "loss": 0.0005, "step": 2512 }, { "epoch": 0.1619514081330154, "grad_norm": 0.005593976827827833, "learning_rate": 9.312567132116006e-06, "loss": 0.0, "step": 2513 }, { "epoch": 0.16201585357994458, "grad_norm": 0.00858530863042198, "learning_rate": 9.31185105621196e-06, "loss": 0.0, "step": 2514 }, { "epoch": 0.16208029902687376, "grad_norm": 0.21605553042851372, "learning_rate": 9.311134980307913e-06, "loss": 0.0015, "step": 2515 }, { "epoch": 0.16214474447380292, "grad_norm": 0.5249323210916667, "learning_rate": 9.310418904403867e-06, "loss": 0.0019, "step": 2516 }, { "epoch": 0.1622091899207321, "grad_norm": 0.0031300865219468605, "learning_rate": 9.309702828499821e-06, "loss": 0.0, "step": 2517 }, { "epoch": 0.16227363536766126, "grad_norm": 0.016066443398272942, "learning_rate": 9.308986752595775e-06, "loss": 0.0002, "step": 2518 }, { "epoch": 0.16233808081459045, "grad_norm": 0.011800571617004192, "learning_rate": 9.30827067669173e-06, "loss": 0.0001, "step": 2519 }, { "epoch": 0.16240252626151963, "grad_norm": 0.006443131808993591, "learning_rate": 9.307554600787684e-06, "loss": 0.0, "step": 2520 }, { "epoch": 0.1624669717084488, "grad_norm": 0.0002683456713499837, "learning_rate": 9.306838524883638e-06, "loss": 0.0, "step": 2521 }, { "epoch": 0.16253141715537797, "grad_norm": 0.012876411028502148, "learning_rate": 9.306122448979593e-06, "loss": 0.0, "step": 2522 }, { "epoch": 0.16259586260230716, "grad_norm": 0.052757710574246656, "learning_rate": 9.305406373075547e-06, "loss": 0.0, "step": 2523 }, { "epoch": 0.1626603080492363, "grad_norm": 0.001491306592329016, "learning_rate": 9.304690297171501e-06, "loss": 0.0, "step": 2524 }, { "epoch": 0.1627247534961655, "grad_norm": 0.03234166304160187, "learning_rate": 9.303974221267455e-06, "loss": 0.0002, "step": 2525 }, { "epoch": 0.16278919894309468, "grad_norm": 0.003989245770081959, "learning_rate": 9.30325814536341e-06, "loss": 0.0, "step": 2526 }, { "epoch": 0.16285364439002384, "grad_norm": 0.03451099249804787, "learning_rate": 9.302542069459364e-06, "loss": 0.0004, "step": 2527 }, { "epoch": 0.16291808983695302, "grad_norm": 0.0022142978663875178, "learning_rate": 9.301825993555318e-06, "loss": 0.0, "step": 2528 }, { "epoch": 0.1629825352838822, "grad_norm": 0.0008906830224453514, "learning_rate": 9.301109917651273e-06, "loss": 0.0, "step": 2529 }, { "epoch": 0.16304698073081136, "grad_norm": 0.40542301956458016, "learning_rate": 9.300393841747227e-06, "loss": 0.0016, "step": 2530 }, { "epoch": 0.16311142617774055, "grad_norm": 0.0012230303771541364, "learning_rate": 9.29967776584318e-06, "loss": 0.0, "step": 2531 }, { "epoch": 0.1631758716246697, "grad_norm": 0.056274097549309655, "learning_rate": 9.298961689939134e-06, "loss": 0.0, "step": 2532 }, { "epoch": 0.1632403170715989, "grad_norm": 0.001701698073510463, "learning_rate": 9.298245614035088e-06, "loss": 0.0, "step": 2533 }, { "epoch": 0.16330476251852807, "grad_norm": 0.004243688258801596, "learning_rate": 9.297529538131042e-06, "loss": 0.0, "step": 2534 }, { "epoch": 0.16336920796545723, "grad_norm": 0.0004493868624061053, "learning_rate": 9.296813462226997e-06, "loss": 0.0, "step": 2535 }, { "epoch": 0.16343365341238641, "grad_norm": 0.15120959538283224, "learning_rate": 9.296097386322951e-06, "loss": 0.0004, "step": 2536 }, { "epoch": 0.1634980988593156, "grad_norm": 0.012329401352514077, "learning_rate": 9.295381310418905e-06, "loss": 0.0001, "step": 2537 }, { "epoch": 0.16356254430624476, "grad_norm": 0.006242194356227512, "learning_rate": 9.29466523451486e-06, "loss": 0.0, "step": 2538 }, { "epoch": 0.16362698975317394, "grad_norm": 0.0004984472123917352, "learning_rate": 9.293949158610814e-06, "loss": 0.0, "step": 2539 }, { "epoch": 0.16369143520010312, "grad_norm": 0.005776456899099466, "learning_rate": 9.293233082706767e-06, "loss": 0.0, "step": 2540 }, { "epoch": 0.16375588064703228, "grad_norm": 0.0032904222645318234, "learning_rate": 9.292517006802721e-06, "loss": 0.0, "step": 2541 }, { "epoch": 0.16382032609396147, "grad_norm": 0.005594085108149277, "learning_rate": 9.291800930898675e-06, "loss": 0.0, "step": 2542 }, { "epoch": 0.16388477154089062, "grad_norm": 0.7958945806678982, "learning_rate": 9.29108485499463e-06, "loss": 0.0036, "step": 2543 }, { "epoch": 0.1639492169878198, "grad_norm": 0.007064345315062998, "learning_rate": 9.290368779090584e-06, "loss": 0.0, "step": 2544 }, { "epoch": 0.164013662434749, "grad_norm": 0.020122688256661223, "learning_rate": 9.289652703186538e-06, "loss": 0.0002, "step": 2545 }, { "epoch": 0.16407810788167815, "grad_norm": 0.024931361317891115, "learning_rate": 9.288936627282494e-06, "loss": 0.0, "step": 2546 }, { "epoch": 0.16414255332860733, "grad_norm": 0.2923833720903259, "learning_rate": 9.288220551378447e-06, "loss": 0.0008, "step": 2547 }, { "epoch": 0.16420699877553652, "grad_norm": 0.002371580849569522, "learning_rate": 9.287504475474401e-06, "loss": 0.0, "step": 2548 }, { "epoch": 0.16427144422246567, "grad_norm": 0.043302525215061886, "learning_rate": 9.286788399570355e-06, "loss": 0.0001, "step": 2549 }, { "epoch": 0.16433588966939486, "grad_norm": 0.002335608467180129, "learning_rate": 9.28607232366631e-06, "loss": 0.0, "step": 2550 }, { "epoch": 0.16440033511632404, "grad_norm": 0.06442228576493117, "learning_rate": 9.285356247762264e-06, "loss": 0.0006, "step": 2551 }, { "epoch": 0.1644647805632532, "grad_norm": 0.0255342682065745, "learning_rate": 9.284640171858218e-06, "loss": 0.0002, "step": 2552 }, { "epoch": 0.16452922601018238, "grad_norm": 0.023540083579128342, "learning_rate": 9.283924095954172e-06, "loss": 0.0001, "step": 2553 }, { "epoch": 0.16459367145711157, "grad_norm": 0.0017502331113364701, "learning_rate": 9.283208020050127e-06, "loss": 0.0, "step": 2554 }, { "epoch": 0.16465811690404072, "grad_norm": 0.0006908666154918732, "learning_rate": 9.282491944146081e-06, "loss": 0.0, "step": 2555 }, { "epoch": 0.1647225623509699, "grad_norm": 0.00048313697026926197, "learning_rate": 9.281775868242034e-06, "loss": 0.0, "step": 2556 }, { "epoch": 0.16478700779789907, "grad_norm": 0.0008612412057671483, "learning_rate": 9.281059792337988e-06, "loss": 0.0, "step": 2557 }, { "epoch": 0.16485145324482825, "grad_norm": 0.6262135406738908, "learning_rate": 9.280343716433942e-06, "loss": 0.0034, "step": 2558 }, { "epoch": 0.16491589869175743, "grad_norm": 0.0006356426223387895, "learning_rate": 9.279627640529897e-06, "loss": 0.0, "step": 2559 }, { "epoch": 0.1649803441386866, "grad_norm": 0.001305826023262251, "learning_rate": 9.278911564625851e-06, "loss": 0.0, "step": 2560 }, { "epoch": 0.16504478958561578, "grad_norm": 0.2166476208464855, "learning_rate": 9.278195488721805e-06, "loss": 0.0007, "step": 2561 }, { "epoch": 0.16510923503254496, "grad_norm": 0.0017773730090488518, "learning_rate": 9.27747941281776e-06, "loss": 0.0, "step": 2562 }, { "epoch": 0.16517368047947412, "grad_norm": 0.01864715096330182, "learning_rate": 9.276763336913714e-06, "loss": 0.0, "step": 2563 }, { "epoch": 0.1652381259264033, "grad_norm": 0.0009938659133838495, "learning_rate": 9.276047261009668e-06, "loss": 0.0, "step": 2564 }, { "epoch": 0.16530257137333249, "grad_norm": 0.1604910932287349, "learning_rate": 9.275331185105622e-06, "loss": 0.0001, "step": 2565 }, { "epoch": 0.16536701682026164, "grad_norm": 0.013365406361419188, "learning_rate": 9.274615109201575e-06, "loss": 0.0, "step": 2566 }, { "epoch": 0.16543146226719083, "grad_norm": 0.0015641780087289204, "learning_rate": 9.27389903329753e-06, "loss": 0.0, "step": 2567 }, { "epoch": 0.16549590771412, "grad_norm": 0.0015147951019427674, "learning_rate": 9.273182957393484e-06, "loss": 0.0, "step": 2568 }, { "epoch": 0.16556035316104917, "grad_norm": 0.003322727915939341, "learning_rate": 9.27246688148944e-06, "loss": 0.0, "step": 2569 }, { "epoch": 0.16562479860797835, "grad_norm": 0.3481242640776877, "learning_rate": 9.271750805585394e-06, "loss": 0.0082, "step": 2570 }, { "epoch": 0.1656892440549075, "grad_norm": 0.05511486226657862, "learning_rate": 9.271034729681348e-06, "loss": 0.0003, "step": 2571 }, { "epoch": 0.1657536895018367, "grad_norm": 0.0013772392389835638, "learning_rate": 9.2703186537773e-06, "loss": 0.0, "step": 2572 }, { "epoch": 0.16581813494876588, "grad_norm": 0.0028322900559294185, "learning_rate": 9.269602577873255e-06, "loss": 0.0, "step": 2573 }, { "epoch": 0.16588258039569503, "grad_norm": 0.0008771691379030808, "learning_rate": 9.26888650196921e-06, "loss": 0.0, "step": 2574 }, { "epoch": 0.16594702584262422, "grad_norm": 0.001837532065417171, "learning_rate": 9.268170426065164e-06, "loss": 0.0, "step": 2575 }, { "epoch": 0.1660114712895534, "grad_norm": 0.011364499836188676, "learning_rate": 9.267454350161118e-06, "loss": 0.0, "step": 2576 }, { "epoch": 0.16607591673648256, "grad_norm": 0.43601692392264785, "learning_rate": 9.266738274257072e-06, "loss": 0.0017, "step": 2577 }, { "epoch": 0.16614036218341174, "grad_norm": 0.2836992981645201, "learning_rate": 9.266022198353027e-06, "loss": 0.0005, "step": 2578 }, { "epoch": 0.16620480763034093, "grad_norm": 0.006381917270187372, "learning_rate": 9.26530612244898e-06, "loss": 0.0, "step": 2579 }, { "epoch": 0.16626925307727008, "grad_norm": 0.0995379968224912, "learning_rate": 9.264590046544935e-06, "loss": 0.0001, "step": 2580 }, { "epoch": 0.16633369852419927, "grad_norm": 0.005838220002381634, "learning_rate": 9.26387397064089e-06, "loss": 0.0, "step": 2581 }, { "epoch": 0.16639814397112843, "grad_norm": 0.1734023075655848, "learning_rate": 9.263157894736842e-06, "loss": 0.0015, "step": 2582 }, { "epoch": 0.1664625894180576, "grad_norm": 0.32543608770325944, "learning_rate": 9.262441818832796e-06, "loss": 0.001, "step": 2583 }, { "epoch": 0.1665270348649868, "grad_norm": 0.035015456749274816, "learning_rate": 9.26172574292875e-06, "loss": 0.002, "step": 2584 }, { "epoch": 0.16659148031191595, "grad_norm": 0.00900476722572773, "learning_rate": 9.261009667024705e-06, "loss": 0.0, "step": 2585 }, { "epoch": 0.16665592575884514, "grad_norm": 0.0008566073306815689, "learning_rate": 9.26029359112066e-06, "loss": 0.0, "step": 2586 }, { "epoch": 0.16672037120577432, "grad_norm": 0.4923978083746918, "learning_rate": 9.259577515216614e-06, "loss": 0.0022, "step": 2587 }, { "epoch": 0.16678481665270348, "grad_norm": 0.0018108477937682785, "learning_rate": 9.258861439312568e-06, "loss": 0.0, "step": 2588 }, { "epoch": 0.16684926209963266, "grad_norm": 0.004562858927769996, "learning_rate": 9.258145363408522e-06, "loss": 0.0, "step": 2589 }, { "epoch": 0.16691370754656185, "grad_norm": 0.07446345657383704, "learning_rate": 9.257429287504476e-06, "loss": 0.0001, "step": 2590 }, { "epoch": 0.166978152993491, "grad_norm": 0.0009006018433352618, "learning_rate": 9.25671321160043e-06, "loss": 0.0, "step": 2591 }, { "epoch": 0.1670425984404202, "grad_norm": 0.007707075664561584, "learning_rate": 9.255997135696383e-06, "loss": 0.0, "step": 2592 }, { "epoch": 0.16710704388734937, "grad_norm": 0.1405618313798484, "learning_rate": 9.25528105979234e-06, "loss": 0.0002, "step": 2593 }, { "epoch": 0.16717148933427853, "grad_norm": 0.0008934174065720114, "learning_rate": 9.254564983888294e-06, "loss": 0.0, "step": 2594 }, { "epoch": 0.1672359347812077, "grad_norm": 0.5161244256757117, "learning_rate": 9.253848907984248e-06, "loss": 0.0014, "step": 2595 }, { "epoch": 0.16730038022813687, "grad_norm": 0.05120134815256295, "learning_rate": 9.253132832080202e-06, "loss": 0.0001, "step": 2596 }, { "epoch": 0.16736482567506605, "grad_norm": 0.021043779453902272, "learning_rate": 9.252416756176157e-06, "loss": 0.0002, "step": 2597 }, { "epoch": 0.16742927112199524, "grad_norm": 0.0009231523664426139, "learning_rate": 9.251700680272109e-06, "loss": 0.0, "step": 2598 }, { "epoch": 0.1674937165689244, "grad_norm": 0.016473526227955518, "learning_rate": 9.250984604368063e-06, "loss": 0.0, "step": 2599 }, { "epoch": 0.16755816201585358, "grad_norm": 0.0006848207539497617, "learning_rate": 9.250268528464018e-06, "loss": 0.0, "step": 2600 }, { "epoch": 0.16762260746278276, "grad_norm": 0.001094208815293132, "learning_rate": 9.249552452559972e-06, "loss": 0.0, "step": 2601 }, { "epoch": 0.16768705290971192, "grad_norm": 0.00046785349861243374, "learning_rate": 9.248836376655926e-06, "loss": 0.0, "step": 2602 }, { "epoch": 0.1677514983566411, "grad_norm": 0.00014457733525141594, "learning_rate": 9.24812030075188e-06, "loss": 0.0, "step": 2603 }, { "epoch": 0.1678159438035703, "grad_norm": 0.029550263242095656, "learning_rate": 9.247404224847835e-06, "loss": 0.0001, "step": 2604 }, { "epoch": 0.16788038925049945, "grad_norm": 0.8862753948667788, "learning_rate": 9.24668814894379e-06, "loss": 0.003, "step": 2605 }, { "epoch": 0.16794483469742863, "grad_norm": 0.011991181543029776, "learning_rate": 9.245972073039744e-06, "loss": 0.0001, "step": 2606 }, { "epoch": 0.16800928014435781, "grad_norm": 0.05132832108348396, "learning_rate": 9.245255997135698e-06, "loss": 0.0016, "step": 2607 }, { "epoch": 0.16807372559128697, "grad_norm": 0.01007424389617214, "learning_rate": 9.24453992123165e-06, "loss": 0.0001, "step": 2608 }, { "epoch": 0.16813817103821616, "grad_norm": 0.3279116481888165, "learning_rate": 9.243823845327605e-06, "loss": 0.0008, "step": 2609 }, { "epoch": 0.1682026164851453, "grad_norm": 0.0010222354990563067, "learning_rate": 9.243107769423559e-06, "loss": 0.0, "step": 2610 }, { "epoch": 0.1682670619320745, "grad_norm": 0.0002879770515881767, "learning_rate": 9.242391693519513e-06, "loss": 0.0, "step": 2611 }, { "epoch": 0.16833150737900368, "grad_norm": 0.04284265966657461, "learning_rate": 9.241675617615468e-06, "loss": 0.0004, "step": 2612 }, { "epoch": 0.16839595282593284, "grad_norm": 0.02611223447159595, "learning_rate": 9.240959541711422e-06, "loss": 0.0004, "step": 2613 }, { "epoch": 0.16846039827286202, "grad_norm": 0.000562746261733419, "learning_rate": 9.240243465807376e-06, "loss": 0.0, "step": 2614 }, { "epoch": 0.1685248437197912, "grad_norm": 0.006530159238405348, "learning_rate": 9.23952738990333e-06, "loss": 0.0, "step": 2615 }, { "epoch": 0.16858928916672036, "grad_norm": 0.004683470390970493, "learning_rate": 9.238811313999285e-06, "loss": 0.0, "step": 2616 }, { "epoch": 0.16865373461364955, "grad_norm": 0.4705708583019044, "learning_rate": 9.238095238095239e-06, "loss": 0.0032, "step": 2617 }, { "epoch": 0.16871818006057873, "grad_norm": 0.0005363429561250486, "learning_rate": 9.237379162191193e-06, "loss": 0.0, "step": 2618 }, { "epoch": 0.1687826255075079, "grad_norm": 0.15183939342721997, "learning_rate": 9.236663086287148e-06, "loss": 0.0017, "step": 2619 }, { "epoch": 0.16884707095443707, "grad_norm": 0.0001052157179584856, "learning_rate": 9.235947010383102e-06, "loss": 0.0, "step": 2620 }, { "epoch": 0.16891151640136626, "grad_norm": 0.0007306970136187449, "learning_rate": 9.235230934479056e-06, "loss": 0.0, "step": 2621 }, { "epoch": 0.16897596184829541, "grad_norm": 0.002508690093415649, "learning_rate": 9.23451485857501e-06, "loss": 0.0, "step": 2622 }, { "epoch": 0.1690404072952246, "grad_norm": 0.0005834624117502399, "learning_rate": 9.233798782670965e-06, "loss": 0.0, "step": 2623 }, { "epoch": 0.16910485274215376, "grad_norm": 0.002580928592817899, "learning_rate": 9.233082706766918e-06, "loss": 0.0, "step": 2624 }, { "epoch": 0.16916929818908294, "grad_norm": 0.004280836334426163, "learning_rate": 9.232366630862872e-06, "loss": 0.0, "step": 2625 }, { "epoch": 0.16923374363601212, "grad_norm": 0.00018424144724289227, "learning_rate": 9.231650554958826e-06, "loss": 0.0, "step": 2626 }, { "epoch": 0.16929818908294128, "grad_norm": 0.0010581367877079988, "learning_rate": 9.23093447905478e-06, "loss": 0.0, "step": 2627 }, { "epoch": 0.16936263452987046, "grad_norm": 0.0007131972095357654, "learning_rate": 9.230218403150735e-06, "loss": 0.0, "step": 2628 }, { "epoch": 0.16942707997679965, "grad_norm": 0.00043519554939918065, "learning_rate": 9.229502327246689e-06, "loss": 0.0, "step": 2629 }, { "epoch": 0.1694915254237288, "grad_norm": 0.001351474322390404, "learning_rate": 9.228786251342643e-06, "loss": 0.0, "step": 2630 }, { "epoch": 0.169555970870658, "grad_norm": 0.020752774250390287, "learning_rate": 9.228070175438598e-06, "loss": 0.0002, "step": 2631 }, { "epoch": 0.16962041631758717, "grad_norm": 0.018716232806307055, "learning_rate": 9.227354099534552e-06, "loss": 0.0001, "step": 2632 }, { "epoch": 0.16968486176451633, "grad_norm": 0.00029643028839327425, "learning_rate": 9.226638023630505e-06, "loss": 0.0, "step": 2633 }, { "epoch": 0.16974930721144552, "grad_norm": 0.0022362696824027137, "learning_rate": 9.225921947726459e-06, "loss": 0.0, "step": 2634 }, { "epoch": 0.16981375265837467, "grad_norm": 0.00021475709016773186, "learning_rate": 9.225205871822413e-06, "loss": 0.0, "step": 2635 }, { "epoch": 0.16987819810530386, "grad_norm": 0.16918964578636836, "learning_rate": 9.224489795918367e-06, "loss": 0.0019, "step": 2636 }, { "epoch": 0.16994264355223304, "grad_norm": 0.12910555113432312, "learning_rate": 9.223773720014322e-06, "loss": 0.0002, "step": 2637 }, { "epoch": 0.1700070889991622, "grad_norm": 0.024268443228424336, "learning_rate": 9.223057644110276e-06, "loss": 0.0001, "step": 2638 }, { "epoch": 0.17007153444609138, "grad_norm": 0.0003196529914171969, "learning_rate": 9.222341568206232e-06, "loss": 0.0, "step": 2639 }, { "epoch": 0.17013597989302057, "grad_norm": 0.09965735102900866, "learning_rate": 9.221625492302185e-06, "loss": 0.0038, "step": 2640 }, { "epoch": 0.17020042533994972, "grad_norm": 0.009036427241455914, "learning_rate": 9.220909416398139e-06, "loss": 0.0001, "step": 2641 }, { "epoch": 0.1702648707868789, "grad_norm": 0.00011327834905210328, "learning_rate": 9.220193340494093e-06, "loss": 0.0, "step": 2642 }, { "epoch": 0.1703293162338081, "grad_norm": 0.00287235194554443, "learning_rate": 9.219477264590047e-06, "loss": 0.0, "step": 2643 }, { "epoch": 0.17039376168073725, "grad_norm": 0.015226695642371184, "learning_rate": 9.218761188686002e-06, "loss": 0.0001, "step": 2644 }, { "epoch": 0.17045820712766643, "grad_norm": 0.00020810014028015193, "learning_rate": 9.218045112781956e-06, "loss": 0.0, "step": 2645 }, { "epoch": 0.17052265257459562, "grad_norm": 0.00020155726396008463, "learning_rate": 9.21732903687791e-06, "loss": 0.0, "step": 2646 }, { "epoch": 0.17058709802152477, "grad_norm": 0.0010949986795731515, "learning_rate": 9.216612960973865e-06, "loss": 0.0, "step": 2647 }, { "epoch": 0.17065154346845396, "grad_norm": 0.0076949646895825684, "learning_rate": 9.215896885069819e-06, "loss": 0.0001, "step": 2648 }, { "epoch": 0.17071598891538312, "grad_norm": 0.001981525942460112, "learning_rate": 9.215180809165772e-06, "loss": 0.0, "step": 2649 }, { "epoch": 0.1707804343623123, "grad_norm": 0.038440089378423435, "learning_rate": 9.214464733261726e-06, "loss": 0.0001, "step": 2650 }, { "epoch": 0.17084487980924148, "grad_norm": 0.10907353425293582, "learning_rate": 9.21374865735768e-06, "loss": 0.0001, "step": 2651 }, { "epoch": 0.17090932525617064, "grad_norm": 0.0063255364810587255, "learning_rate": 9.213032581453634e-06, "loss": 0.0, "step": 2652 }, { "epoch": 0.17097377070309983, "grad_norm": 0.012754438453606867, "learning_rate": 9.212316505549589e-06, "loss": 0.0, "step": 2653 }, { "epoch": 0.171038216150029, "grad_norm": 0.00026067457484949056, "learning_rate": 9.211600429645543e-06, "loss": 0.0, "step": 2654 }, { "epoch": 0.17110266159695817, "grad_norm": 0.00021310395223525356, "learning_rate": 9.210884353741497e-06, "loss": 0.0, "step": 2655 }, { "epoch": 0.17116710704388735, "grad_norm": 0.010862934374835192, "learning_rate": 9.210168277837452e-06, "loss": 0.0, "step": 2656 }, { "epoch": 0.17123155249081654, "grad_norm": 0.06877897773526727, "learning_rate": 9.209452201933406e-06, "loss": 0.0002, "step": 2657 }, { "epoch": 0.1712959979377457, "grad_norm": 0.0003992406670996045, "learning_rate": 9.20873612602936e-06, "loss": 0.0, "step": 2658 }, { "epoch": 0.17136044338467488, "grad_norm": 0.000843099911044625, "learning_rate": 9.208020050125313e-06, "loss": 0.0, "step": 2659 }, { "epoch": 0.17142488883160406, "grad_norm": 0.03777898066302125, "learning_rate": 9.207303974221267e-06, "loss": 0.0004, "step": 2660 }, { "epoch": 0.17148933427853322, "grad_norm": 0.0013253862296127193, "learning_rate": 9.206587898317221e-06, "loss": 0.0, "step": 2661 }, { "epoch": 0.1715537797254624, "grad_norm": 0.0019887129236439605, "learning_rate": 9.205871822413176e-06, "loss": 0.0, "step": 2662 }, { "epoch": 0.17161822517239156, "grad_norm": 0.01141948088125074, "learning_rate": 9.205155746509132e-06, "loss": 0.0, "step": 2663 }, { "epoch": 0.17168267061932074, "grad_norm": 0.007296598030753204, "learning_rate": 9.204439670605086e-06, "loss": 0.0, "step": 2664 }, { "epoch": 0.17174711606624993, "grad_norm": 0.015167112561033686, "learning_rate": 9.203723594701039e-06, "loss": 0.0002, "step": 2665 }, { "epoch": 0.17181156151317908, "grad_norm": 0.006666472705684092, "learning_rate": 9.203007518796993e-06, "loss": 0.0, "step": 2666 }, { "epoch": 0.17187600696010827, "grad_norm": 0.005332642179530365, "learning_rate": 9.202291442892947e-06, "loss": 0.0, "step": 2667 }, { "epoch": 0.17194045240703745, "grad_norm": 0.0003287436028234088, "learning_rate": 9.201575366988902e-06, "loss": 0.0, "step": 2668 }, { "epoch": 0.1720048978539666, "grad_norm": 0.008185411046763031, "learning_rate": 9.200859291084856e-06, "loss": 0.0, "step": 2669 }, { "epoch": 0.1720693433008958, "grad_norm": 0.0003072117989800363, "learning_rate": 9.20014321518081e-06, "loss": 0.0, "step": 2670 }, { "epoch": 0.17213378874782498, "grad_norm": 0.14845676673531352, "learning_rate": 9.199427139276764e-06, "loss": 0.0001, "step": 2671 }, { "epoch": 0.17219823419475414, "grad_norm": 0.0023122495109587056, "learning_rate": 9.198711063372719e-06, "loss": 0.0, "step": 2672 }, { "epoch": 0.17226267964168332, "grad_norm": 2.9962439866178707, "learning_rate": 9.197994987468673e-06, "loss": 0.0269, "step": 2673 }, { "epoch": 0.17232712508861248, "grad_norm": 0.040175158651624854, "learning_rate": 9.197278911564627e-06, "loss": 0.0002, "step": 2674 }, { "epoch": 0.17239157053554166, "grad_norm": 0.0012490798280579728, "learning_rate": 9.19656283566058e-06, "loss": 0.0, "step": 2675 }, { "epoch": 0.17245601598247084, "grad_norm": 0.003016418042880189, "learning_rate": 9.195846759756534e-06, "loss": 0.0, "step": 2676 }, { "epoch": 0.1725204614294, "grad_norm": 0.003284682926061627, "learning_rate": 9.195130683852489e-06, "loss": 0.0, "step": 2677 }, { "epoch": 0.17258490687632919, "grad_norm": 0.011853928536630667, "learning_rate": 9.194414607948443e-06, "loss": 0.0001, "step": 2678 }, { "epoch": 0.17264935232325837, "grad_norm": 0.09941864921166386, "learning_rate": 9.193698532044397e-06, "loss": 0.0002, "step": 2679 }, { "epoch": 0.17271379777018753, "grad_norm": 0.008067099418627759, "learning_rate": 9.192982456140351e-06, "loss": 0.0, "step": 2680 }, { "epoch": 0.1727782432171167, "grad_norm": 0.5154138768623525, "learning_rate": 9.192266380236306e-06, "loss": 0.0166, "step": 2681 }, { "epoch": 0.1728426886640459, "grad_norm": 0.01469303930545766, "learning_rate": 9.19155030433226e-06, "loss": 0.0001, "step": 2682 }, { "epoch": 0.17290713411097505, "grad_norm": 0.220314544641192, "learning_rate": 9.190834228428214e-06, "loss": 0.006, "step": 2683 }, { "epoch": 0.17297157955790424, "grad_norm": 0.09990222740725906, "learning_rate": 9.190118152524169e-06, "loss": 0.0004, "step": 2684 }, { "epoch": 0.17303602500483342, "grad_norm": 0.00873880537834303, "learning_rate": 9.189402076620121e-06, "loss": 0.0, "step": 2685 }, { "epoch": 0.17310047045176258, "grad_norm": 0.0012456913305398055, "learning_rate": 9.188686000716077e-06, "loss": 0.0, "step": 2686 }, { "epoch": 0.17316491589869176, "grad_norm": 0.003980550458896421, "learning_rate": 9.187969924812032e-06, "loss": 0.0, "step": 2687 }, { "epoch": 0.17322936134562092, "grad_norm": 0.24734072155041284, "learning_rate": 9.187253848907986e-06, "loss": 0.0062, "step": 2688 }, { "epoch": 0.1732938067925501, "grad_norm": 0.0014917908984364223, "learning_rate": 9.18653777300394e-06, "loss": 0.0, "step": 2689 }, { "epoch": 0.1733582522394793, "grad_norm": 0.018215838914686967, "learning_rate": 9.185821697099894e-06, "loss": 0.0, "step": 2690 }, { "epoch": 0.17342269768640844, "grad_norm": 0.0007429930106887829, "learning_rate": 9.185105621195847e-06, "loss": 0.0, "step": 2691 }, { "epoch": 0.17348714313333763, "grad_norm": 0.0001631497864257658, "learning_rate": 9.184389545291801e-06, "loss": 0.0, "step": 2692 }, { "epoch": 0.1735515885802668, "grad_norm": 0.010403012017535249, "learning_rate": 9.183673469387756e-06, "loss": 0.0, "step": 2693 }, { "epoch": 0.17361603402719597, "grad_norm": 0.008222731931976913, "learning_rate": 9.18295739348371e-06, "loss": 0.0001, "step": 2694 }, { "epoch": 0.17368047947412515, "grad_norm": 0.03993690792998894, "learning_rate": 9.182241317579664e-06, "loss": 0.0001, "step": 2695 }, { "epoch": 0.17374492492105434, "grad_norm": 0.00030802079541501006, "learning_rate": 9.181525241675619e-06, "loss": 0.0, "step": 2696 }, { "epoch": 0.1738093703679835, "grad_norm": 0.005414459370388794, "learning_rate": 9.180809165771573e-06, "loss": 0.0, "step": 2697 }, { "epoch": 0.17387381581491268, "grad_norm": 0.006613869105241104, "learning_rate": 9.180093089867527e-06, "loss": 0.0, "step": 2698 }, { "epoch": 0.17393826126184186, "grad_norm": 0.3907094673384912, "learning_rate": 9.179377013963481e-06, "loss": 0.0019, "step": 2699 }, { "epoch": 0.17400270670877102, "grad_norm": 0.009057081484954533, "learning_rate": 9.178660938059436e-06, "loss": 0.0001, "step": 2700 }, { "epoch": 0.1740671521557002, "grad_norm": 0.7250289730333702, "learning_rate": 9.177944862155388e-06, "loss": 0.0027, "step": 2701 }, { "epoch": 0.17413159760262936, "grad_norm": 0.005397337466009714, "learning_rate": 9.177228786251343e-06, "loss": 0.0001, "step": 2702 }, { "epoch": 0.17419604304955855, "grad_norm": 0.0006209733526635416, "learning_rate": 9.176512710347297e-06, "loss": 0.0, "step": 2703 }, { "epoch": 0.17426048849648773, "grad_norm": 0.04377721584989303, "learning_rate": 9.175796634443251e-06, "loss": 0.0002, "step": 2704 }, { "epoch": 0.1743249339434169, "grad_norm": 0.0001704777087367316, "learning_rate": 9.175080558539206e-06, "loss": 0.0, "step": 2705 }, { "epoch": 0.17438937939034607, "grad_norm": 0.002415898461971982, "learning_rate": 9.17436448263516e-06, "loss": 0.0, "step": 2706 }, { "epoch": 0.17445382483727526, "grad_norm": 0.00010784267160302466, "learning_rate": 9.173648406731114e-06, "loss": 0.0, "step": 2707 }, { "epoch": 0.1745182702842044, "grad_norm": 0.382636049048164, "learning_rate": 9.172932330827068e-06, "loss": 0.0106, "step": 2708 }, { "epoch": 0.1745827157311336, "grad_norm": 0.0011183936260544516, "learning_rate": 9.172216254923023e-06, "loss": 0.0, "step": 2709 }, { "epoch": 0.17464716117806278, "grad_norm": 0.0002463259427477652, "learning_rate": 9.171500179018977e-06, "loss": 0.0, "step": 2710 }, { "epoch": 0.17471160662499194, "grad_norm": 1.1865234375, "learning_rate": 9.170784103114931e-06, "loss": 0.0072, "step": 2711 }, { "epoch": 0.17477605207192112, "grad_norm": 0.002127015512866035, "learning_rate": 9.170068027210886e-06, "loss": 0.0, "step": 2712 }, { "epoch": 0.17484049751885028, "grad_norm": 0.12030690802546253, "learning_rate": 9.16935195130684e-06, "loss": 0.0005, "step": 2713 }, { "epoch": 0.17490494296577946, "grad_norm": 0.521378924391397, "learning_rate": 9.168635875402794e-06, "loss": 0.0034, "step": 2714 }, { "epoch": 0.17496938841270865, "grad_norm": 0.018006203519038057, "learning_rate": 9.167919799498749e-06, "loss": 0.0001, "step": 2715 }, { "epoch": 0.1750338338596378, "grad_norm": 0.004143970490399163, "learning_rate": 9.167203723594703e-06, "loss": 0.0, "step": 2716 }, { "epoch": 0.175098279306567, "grad_norm": 0.00027283137921355467, "learning_rate": 9.166487647690655e-06, "loss": 0.0, "step": 2717 }, { "epoch": 0.17516272475349617, "grad_norm": 0.0491949441081807, "learning_rate": 9.16577157178661e-06, "loss": 0.0001, "step": 2718 }, { "epoch": 0.17522717020042533, "grad_norm": 0.0031770309749900405, "learning_rate": 9.165055495882564e-06, "loss": 0.0, "step": 2719 }, { "epoch": 0.17529161564735452, "grad_norm": 0.10751967746263864, "learning_rate": 9.164339419978518e-06, "loss": 0.0005, "step": 2720 }, { "epoch": 0.1753560610942837, "grad_norm": 0.14063707273371076, "learning_rate": 9.163623344074473e-06, "loss": 0.0003, "step": 2721 }, { "epoch": 0.17542050654121286, "grad_norm": 0.0011253487254859079, "learning_rate": 9.162907268170427e-06, "loss": 0.0, "step": 2722 }, { "epoch": 0.17548495198814204, "grad_norm": 0.030041388852838077, "learning_rate": 9.162191192266381e-06, "loss": 0.0001, "step": 2723 }, { "epoch": 0.17554939743507122, "grad_norm": 0.0030749777231569345, "learning_rate": 9.161475116362336e-06, "loss": 0.0, "step": 2724 }, { "epoch": 0.17561384288200038, "grad_norm": 0.01139113188354577, "learning_rate": 9.16075904045829e-06, "loss": 0.0001, "step": 2725 }, { "epoch": 0.17567828832892957, "grad_norm": 0.005132686334298028, "learning_rate": 9.160042964554242e-06, "loss": 0.0, "step": 2726 }, { "epoch": 0.17574273377585872, "grad_norm": 0.011252321098629682, "learning_rate": 9.159326888650197e-06, "loss": 0.0, "step": 2727 }, { "epoch": 0.1758071792227879, "grad_norm": 0.0001379375049884014, "learning_rate": 9.158610812746151e-06, "loss": 0.0, "step": 2728 }, { "epoch": 0.1758716246697171, "grad_norm": 0.009681077505664085, "learning_rate": 9.157894736842105e-06, "loss": 0.0001, "step": 2729 }, { "epoch": 0.17593607011664625, "grad_norm": 0.005116961384670569, "learning_rate": 9.15717866093806e-06, "loss": 0.0, "step": 2730 }, { "epoch": 0.17600051556357543, "grad_norm": 0.0266595739957785, "learning_rate": 9.156462585034014e-06, "loss": 0.0001, "step": 2731 }, { "epoch": 0.17606496101050462, "grad_norm": 0.00023531374018092132, "learning_rate": 9.155746509129968e-06, "loss": 0.0, "step": 2732 }, { "epoch": 0.17612940645743377, "grad_norm": 0.004908907718591901, "learning_rate": 9.155030433225923e-06, "loss": 0.0, "step": 2733 }, { "epoch": 0.17619385190436296, "grad_norm": 0.0026669341550138548, "learning_rate": 9.154314357321877e-06, "loss": 0.0, "step": 2734 }, { "epoch": 0.17625829735129214, "grad_norm": 0.024920427777757822, "learning_rate": 9.153598281417831e-06, "loss": 0.0001, "step": 2735 }, { "epoch": 0.1763227427982213, "grad_norm": 0.0002157229860912765, "learning_rate": 9.152882205513785e-06, "loss": 0.0, "step": 2736 }, { "epoch": 0.17638718824515048, "grad_norm": 0.003320399939081684, "learning_rate": 9.15216612960974e-06, "loss": 0.0, "step": 2737 }, { "epoch": 0.17645163369207967, "grad_norm": 0.0021092644703280157, "learning_rate": 9.151450053705694e-06, "loss": 0.0, "step": 2738 }, { "epoch": 0.17651607913900882, "grad_norm": 0.28572773821913267, "learning_rate": 9.150733977801648e-06, "loss": 0.0014, "step": 2739 }, { "epoch": 0.176580524585938, "grad_norm": 0.007860353343515316, "learning_rate": 9.150017901897603e-06, "loss": 0.0, "step": 2740 }, { "epoch": 0.17664497003286717, "grad_norm": 0.0074805734351772474, "learning_rate": 9.149301825993557e-06, "loss": 0.0001, "step": 2741 }, { "epoch": 0.17670941547979635, "grad_norm": 0.00011039618974302341, "learning_rate": 9.14858575008951e-06, "loss": 0.0, "step": 2742 }, { "epoch": 0.17677386092672553, "grad_norm": 0.0070548084467875975, "learning_rate": 9.147869674185464e-06, "loss": 0.0, "step": 2743 }, { "epoch": 0.1768383063736547, "grad_norm": 0.010001704121558984, "learning_rate": 9.147153598281418e-06, "loss": 0.0, "step": 2744 }, { "epoch": 0.17690275182058388, "grad_norm": 0.0019253162831213417, "learning_rate": 9.146437522377372e-06, "loss": 0.0, "step": 2745 }, { "epoch": 0.17696719726751306, "grad_norm": 0.01767115100918315, "learning_rate": 9.145721446473327e-06, "loss": 0.0001, "step": 2746 }, { "epoch": 0.17703164271444222, "grad_norm": 0.012779574033902407, "learning_rate": 9.145005370569281e-06, "loss": 0.0001, "step": 2747 }, { "epoch": 0.1770960881613714, "grad_norm": 0.0015406080229237796, "learning_rate": 9.144289294665235e-06, "loss": 0.0, "step": 2748 }, { "epoch": 0.17716053360830059, "grad_norm": 0.008029459760126534, "learning_rate": 9.14357321876119e-06, "loss": 0.0, "step": 2749 }, { "epoch": 0.17722497905522974, "grad_norm": 0.007691325124199665, "learning_rate": 9.142857142857144e-06, "loss": 0.0, "step": 2750 }, { "epoch": 0.17728942450215893, "grad_norm": 0.0007767056720782113, "learning_rate": 9.142141066953098e-06, "loss": 0.0, "step": 2751 }, { "epoch": 0.17735386994908808, "grad_norm": 0.0006003394971469371, "learning_rate": 9.14142499104905e-06, "loss": 0.0, "step": 2752 }, { "epoch": 0.17741831539601727, "grad_norm": 2.471258603101776, "learning_rate": 9.140708915145005e-06, "loss": 0.0177, "step": 2753 }, { "epoch": 0.17748276084294645, "grad_norm": 0.022019753049725126, "learning_rate": 9.13999283924096e-06, "loss": 0.0001, "step": 2754 }, { "epoch": 0.1775472062898756, "grad_norm": 0.42206641552426616, "learning_rate": 9.139276763336914e-06, "loss": 0.0028, "step": 2755 }, { "epoch": 0.1776116517368048, "grad_norm": 0.0005502695640614431, "learning_rate": 9.13856068743287e-06, "loss": 0.0, "step": 2756 }, { "epoch": 0.17767609718373398, "grad_norm": 0.01922958508482105, "learning_rate": 9.137844611528824e-06, "loss": 0.0, "step": 2757 }, { "epoch": 0.17774054263066313, "grad_norm": 0.19847522624781685, "learning_rate": 9.137128535624777e-06, "loss": 0.0001, "step": 2758 }, { "epoch": 0.17780498807759232, "grad_norm": 0.46543913466278036, "learning_rate": 9.136412459720731e-06, "loss": 0.0013, "step": 2759 }, { "epoch": 0.1778694335245215, "grad_norm": 0.02188902143711718, "learning_rate": 9.135696383816685e-06, "loss": 0.0001, "step": 2760 }, { "epoch": 0.17793387897145066, "grad_norm": 0.35200723492504954, "learning_rate": 9.13498030791264e-06, "loss": 0.0011, "step": 2761 }, { "epoch": 0.17799832441837984, "grad_norm": 0.000621212590110667, "learning_rate": 9.134264232008594e-06, "loss": 0.0, "step": 2762 }, { "epoch": 0.17806276986530903, "grad_norm": 0.0041229362491163645, "learning_rate": 9.133548156104548e-06, "loss": 0.0, "step": 2763 }, { "epoch": 0.17812721531223819, "grad_norm": 0.1805057863321118, "learning_rate": 9.132832080200502e-06, "loss": 0.0005, "step": 2764 }, { "epoch": 0.17819166075916737, "grad_norm": 0.04216859049336486, "learning_rate": 9.132116004296457e-06, "loss": 0.0005, "step": 2765 }, { "epoch": 0.17825610620609653, "grad_norm": 0.007158825226122906, "learning_rate": 9.131399928392411e-06, "loss": 0.0, "step": 2766 }, { "epoch": 0.1783205516530257, "grad_norm": 0.0007151610788651363, "learning_rate": 9.130683852488365e-06, "loss": 0.0, "step": 2767 }, { "epoch": 0.1783849970999549, "grad_norm": 0.026471367081296403, "learning_rate": 9.129967776584318e-06, "loss": 0.0, "step": 2768 }, { "epoch": 0.17844944254688405, "grad_norm": 0.028729129156446532, "learning_rate": 9.129251700680272e-06, "loss": 0.0001, "step": 2769 }, { "epoch": 0.17851388799381324, "grad_norm": 0.05488665115941958, "learning_rate": 9.128535624776226e-06, "loss": 0.0001, "step": 2770 }, { "epoch": 0.17857833344074242, "grad_norm": 0.06607381270845136, "learning_rate": 9.12781954887218e-06, "loss": 0.0002, "step": 2771 }, { "epoch": 0.17864277888767158, "grad_norm": 0.04787404155176431, "learning_rate": 9.127103472968135e-06, "loss": 0.0001, "step": 2772 }, { "epoch": 0.17870722433460076, "grad_norm": 0.10605978049032681, "learning_rate": 9.12638739706409e-06, "loss": 0.0003, "step": 2773 }, { "epoch": 0.17877166978152995, "grad_norm": 0.0007944870144507999, "learning_rate": 9.125671321160044e-06, "loss": 0.0, "step": 2774 }, { "epoch": 0.1788361152284591, "grad_norm": 0.00026383111149123895, "learning_rate": 9.124955245255998e-06, "loss": 0.0, "step": 2775 }, { "epoch": 0.1789005606753883, "grad_norm": 0.00323695762220021, "learning_rate": 9.124239169351952e-06, "loss": 0.0, "step": 2776 }, { "epoch": 0.17896500612231747, "grad_norm": 0.12140674287801527, "learning_rate": 9.123523093447907e-06, "loss": 0.0002, "step": 2777 }, { "epoch": 0.17902945156924663, "grad_norm": 0.05398408619186501, "learning_rate": 9.12280701754386e-06, "loss": 0.0001, "step": 2778 }, { "epoch": 0.1790938970161758, "grad_norm": 0.0087486923606527, "learning_rate": 9.122090941639813e-06, "loss": 0.0001, "step": 2779 }, { "epoch": 0.17915834246310497, "grad_norm": 0.0002230895636351413, "learning_rate": 9.12137486573577e-06, "loss": 0.0, "step": 2780 }, { "epoch": 0.17922278791003415, "grad_norm": 0.33086147690502815, "learning_rate": 9.120658789831724e-06, "loss": 0.0008, "step": 2781 }, { "epoch": 0.17928723335696334, "grad_norm": 0.909170935370485, "learning_rate": 9.119942713927678e-06, "loss": 0.0027, "step": 2782 }, { "epoch": 0.1793516788038925, "grad_norm": 0.0024790891799992814, "learning_rate": 9.119226638023632e-06, "loss": 0.0, "step": 2783 }, { "epoch": 0.17941612425082168, "grad_norm": 0.10431613976366509, "learning_rate": 9.118510562119585e-06, "loss": 0.0004, "step": 2784 }, { "epoch": 0.17948056969775086, "grad_norm": 0.012409698026434208, "learning_rate": 9.11779448621554e-06, "loss": 0.0, "step": 2785 }, { "epoch": 0.17954501514468002, "grad_norm": 0.2690126294650971, "learning_rate": 9.117078410311494e-06, "loss": 0.0038, "step": 2786 }, { "epoch": 0.1796094605916092, "grad_norm": 0.0033128166764828226, "learning_rate": 9.116362334407448e-06, "loss": 0.0, "step": 2787 }, { "epoch": 0.1796739060385384, "grad_norm": 0.0009229370739567911, "learning_rate": 9.115646258503402e-06, "loss": 0.0, "step": 2788 }, { "epoch": 0.17973835148546755, "grad_norm": 0.012281191385820282, "learning_rate": 9.114930182599356e-06, "loss": 0.0, "step": 2789 }, { "epoch": 0.17980279693239673, "grad_norm": 0.0010946180994970097, "learning_rate": 9.11421410669531e-06, "loss": 0.0, "step": 2790 }, { "epoch": 0.1798672423793259, "grad_norm": 0.0022886774893961163, "learning_rate": 9.113498030791265e-06, "loss": 0.0, "step": 2791 }, { "epoch": 0.17993168782625507, "grad_norm": 0.18050407336452123, "learning_rate": 9.11278195488722e-06, "loss": 0.0004, "step": 2792 }, { "epoch": 0.17999613327318426, "grad_norm": 0.00776095822730694, "learning_rate": 9.112065878983174e-06, "loss": 0.0, "step": 2793 }, { "epoch": 0.1800605787201134, "grad_norm": 0.010017379040207797, "learning_rate": 9.111349803079126e-06, "loss": 0.0, "step": 2794 }, { "epoch": 0.1801250241670426, "grad_norm": 0.06924958627733009, "learning_rate": 9.11063372717508e-06, "loss": 0.0003, "step": 2795 }, { "epoch": 0.18018946961397178, "grad_norm": 0.2812996264226042, "learning_rate": 9.109917651271035e-06, "loss": 0.0004, "step": 2796 }, { "epoch": 0.18025391506090094, "grad_norm": 0.0031109287427372747, "learning_rate": 9.10920157536699e-06, "loss": 0.0, "step": 2797 }, { "epoch": 0.18031836050783012, "grad_norm": 0.005299891749785947, "learning_rate": 9.108485499462943e-06, "loss": 0.0, "step": 2798 }, { "epoch": 0.1803828059547593, "grad_norm": 0.159811229772372, "learning_rate": 9.107769423558898e-06, "loss": 0.0094, "step": 2799 }, { "epoch": 0.18044725140168846, "grad_norm": 0.3035861307779333, "learning_rate": 9.107053347654852e-06, "loss": 0.0011, "step": 2800 }, { "epoch": 0.18051169684861765, "grad_norm": 0.10506831640956556, "learning_rate": 9.106337271750806e-06, "loss": 0.0003, "step": 2801 }, { "epoch": 0.18057614229554683, "grad_norm": 0.06075173568196115, "learning_rate": 9.10562119584676e-06, "loss": 0.0001, "step": 2802 }, { "epoch": 0.180640587742476, "grad_norm": 0.011675944604708841, "learning_rate": 9.104905119942715e-06, "loss": 0.0, "step": 2803 }, { "epoch": 0.18070503318940517, "grad_norm": 0.017210122448086982, "learning_rate": 9.10418904403867e-06, "loss": 0.0001, "step": 2804 }, { "epoch": 0.18076947863633433, "grad_norm": 0.0010844863697502473, "learning_rate": 9.103472968134624e-06, "loss": 0.0, "step": 2805 }, { "epoch": 0.18083392408326351, "grad_norm": 0.03426975746377976, "learning_rate": 9.102756892230578e-06, "loss": 0.0, "step": 2806 }, { "epoch": 0.1808983695301927, "grad_norm": 0.0514928133710475, "learning_rate": 9.102040816326532e-06, "loss": 0.0001, "step": 2807 }, { "epoch": 0.18096281497712186, "grad_norm": 0.00034102055632519174, "learning_rate": 9.101324740422486e-06, "loss": 0.0, "step": 2808 }, { "epoch": 0.18102726042405104, "grad_norm": 0.007900137649701421, "learning_rate": 9.10060866451844e-06, "loss": 0.0, "step": 2809 }, { "epoch": 0.18109170587098022, "grad_norm": 0.0012440836350956035, "learning_rate": 9.099892588614393e-06, "loss": 0.0, "step": 2810 }, { "epoch": 0.18115615131790938, "grad_norm": 0.0045855459787961725, "learning_rate": 9.099176512710348e-06, "loss": 0.0, "step": 2811 }, { "epoch": 0.18122059676483857, "grad_norm": 0.1941159407275899, "learning_rate": 9.098460436806302e-06, "loss": 0.0007, "step": 2812 }, { "epoch": 0.18128504221176775, "grad_norm": 0.025967662808676583, "learning_rate": 9.097744360902256e-06, "loss": 0.0001, "step": 2813 }, { "epoch": 0.1813494876586969, "grad_norm": 0.0331286074506034, "learning_rate": 9.09702828499821e-06, "loss": 0.0001, "step": 2814 }, { "epoch": 0.1814139331056261, "grad_norm": 0.08495830115250962, "learning_rate": 9.096312209094165e-06, "loss": 0.0022, "step": 2815 }, { "epoch": 0.18147837855255528, "grad_norm": 0.00813836807265155, "learning_rate": 9.095596133190119e-06, "loss": 0.0, "step": 2816 }, { "epoch": 0.18154282399948443, "grad_norm": 0.008071176914842531, "learning_rate": 9.094880057286073e-06, "loss": 0.0, "step": 2817 }, { "epoch": 0.18160726944641362, "grad_norm": 0.03360537068790458, "learning_rate": 9.094163981382028e-06, "loss": 0.0, "step": 2818 }, { "epoch": 0.18167171489334277, "grad_norm": 3.9930071502552744, "learning_rate": 9.09344790547798e-06, "loss": 0.0428, "step": 2819 }, { "epoch": 0.18173616034027196, "grad_norm": 0.3067133750386272, "learning_rate": 9.092731829573935e-06, "loss": 0.0021, "step": 2820 }, { "epoch": 0.18180060578720114, "grad_norm": 0.09692007100777007, "learning_rate": 9.092015753669889e-06, "loss": 0.0003, "step": 2821 }, { "epoch": 0.1818650512341303, "grad_norm": 0.008326567539547143, "learning_rate": 9.091299677765843e-06, "loss": 0.0, "step": 2822 }, { "epoch": 0.18192949668105948, "grad_norm": 0.09641887182616365, "learning_rate": 9.090583601861798e-06, "loss": 0.0011, "step": 2823 }, { "epoch": 0.18199394212798867, "grad_norm": 0.007352684257062721, "learning_rate": 9.089867525957752e-06, "loss": 0.0, "step": 2824 }, { "epoch": 0.18205838757491782, "grad_norm": 0.030274536320525708, "learning_rate": 9.089151450053706e-06, "loss": 0.0001, "step": 2825 }, { "epoch": 0.182122833021847, "grad_norm": 0.0006278500122715883, "learning_rate": 9.08843537414966e-06, "loss": 0.0, "step": 2826 }, { "epoch": 0.1821872784687762, "grad_norm": 0.0030603911056034895, "learning_rate": 9.087719298245615e-06, "loss": 0.0, "step": 2827 }, { "epoch": 0.18225172391570535, "grad_norm": 0.0004505991303399384, "learning_rate": 9.087003222341569e-06, "loss": 0.0, "step": 2828 }, { "epoch": 0.18231616936263453, "grad_norm": 0.017218593559380928, "learning_rate": 9.086287146437523e-06, "loss": 0.0001, "step": 2829 }, { "epoch": 0.18238061480956372, "grad_norm": 0.0021886677003908977, "learning_rate": 9.085571070533478e-06, "loss": 0.0, "step": 2830 }, { "epoch": 0.18244506025649287, "grad_norm": 0.0004787494491278347, "learning_rate": 9.084854994629432e-06, "loss": 0.0, "step": 2831 }, { "epoch": 0.18250950570342206, "grad_norm": 0.0060502932594086204, "learning_rate": 9.084138918725386e-06, "loss": 0.0, "step": 2832 }, { "epoch": 0.18257395115035122, "grad_norm": 0.00035090977070178326, "learning_rate": 9.08342284282134e-06, "loss": 0.0, "step": 2833 }, { "epoch": 0.1826383965972804, "grad_norm": 0.12514816384262056, "learning_rate": 9.082706766917295e-06, "loss": 0.0004, "step": 2834 }, { "epoch": 0.18270284204420958, "grad_norm": 0.3302171515942961, "learning_rate": 9.081990691013247e-06, "loss": 0.0015, "step": 2835 }, { "epoch": 0.18276728749113874, "grad_norm": 0.012892377663801916, "learning_rate": 9.081274615109202e-06, "loss": 0.0001, "step": 2836 }, { "epoch": 0.18283173293806793, "grad_norm": 0.1393849681809212, "learning_rate": 9.080558539205156e-06, "loss": 0.0001, "step": 2837 }, { "epoch": 0.1828961783849971, "grad_norm": 0.004492926812105762, "learning_rate": 9.07984246330111e-06, "loss": 0.0, "step": 2838 }, { "epoch": 0.18296062383192627, "grad_norm": 0.03560397375172943, "learning_rate": 9.079126387397065e-06, "loss": 0.0, "step": 2839 }, { "epoch": 0.18302506927885545, "grad_norm": 0.32795772375842486, "learning_rate": 9.078410311493019e-06, "loss": 0.0007, "step": 2840 }, { "epoch": 0.18308951472578464, "grad_norm": 0.01319255954466059, "learning_rate": 9.077694235588973e-06, "loss": 0.0, "step": 2841 }, { "epoch": 0.1831539601727138, "grad_norm": 0.1890710931196515, "learning_rate": 9.076978159684928e-06, "loss": 0.0016, "step": 2842 }, { "epoch": 0.18321840561964298, "grad_norm": 0.019405285126641924, "learning_rate": 9.076262083780882e-06, "loss": 0.0001, "step": 2843 }, { "epoch": 0.18328285106657213, "grad_norm": 0.000751944682332039, "learning_rate": 9.075546007876836e-06, "loss": 0.0, "step": 2844 }, { "epoch": 0.18334729651350132, "grad_norm": 0.3132701562643847, "learning_rate": 9.074829931972789e-06, "loss": 0.0006, "step": 2845 }, { "epoch": 0.1834117419604305, "grad_norm": 0.000840455758111162, "learning_rate": 9.074113856068743e-06, "loss": 0.0, "step": 2846 }, { "epoch": 0.18347618740735966, "grad_norm": 0.010120712128541575, "learning_rate": 9.073397780164697e-06, "loss": 0.0001, "step": 2847 }, { "epoch": 0.18354063285428884, "grad_norm": 0.009966635973211107, "learning_rate": 9.072681704260652e-06, "loss": 0.0001, "step": 2848 }, { "epoch": 0.18360507830121803, "grad_norm": 0.006565756711698041, "learning_rate": 9.071965628356606e-06, "loss": 0.0, "step": 2849 }, { "epoch": 0.18366952374814718, "grad_norm": 0.0005553512116047497, "learning_rate": 9.071249552452562e-06, "loss": 0.0, "step": 2850 }, { "epoch": 0.18373396919507637, "grad_norm": 0.00028353594343350657, "learning_rate": 9.070533476548515e-06, "loss": 0.0, "step": 2851 }, { "epoch": 0.18379841464200555, "grad_norm": 0.20578208174425175, "learning_rate": 9.069817400644469e-06, "loss": 0.0014, "step": 2852 }, { "epoch": 0.1838628600889347, "grad_norm": 0.04055908694973615, "learning_rate": 9.069101324740423e-06, "loss": 0.0002, "step": 2853 }, { "epoch": 0.1839273055358639, "grad_norm": 0.06899265144614615, "learning_rate": 9.068385248836377e-06, "loss": 0.0007, "step": 2854 }, { "epoch": 0.18399175098279308, "grad_norm": 0.0006986279102526963, "learning_rate": 9.067669172932332e-06, "loss": 0.0, "step": 2855 }, { "epoch": 0.18405619642972224, "grad_norm": 0.00019407767512415275, "learning_rate": 9.066953097028286e-06, "loss": 0.0, "step": 2856 }, { "epoch": 0.18412064187665142, "grad_norm": 0.15103341496607176, "learning_rate": 9.06623702112424e-06, "loss": 0.0008, "step": 2857 }, { "epoch": 0.18418508732358058, "grad_norm": 0.12748822634696041, "learning_rate": 9.065520945220195e-06, "loss": 0.0002, "step": 2858 }, { "epoch": 0.18424953277050976, "grad_norm": 0.010185844246403907, "learning_rate": 9.064804869316149e-06, "loss": 0.0, "step": 2859 }, { "epoch": 0.18431397821743895, "grad_norm": 0.0005492418436434324, "learning_rate": 9.064088793412103e-06, "loss": 0.0, "step": 2860 }, { "epoch": 0.1843784236643681, "grad_norm": 0.06054778444468528, "learning_rate": 9.063372717508056e-06, "loss": 0.0002, "step": 2861 }, { "epoch": 0.1844428691112973, "grad_norm": 0.00015870494723684052, "learning_rate": 9.06265664160401e-06, "loss": 0.0, "step": 2862 }, { "epoch": 0.18450731455822647, "grad_norm": 0.2531298430886071, "learning_rate": 9.061940565699964e-06, "loss": 0.0008, "step": 2863 }, { "epoch": 0.18457176000515563, "grad_norm": 0.00035488257119721176, "learning_rate": 9.061224489795919e-06, "loss": 0.0, "step": 2864 }, { "epoch": 0.1846362054520848, "grad_norm": 0.001592378038638373, "learning_rate": 9.060508413891873e-06, "loss": 0.0, "step": 2865 }, { "epoch": 0.184700650899014, "grad_norm": 0.005239850862371177, "learning_rate": 9.059792337987827e-06, "loss": 0.0, "step": 2866 }, { "epoch": 0.18476509634594315, "grad_norm": 0.0012565288853653319, "learning_rate": 9.059076262083782e-06, "loss": 0.0, "step": 2867 }, { "epoch": 0.18482954179287234, "grad_norm": 0.003256885058237763, "learning_rate": 9.058360186179736e-06, "loss": 0.0, "step": 2868 }, { "epoch": 0.18489398723980152, "grad_norm": 0.0013340724323056313, "learning_rate": 9.05764411027569e-06, "loss": 0.0, "step": 2869 }, { "epoch": 0.18495843268673068, "grad_norm": 0.009623622364578288, "learning_rate": 9.056928034371644e-06, "loss": 0.0001, "step": 2870 }, { "epoch": 0.18502287813365986, "grad_norm": 0.02283470135952233, "learning_rate": 9.056211958467597e-06, "loss": 0.0, "step": 2871 }, { "epoch": 0.18508732358058902, "grad_norm": 3.1352344007944293, "learning_rate": 9.055495882563551e-06, "loss": 0.0312, "step": 2872 }, { "epoch": 0.1851517690275182, "grad_norm": 0.1283916257810219, "learning_rate": 9.054779806659507e-06, "loss": 0.0022, "step": 2873 }, { "epoch": 0.1852162144744474, "grad_norm": 0.0017243622373718673, "learning_rate": 9.054063730755462e-06, "loss": 0.0, "step": 2874 }, { "epoch": 0.18528065992137654, "grad_norm": 0.02626256695329013, "learning_rate": 9.053347654851416e-06, "loss": 0.0001, "step": 2875 }, { "epoch": 0.18534510536830573, "grad_norm": 1.8560834970519415, "learning_rate": 9.05263157894737e-06, "loss": 0.0055, "step": 2876 }, { "epoch": 0.1854095508152349, "grad_norm": 0.22453528301728987, "learning_rate": 9.051915503043323e-06, "loss": 0.0003, "step": 2877 }, { "epoch": 0.18547399626216407, "grad_norm": 0.01902833171278348, "learning_rate": 9.051199427139277e-06, "loss": 0.0001, "step": 2878 }, { "epoch": 0.18553844170909325, "grad_norm": 0.0015275143263481297, "learning_rate": 9.050483351235231e-06, "loss": 0.0, "step": 2879 }, { "epoch": 0.18560288715602244, "grad_norm": 0.0001940454087729942, "learning_rate": 9.049767275331186e-06, "loss": 0.0, "step": 2880 }, { "epoch": 0.1856673326029516, "grad_norm": 0.00048513116384772117, "learning_rate": 9.04905119942714e-06, "loss": 0.0, "step": 2881 }, { "epoch": 0.18573177804988078, "grad_norm": 0.0037608151201876908, "learning_rate": 9.048335123523094e-06, "loss": 0.0, "step": 2882 }, { "epoch": 0.18579622349680994, "grad_norm": 1.1987838702145488, "learning_rate": 9.047619047619049e-06, "loss": 0.001, "step": 2883 }, { "epoch": 0.18586066894373912, "grad_norm": 0.010770438174594095, "learning_rate": 9.046902971715003e-06, "loss": 0.0001, "step": 2884 }, { "epoch": 0.1859251143906683, "grad_norm": 0.003791891232412315, "learning_rate": 9.046186895810957e-06, "loss": 0.0, "step": 2885 }, { "epoch": 0.18598955983759746, "grad_norm": 1.4171702761376253, "learning_rate": 9.045470819906912e-06, "loss": 0.0036, "step": 2886 }, { "epoch": 0.18605400528452665, "grad_norm": 0.013678521963010951, "learning_rate": 9.044754744002864e-06, "loss": 0.0, "step": 2887 }, { "epoch": 0.18611845073145583, "grad_norm": 0.2775792210281082, "learning_rate": 9.044038668098818e-06, "loss": 0.0021, "step": 2888 }, { "epoch": 0.186182896178385, "grad_norm": 0.021857381142348316, "learning_rate": 9.043322592194773e-06, "loss": 0.0001, "step": 2889 }, { "epoch": 0.18624734162531417, "grad_norm": 0.10352626332013336, "learning_rate": 9.042606516290727e-06, "loss": 0.0002, "step": 2890 }, { "epoch": 0.18631178707224336, "grad_norm": 0.043887566667352385, "learning_rate": 9.041890440386681e-06, "loss": 0.0001, "step": 2891 }, { "epoch": 0.1863762325191725, "grad_norm": 0.00426374508594024, "learning_rate": 9.041174364482636e-06, "loss": 0.0, "step": 2892 }, { "epoch": 0.1864406779661017, "grad_norm": 0.0017327150241980725, "learning_rate": 9.04045828857859e-06, "loss": 0.0, "step": 2893 }, { "epoch": 0.18650512341303088, "grad_norm": 0.16207233258521805, "learning_rate": 9.039742212674544e-06, "loss": 0.0003, "step": 2894 }, { "epoch": 0.18656956885996004, "grad_norm": 0.08029956264882238, "learning_rate": 9.039026136770499e-06, "loss": 0.0002, "step": 2895 }, { "epoch": 0.18663401430688922, "grad_norm": 0.002952531018276622, "learning_rate": 9.038310060866453e-06, "loss": 0.0, "step": 2896 }, { "epoch": 0.18669845975381838, "grad_norm": 0.001077306109601978, "learning_rate": 9.037593984962407e-06, "loss": 0.0, "step": 2897 }, { "epoch": 0.18676290520074756, "grad_norm": 0.5327763508814157, "learning_rate": 9.036877909058361e-06, "loss": 0.0018, "step": 2898 }, { "epoch": 0.18682735064767675, "grad_norm": 0.0011653264573898313, "learning_rate": 9.036161833154316e-06, "loss": 0.0, "step": 2899 }, { "epoch": 0.1868917960946059, "grad_norm": 0.010833147865541404, "learning_rate": 9.03544575725027e-06, "loss": 0.0001, "step": 2900 }, { "epoch": 0.1869562415415351, "grad_norm": 0.0035407077531820256, "learning_rate": 9.034729681346224e-06, "loss": 0.0, "step": 2901 }, { "epoch": 0.18702068698846427, "grad_norm": 0.19864257068351776, "learning_rate": 9.034013605442179e-06, "loss": 0.0003, "step": 2902 }, { "epoch": 0.18708513243539343, "grad_norm": 0.5965458646506104, "learning_rate": 9.033297529538131e-06, "loss": 0.0033, "step": 2903 }, { "epoch": 0.18714957788232262, "grad_norm": 0.0016764956381874533, "learning_rate": 9.032581453634086e-06, "loss": 0.0, "step": 2904 }, { "epoch": 0.1872140233292518, "grad_norm": 0.3784683095611159, "learning_rate": 9.03186537773004e-06, "loss": 0.0007, "step": 2905 }, { "epoch": 0.18727846877618096, "grad_norm": 0.005355622180015852, "learning_rate": 9.031149301825994e-06, "loss": 0.0, "step": 2906 }, { "epoch": 0.18734291422311014, "grad_norm": 0.1916487189541943, "learning_rate": 9.030433225921948e-06, "loss": 0.0003, "step": 2907 }, { "epoch": 0.18740735967003933, "grad_norm": 0.08662085151423458, "learning_rate": 9.029717150017903e-06, "loss": 0.0006, "step": 2908 }, { "epoch": 0.18747180511696848, "grad_norm": 0.1926151557278316, "learning_rate": 9.029001074113857e-06, "loss": 0.0004, "step": 2909 }, { "epoch": 0.18753625056389767, "grad_norm": 0.3587466842551305, "learning_rate": 9.028284998209811e-06, "loss": 0.003, "step": 2910 }, { "epoch": 0.18760069601082682, "grad_norm": 0.01522610071722062, "learning_rate": 9.027568922305766e-06, "loss": 0.0001, "step": 2911 }, { "epoch": 0.187665141457756, "grad_norm": 0.09025906889595814, "learning_rate": 9.026852846401718e-06, "loss": 0.001, "step": 2912 }, { "epoch": 0.1877295869046852, "grad_norm": 0.0005104981904117869, "learning_rate": 9.026136770497673e-06, "loss": 0.0, "step": 2913 }, { "epoch": 0.18779403235161435, "grad_norm": 0.009975611318601715, "learning_rate": 9.025420694593627e-06, "loss": 0.0, "step": 2914 }, { "epoch": 0.18785847779854353, "grad_norm": 0.04623598655275114, "learning_rate": 9.024704618689581e-06, "loss": 0.0002, "step": 2915 }, { "epoch": 0.18792292324547272, "grad_norm": 0.003555617732486728, "learning_rate": 9.023988542785535e-06, "loss": 0.0, "step": 2916 }, { "epoch": 0.18798736869240187, "grad_norm": 0.018560679936225576, "learning_rate": 9.02327246688149e-06, "loss": 0.0, "step": 2917 }, { "epoch": 0.18805181413933106, "grad_norm": 0.43325214848147997, "learning_rate": 9.022556390977444e-06, "loss": 0.0021, "step": 2918 }, { "epoch": 0.18811625958626024, "grad_norm": 0.07998140821266131, "learning_rate": 9.021840315073398e-06, "loss": 0.0017, "step": 2919 }, { "epoch": 0.1881807050331894, "grad_norm": 0.07237012820470437, "learning_rate": 9.021124239169353e-06, "loss": 0.0002, "step": 2920 }, { "epoch": 0.18824515048011858, "grad_norm": 0.000759668939913165, "learning_rate": 9.020408163265307e-06, "loss": 0.0, "step": 2921 }, { "epoch": 0.18830959592704774, "grad_norm": 0.18338005331275695, "learning_rate": 9.019692087361261e-06, "loss": 0.0006, "step": 2922 }, { "epoch": 0.18837404137397692, "grad_norm": 0.2098501933593441, "learning_rate": 9.018976011457216e-06, "loss": 0.0005, "step": 2923 }, { "epoch": 0.1884384868209061, "grad_norm": 0.0018100893660102637, "learning_rate": 9.01825993555317e-06, "loss": 0.0, "step": 2924 }, { "epoch": 0.18850293226783527, "grad_norm": 0.0450810406327063, "learning_rate": 9.017543859649124e-06, "loss": 0.0001, "step": 2925 }, { "epoch": 0.18856737771476445, "grad_norm": 0.014848699673825443, "learning_rate": 9.016827783745078e-06, "loss": 0.0, "step": 2926 }, { "epoch": 0.18863182316169363, "grad_norm": 0.015089833418442411, "learning_rate": 9.016111707841033e-06, "loss": 0.0, "step": 2927 }, { "epoch": 0.1886962686086228, "grad_norm": 0.37162824586463794, "learning_rate": 9.015395631936985e-06, "loss": 0.003, "step": 2928 }, { "epoch": 0.18876071405555198, "grad_norm": 0.00679900766430204, "learning_rate": 9.01467955603294e-06, "loss": 0.0, "step": 2929 }, { "epoch": 0.18882515950248116, "grad_norm": 0.19478660354221924, "learning_rate": 9.013963480128894e-06, "loss": 0.0007, "step": 2930 }, { "epoch": 0.18888960494941032, "grad_norm": 0.08228354880526026, "learning_rate": 9.013247404224848e-06, "loss": 0.0003, "step": 2931 }, { "epoch": 0.1889540503963395, "grad_norm": 0.07801816609421663, "learning_rate": 9.012531328320803e-06, "loss": 0.0002, "step": 2932 }, { "epoch": 0.18901849584326869, "grad_norm": 0.015425874953560122, "learning_rate": 9.011815252416757e-06, "loss": 0.0, "step": 2933 }, { "epoch": 0.18908294129019784, "grad_norm": 0.12362967331985299, "learning_rate": 9.011099176512711e-06, "loss": 0.0001, "step": 2934 }, { "epoch": 0.18914738673712703, "grad_norm": 0.002316840415064825, "learning_rate": 9.010383100608665e-06, "loss": 0.0, "step": 2935 }, { "epoch": 0.18921183218405618, "grad_norm": 0.0053035649105632595, "learning_rate": 9.00966702470462e-06, "loss": 0.0, "step": 2936 }, { "epoch": 0.18927627763098537, "grad_norm": 0.016294097918252204, "learning_rate": 9.008950948800574e-06, "loss": 0.0, "step": 2937 }, { "epoch": 0.18934072307791455, "grad_norm": 0.008288165719355206, "learning_rate": 9.008234872896527e-06, "loss": 0.0, "step": 2938 }, { "epoch": 0.1894051685248437, "grad_norm": 0.19698208553230082, "learning_rate": 9.007518796992481e-06, "loss": 0.0014, "step": 2939 }, { "epoch": 0.1894696139717729, "grad_norm": 0.003667401671959796, "learning_rate": 9.006802721088435e-06, "loss": 0.0, "step": 2940 }, { "epoch": 0.18953405941870208, "grad_norm": 0.002796827125301907, "learning_rate": 9.00608664518439e-06, "loss": 0.0, "step": 2941 }, { "epoch": 0.18959850486563123, "grad_norm": 0.3992818972816783, "learning_rate": 9.005370569280344e-06, "loss": 0.0002, "step": 2942 }, { "epoch": 0.18966295031256042, "grad_norm": 0.05893500611468803, "learning_rate": 9.0046544933763e-06, "loss": 0.0004, "step": 2943 }, { "epoch": 0.1897273957594896, "grad_norm": 0.002527947056167966, "learning_rate": 9.003938417472252e-06, "loss": 0.0, "step": 2944 }, { "epoch": 0.18979184120641876, "grad_norm": 0.03283716045698447, "learning_rate": 9.003222341568207e-06, "loss": 0.0001, "step": 2945 }, { "epoch": 0.18985628665334794, "grad_norm": 0.003239810900464001, "learning_rate": 9.002506265664161e-06, "loss": 0.0, "step": 2946 }, { "epoch": 0.18992073210027713, "grad_norm": 0.034203012205389266, "learning_rate": 9.001790189760115e-06, "loss": 0.0003, "step": 2947 }, { "epoch": 0.18998517754720629, "grad_norm": 0.01083160668170668, "learning_rate": 9.00107411385607e-06, "loss": 0.0, "step": 2948 }, { "epoch": 0.19004962299413547, "grad_norm": 0.007940915211813882, "learning_rate": 9.000358037952024e-06, "loss": 0.0, "step": 2949 }, { "epoch": 0.19011406844106463, "grad_norm": 0.1084319956272911, "learning_rate": 8.999641962047978e-06, "loss": 0.0008, "step": 2950 }, { "epoch": 0.1901785138879938, "grad_norm": 0.01166456268566552, "learning_rate": 8.998925886143933e-06, "loss": 0.0, "step": 2951 }, { "epoch": 0.190242959334923, "grad_norm": 0.009255105740318771, "learning_rate": 8.998209810239887e-06, "loss": 0.0, "step": 2952 }, { "epoch": 0.19030740478185215, "grad_norm": 0.046569376752111036, "learning_rate": 8.997493734335841e-06, "loss": 0.0001, "step": 2953 }, { "epoch": 0.19037185022878134, "grad_norm": 0.01405700880200295, "learning_rate": 8.996777658431794e-06, "loss": 0.0001, "step": 2954 }, { "epoch": 0.19043629567571052, "grad_norm": 0.01268672480723258, "learning_rate": 8.996061582527748e-06, "loss": 0.0, "step": 2955 }, { "epoch": 0.19050074112263968, "grad_norm": 0.002450229581765563, "learning_rate": 8.995345506623702e-06, "loss": 0.0, "step": 2956 }, { "epoch": 0.19056518656956886, "grad_norm": 0.0003489092380548279, "learning_rate": 8.994629430719657e-06, "loss": 0.0, "step": 2957 }, { "epoch": 0.19062963201649805, "grad_norm": 0.0029454670887810084, "learning_rate": 8.993913354815611e-06, "loss": 0.0, "step": 2958 }, { "epoch": 0.1906940774634272, "grad_norm": 0.1718268218574252, "learning_rate": 8.993197278911565e-06, "loss": 0.0018, "step": 2959 }, { "epoch": 0.1907585229103564, "grad_norm": 0.11376702629245902, "learning_rate": 8.99248120300752e-06, "loss": 0.0003, "step": 2960 }, { "epoch": 0.19082296835728554, "grad_norm": 0.0007795738133778679, "learning_rate": 8.991765127103474e-06, "loss": 0.0, "step": 2961 }, { "epoch": 0.19088741380421473, "grad_norm": 1.4979985713009711, "learning_rate": 8.991049051199428e-06, "loss": 0.0109, "step": 2962 }, { "epoch": 0.1909518592511439, "grad_norm": 0.8103406896663958, "learning_rate": 8.990332975295382e-06, "loss": 0.0002, "step": 2963 }, { "epoch": 0.19101630469807307, "grad_norm": 0.02842256055845789, "learning_rate": 8.989616899391335e-06, "loss": 0.0001, "step": 2964 }, { "epoch": 0.19108075014500225, "grad_norm": 0.020678844417695182, "learning_rate": 8.98890082348729e-06, "loss": 0.0002, "step": 2965 }, { "epoch": 0.19114519559193144, "grad_norm": 0.0028593214061483157, "learning_rate": 8.988184747583245e-06, "loss": 0.0, "step": 2966 }, { "epoch": 0.1912096410388606, "grad_norm": 0.009822979528012175, "learning_rate": 8.9874686716792e-06, "loss": 0.0, "step": 2967 }, { "epoch": 0.19127408648578978, "grad_norm": 0.022687103734926797, "learning_rate": 8.986752595775154e-06, "loss": 0.0001, "step": 2968 }, { "epoch": 0.19133853193271896, "grad_norm": 0.06149599999979963, "learning_rate": 8.986036519871108e-06, "loss": 0.0002, "step": 2969 }, { "epoch": 0.19140297737964812, "grad_norm": 1.2065422771333003, "learning_rate": 8.98532044396706e-06, "loss": 0.0034, "step": 2970 }, { "epoch": 0.1914674228265773, "grad_norm": 0.018639562538012196, "learning_rate": 8.984604368063015e-06, "loss": 0.0, "step": 2971 }, { "epoch": 0.1915318682735065, "grad_norm": 0.01474286627896713, "learning_rate": 8.98388829215897e-06, "loss": 0.0001, "step": 2972 }, { "epoch": 0.19159631372043565, "grad_norm": 1.5631704036642082, "learning_rate": 8.983172216254924e-06, "loss": 0.0107, "step": 2973 }, { "epoch": 0.19166075916736483, "grad_norm": 0.00574327599457403, "learning_rate": 8.982456140350878e-06, "loss": 0.0001, "step": 2974 }, { "epoch": 0.191725204614294, "grad_norm": 0.23094776449504884, "learning_rate": 8.981740064446832e-06, "loss": 0.0028, "step": 2975 }, { "epoch": 0.19178965006122317, "grad_norm": 0.0028454445462574494, "learning_rate": 8.981023988542787e-06, "loss": 0.0, "step": 2976 }, { "epoch": 0.19185409550815236, "grad_norm": 0.004666714035464535, "learning_rate": 8.980307912638741e-06, "loss": 0.0, "step": 2977 }, { "epoch": 0.1919185409550815, "grad_norm": 0.004573597119030054, "learning_rate": 8.979591836734695e-06, "loss": 0.0, "step": 2978 }, { "epoch": 0.1919829864020107, "grad_norm": 0.011798074217064835, "learning_rate": 8.97887576083065e-06, "loss": 0.0, "step": 2979 }, { "epoch": 0.19204743184893988, "grad_norm": 0.0004367517808790769, "learning_rate": 8.978159684926602e-06, "loss": 0.0, "step": 2980 }, { "epoch": 0.19211187729586904, "grad_norm": 0.0033874185502569326, "learning_rate": 8.977443609022556e-06, "loss": 0.0, "step": 2981 }, { "epoch": 0.19217632274279822, "grad_norm": 0.003283052887576775, "learning_rate": 8.97672753311851e-06, "loss": 0.0, "step": 2982 }, { "epoch": 0.1922407681897274, "grad_norm": 0.19926155789775196, "learning_rate": 8.976011457214465e-06, "loss": 0.0006, "step": 2983 }, { "epoch": 0.19230521363665656, "grad_norm": 0.004192863160750905, "learning_rate": 8.97529538131042e-06, "loss": 0.0, "step": 2984 }, { "epoch": 0.19236965908358575, "grad_norm": 0.00110007929077268, "learning_rate": 8.974579305406374e-06, "loss": 0.0, "step": 2985 }, { "epoch": 0.19243410453051493, "grad_norm": 0.039425082084022116, "learning_rate": 8.973863229502328e-06, "loss": 0.0001, "step": 2986 }, { "epoch": 0.1924985499774441, "grad_norm": 0.00043550038157316295, "learning_rate": 8.973147153598282e-06, "loss": 0.0, "step": 2987 }, { "epoch": 0.19256299542437327, "grad_norm": 0.05183727983491678, "learning_rate": 8.972431077694236e-06, "loss": 0.0016, "step": 2988 }, { "epoch": 0.19262744087130243, "grad_norm": 0.15039922020860932, "learning_rate": 8.97171500179019e-06, "loss": 0.0019, "step": 2989 }, { "epoch": 0.19269188631823161, "grad_norm": 0.0006762037012585999, "learning_rate": 8.970998925886145e-06, "loss": 0.0, "step": 2990 }, { "epoch": 0.1927563317651608, "grad_norm": 0.10825269799498864, "learning_rate": 8.9702828499821e-06, "loss": 0.0011, "step": 2991 }, { "epoch": 0.19282077721208996, "grad_norm": 0.3270196347969351, "learning_rate": 8.969566774078054e-06, "loss": 0.0011, "step": 2992 }, { "epoch": 0.19288522265901914, "grad_norm": 0.007802911038854596, "learning_rate": 8.968850698174008e-06, "loss": 0.0, "step": 2993 }, { "epoch": 0.19294966810594832, "grad_norm": 0.0008345035135385656, "learning_rate": 8.968134622269962e-06, "loss": 0.0, "step": 2994 }, { "epoch": 0.19301411355287748, "grad_norm": 0.08876152359166628, "learning_rate": 8.967418546365917e-06, "loss": 0.0001, "step": 2995 }, { "epoch": 0.19307855899980667, "grad_norm": 0.1668909055741231, "learning_rate": 8.96670247046187e-06, "loss": 0.0005, "step": 2996 }, { "epoch": 0.19314300444673585, "grad_norm": 0.060401569099000595, "learning_rate": 8.965986394557823e-06, "loss": 0.0003, "step": 2997 }, { "epoch": 0.193207449893665, "grad_norm": 0.0018142782884662798, "learning_rate": 8.965270318653778e-06, "loss": 0.0, "step": 2998 }, { "epoch": 0.1932718953405942, "grad_norm": 0.01917432223078058, "learning_rate": 8.964554242749732e-06, "loss": 0.0001, "step": 2999 }, { "epoch": 0.19333634078752338, "grad_norm": 0.00013875258473411826, "learning_rate": 8.963838166845686e-06, "loss": 0.0, "step": 3000 }, { "epoch": 0.19340078623445253, "grad_norm": 0.11576714445206707, "learning_rate": 8.96312209094164e-06, "loss": 0.0028, "step": 3001 }, { "epoch": 0.19346523168138172, "grad_norm": 0.6482484840760784, "learning_rate": 8.962406015037595e-06, "loss": 0.0014, "step": 3002 }, { "epoch": 0.19352967712831087, "grad_norm": 0.6482484840760784, "learning_rate": 8.962406015037595e-06, "loss": 0.001, "step": 3003 }, { "epoch": 0.19359412257524006, "grad_norm": 0.0007944590031801373, "learning_rate": 8.96168993913355e-06, "loss": 0.0, "step": 3004 }, { "epoch": 0.19365856802216924, "grad_norm": 0.003729115719289283, "learning_rate": 8.960973863229504e-06, "loss": 0.0, "step": 3005 }, { "epoch": 0.1937230134690984, "grad_norm": 0.0008843086516153837, "learning_rate": 8.960257787325458e-06, "loss": 0.0, "step": 3006 }, { "epoch": 0.19378745891602758, "grad_norm": 0.21990022366392237, "learning_rate": 8.95954171142141e-06, "loss": 0.0021, "step": 3007 }, { "epoch": 0.19385190436295677, "grad_norm": 0.15541553592994967, "learning_rate": 8.958825635517365e-06, "loss": 0.0018, "step": 3008 }, { "epoch": 0.19391634980988592, "grad_norm": 0.02511120376754912, "learning_rate": 8.958109559613319e-06, "loss": 0.0001, "step": 3009 }, { "epoch": 0.1939807952568151, "grad_norm": 0.001173436761283878, "learning_rate": 8.957393483709273e-06, "loss": 0.0, "step": 3010 }, { "epoch": 0.1940452407037443, "grad_norm": 0.014118849072544383, "learning_rate": 8.956677407805228e-06, "loss": 0.0001, "step": 3011 }, { "epoch": 0.19410968615067345, "grad_norm": 0.02563359621196255, "learning_rate": 8.955961331901182e-06, "loss": 0.0015, "step": 3012 }, { "epoch": 0.19417413159760263, "grad_norm": 0.0006445896627271484, "learning_rate": 8.955245255997136e-06, "loss": 0.0, "step": 3013 }, { "epoch": 0.1942385770445318, "grad_norm": 0.008495704221291287, "learning_rate": 8.95452918009309e-06, "loss": 0.0001, "step": 3014 }, { "epoch": 0.19430302249146097, "grad_norm": 0.9487614727206138, "learning_rate": 8.953813104189045e-06, "loss": 0.0081, "step": 3015 }, { "epoch": 0.19436746793839016, "grad_norm": 0.0034573415451793093, "learning_rate": 8.953097028285e-06, "loss": 0.0001, "step": 3016 }, { "epoch": 0.19443191338531932, "grad_norm": 0.024194100837835554, "learning_rate": 8.952380952380953e-06, "loss": 0.0002, "step": 3017 }, { "epoch": 0.1944963588322485, "grad_norm": 0.08266217977115524, "learning_rate": 8.951664876476908e-06, "loss": 0.0002, "step": 3018 }, { "epoch": 0.19456080427917768, "grad_norm": 0.05253149672988689, "learning_rate": 8.950948800572862e-06, "loss": 0.0001, "step": 3019 }, { "epoch": 0.19462524972610684, "grad_norm": 0.26598516057325405, "learning_rate": 8.950232724668816e-06, "loss": 0.0012, "step": 3020 }, { "epoch": 0.19468969517303603, "grad_norm": 0.001171713922511211, "learning_rate": 8.94951664876477e-06, "loss": 0.0, "step": 3021 }, { "epoch": 0.1947541406199652, "grad_norm": 0.006140044099780373, "learning_rate": 8.948800572860725e-06, "loss": 0.0, "step": 3022 }, { "epoch": 0.19481858606689437, "grad_norm": 0.17568814673045194, "learning_rate": 8.948084496956678e-06, "loss": 0.0008, "step": 3023 }, { "epoch": 0.19488303151382355, "grad_norm": 0.0013122715834712388, "learning_rate": 8.947368421052632e-06, "loss": 0.0, "step": 3024 }, { "epoch": 0.19494747696075274, "grad_norm": 0.004215928276184871, "learning_rate": 8.946652345148586e-06, "loss": 0.0, "step": 3025 }, { "epoch": 0.1950119224076819, "grad_norm": 0.17655713718844582, "learning_rate": 8.94593626924454e-06, "loss": 0.0013, "step": 3026 }, { "epoch": 0.19507636785461108, "grad_norm": 0.0023065439930651447, "learning_rate": 8.945220193340495e-06, "loss": 0.0, "step": 3027 }, { "epoch": 0.19514081330154023, "grad_norm": 0.00540466461391632, "learning_rate": 8.944504117436449e-06, "loss": 0.0, "step": 3028 }, { "epoch": 0.19520525874846942, "grad_norm": 0.0007297223900245063, "learning_rate": 8.943788041532403e-06, "loss": 0.0, "step": 3029 }, { "epoch": 0.1952697041953986, "grad_norm": 0.05844247682053747, "learning_rate": 8.943071965628358e-06, "loss": 0.0003, "step": 3030 }, { "epoch": 0.19533414964232776, "grad_norm": 0.31681117297173256, "learning_rate": 8.942355889724312e-06, "loss": 0.001, "step": 3031 }, { "epoch": 0.19539859508925694, "grad_norm": 0.28899940884754083, "learning_rate": 8.941639813820265e-06, "loss": 0.0007, "step": 3032 }, { "epoch": 0.19546304053618613, "grad_norm": 0.0033367080062562983, "learning_rate": 8.940923737916219e-06, "loss": 0.0, "step": 3033 }, { "epoch": 0.19552748598311528, "grad_norm": 0.09525066093311885, "learning_rate": 8.940207662012173e-06, "loss": 0.0007, "step": 3034 }, { "epoch": 0.19559193143004447, "grad_norm": 0.00012517123703986328, "learning_rate": 8.939491586108127e-06, "loss": 0.0, "step": 3035 }, { "epoch": 0.19565637687697365, "grad_norm": 0.05815377551468865, "learning_rate": 8.938775510204082e-06, "loss": 0.0003, "step": 3036 }, { "epoch": 0.1957208223239028, "grad_norm": 0.0007073388646762897, "learning_rate": 8.938059434300038e-06, "loss": 0.0, "step": 3037 }, { "epoch": 0.195785267770832, "grad_norm": 0.015940901006851475, "learning_rate": 8.937343358395992e-06, "loss": 0.0001, "step": 3038 }, { "epoch": 0.19584971321776118, "grad_norm": 0.0006422838104322162, "learning_rate": 8.936627282491945e-06, "loss": 0.0, "step": 3039 }, { "epoch": 0.19591415866469034, "grad_norm": 0.010320363651081372, "learning_rate": 8.935911206587899e-06, "loss": 0.0, "step": 3040 }, { "epoch": 0.19597860411161952, "grad_norm": 0.016090234798197458, "learning_rate": 8.935195130683853e-06, "loss": 0.0001, "step": 3041 }, { "epoch": 0.19604304955854868, "grad_norm": 0.35970697411549507, "learning_rate": 8.934479054779808e-06, "loss": 0.0013, "step": 3042 }, { "epoch": 0.19610749500547786, "grad_norm": 0.0001557431387520329, "learning_rate": 8.933762978875762e-06, "loss": 0.0, "step": 3043 }, { "epoch": 0.19617194045240705, "grad_norm": 0.001276511107407491, "learning_rate": 8.933046902971716e-06, "loss": 0.0, "step": 3044 }, { "epoch": 0.1962363858993362, "grad_norm": 0.0005064007813871364, "learning_rate": 8.93233082706767e-06, "loss": 0.0, "step": 3045 }, { "epoch": 0.1963008313462654, "grad_norm": 0.006093401575577427, "learning_rate": 8.931614751163625e-06, "loss": 0.0001, "step": 3046 }, { "epoch": 0.19636527679319457, "grad_norm": 0.11769669691171146, "learning_rate": 8.930898675259579e-06, "loss": 0.0003, "step": 3047 }, { "epoch": 0.19642972224012373, "grad_norm": 0.0010719782400106074, "learning_rate": 8.930182599355532e-06, "loss": 0.0, "step": 3048 }, { "epoch": 0.1964941676870529, "grad_norm": 0.0005714457417295161, "learning_rate": 8.929466523451486e-06, "loss": 0.0, "step": 3049 }, { "epoch": 0.1965586131339821, "grad_norm": 0.4581438037557388, "learning_rate": 8.92875044754744e-06, "loss": 0.0014, "step": 3050 }, { "epoch": 0.19662305858091125, "grad_norm": 0.0068193888884800335, "learning_rate": 8.928034371643395e-06, "loss": 0.0001, "step": 3051 }, { "epoch": 0.19668750402784044, "grad_norm": 0.02994317793845591, "learning_rate": 8.927318295739349e-06, "loss": 0.0, "step": 3052 }, { "epoch": 0.1967519494747696, "grad_norm": 0.014345473768226343, "learning_rate": 8.926602219835303e-06, "loss": 0.0001, "step": 3053 }, { "epoch": 0.19681639492169878, "grad_norm": 0.0005049755597789164, "learning_rate": 8.925886143931257e-06, "loss": 0.0, "step": 3054 }, { "epoch": 0.19688084036862796, "grad_norm": 0.2326998450872154, "learning_rate": 8.925170068027212e-06, "loss": 0.0002, "step": 3055 }, { "epoch": 0.19694528581555712, "grad_norm": 0.2033664753568645, "learning_rate": 8.924453992123166e-06, "loss": 0.0006, "step": 3056 }, { "epoch": 0.1970097312624863, "grad_norm": 0.06406544280271197, "learning_rate": 8.92373791621912e-06, "loss": 0.0002, "step": 3057 }, { "epoch": 0.1970741767094155, "grad_norm": 0.01635489248533111, "learning_rate": 8.923021840315073e-06, "loss": 0.0001, "step": 3058 }, { "epoch": 0.19713862215634465, "grad_norm": 0.0020399615266398115, "learning_rate": 8.922305764411027e-06, "loss": 0.0, "step": 3059 }, { "epoch": 0.19720306760327383, "grad_norm": 0.007834388015339783, "learning_rate": 8.921589688506982e-06, "loss": 0.0, "step": 3060 }, { "epoch": 0.197267513050203, "grad_norm": 0.0030011656472259296, "learning_rate": 8.920873612602938e-06, "loss": 0.0, "step": 3061 }, { "epoch": 0.19733195849713217, "grad_norm": 0.010005728413372437, "learning_rate": 8.920157536698892e-06, "loss": 0.0001, "step": 3062 }, { "epoch": 0.19739640394406135, "grad_norm": 0.0002145815971270866, "learning_rate": 8.919441460794846e-06, "loss": 0.0, "step": 3063 }, { "epoch": 0.19746084939099054, "grad_norm": 0.11205250164730646, "learning_rate": 8.918725384890799e-06, "loss": 0.0019, "step": 3064 }, { "epoch": 0.1975252948379197, "grad_norm": 0.02060544100981016, "learning_rate": 8.918009308986753e-06, "loss": 0.0, "step": 3065 }, { "epoch": 0.19758974028484888, "grad_norm": 0.001616753284672874, "learning_rate": 8.917293233082707e-06, "loss": 0.0, "step": 3066 }, { "epoch": 0.19765418573177804, "grad_norm": 0.005112549724463105, "learning_rate": 8.916577157178662e-06, "loss": 0.0, "step": 3067 }, { "epoch": 0.19771863117870722, "grad_norm": 0.003087953257347784, "learning_rate": 8.915861081274616e-06, "loss": 0.0, "step": 3068 }, { "epoch": 0.1977830766256364, "grad_norm": 0.012052220127453446, "learning_rate": 8.91514500537057e-06, "loss": 0.0001, "step": 3069 }, { "epoch": 0.19784752207256556, "grad_norm": 0.0009411924538636635, "learning_rate": 8.914428929466525e-06, "loss": 0.0, "step": 3070 }, { "epoch": 0.19791196751949475, "grad_norm": 0.018949288941459586, "learning_rate": 8.913712853562479e-06, "loss": 0.0002, "step": 3071 }, { "epoch": 0.19797641296642393, "grad_norm": 0.011494391843931702, "learning_rate": 8.912996777658433e-06, "loss": 0.0, "step": 3072 }, { "epoch": 0.1980408584133531, "grad_norm": 0.0031116002090642793, "learning_rate": 8.912280701754387e-06, "loss": 0.0, "step": 3073 }, { "epoch": 0.19810530386028227, "grad_norm": 0.022021763285966188, "learning_rate": 8.91156462585034e-06, "loss": 0.0001, "step": 3074 }, { "epoch": 0.19816974930721146, "grad_norm": 0.007751726489644879, "learning_rate": 8.910848549946294e-06, "loss": 0.0, "step": 3075 }, { "epoch": 0.1982341947541406, "grad_norm": 0.0032938459913306323, "learning_rate": 8.910132474042249e-06, "loss": 0.0, "step": 3076 }, { "epoch": 0.1982986402010698, "grad_norm": 0.026446706021640256, "learning_rate": 8.909416398138203e-06, "loss": 0.0003, "step": 3077 }, { "epoch": 0.19836308564799898, "grad_norm": 0.0027147322304744165, "learning_rate": 8.908700322234157e-06, "loss": 0.0, "step": 3078 }, { "epoch": 0.19842753109492814, "grad_norm": 0.03446864700507342, "learning_rate": 8.907984246330112e-06, "loss": 0.0, "step": 3079 }, { "epoch": 0.19849197654185732, "grad_norm": 0.002107508255067804, "learning_rate": 8.907268170426066e-06, "loss": 0.0, "step": 3080 }, { "epoch": 0.19855642198878648, "grad_norm": 0.0015772077216705193, "learning_rate": 8.90655209452202e-06, "loss": 0.0, "step": 3081 }, { "epoch": 0.19862086743571566, "grad_norm": 0.0014874691914585742, "learning_rate": 8.905836018617974e-06, "loss": 0.0, "step": 3082 }, { "epoch": 0.19868531288264485, "grad_norm": 0.0007431047392924327, "learning_rate": 8.905119942713929e-06, "loss": 0.0, "step": 3083 }, { "epoch": 0.198749758329574, "grad_norm": 0.0018363159310733638, "learning_rate": 8.904403866809883e-06, "loss": 0.0, "step": 3084 }, { "epoch": 0.1988142037765032, "grad_norm": 0.0013460365521944676, "learning_rate": 8.903687790905837e-06, "loss": 0.0, "step": 3085 }, { "epoch": 0.19887864922343237, "grad_norm": 0.15260297166684111, "learning_rate": 8.902971715001792e-06, "loss": 0.0019, "step": 3086 }, { "epoch": 0.19894309467036153, "grad_norm": 0.005164714457936164, "learning_rate": 8.902255639097746e-06, "loss": 0.0, "step": 3087 }, { "epoch": 0.19900754011729072, "grad_norm": 0.005644038945726389, "learning_rate": 8.9015395631937e-06, "loss": 0.0, "step": 3088 }, { "epoch": 0.1990719855642199, "grad_norm": 0.05408689312074812, "learning_rate": 8.900823487289655e-06, "loss": 0.0005, "step": 3089 }, { "epoch": 0.19913643101114906, "grad_norm": 0.016265788623832402, "learning_rate": 8.900107411385607e-06, "loss": 0.0, "step": 3090 }, { "epoch": 0.19920087645807824, "grad_norm": 0.2519519865025074, "learning_rate": 8.899391335481561e-06, "loss": 0.0026, "step": 3091 }, { "epoch": 0.1992653219050074, "grad_norm": 0.019179351740819434, "learning_rate": 8.898675259577516e-06, "loss": 0.0003, "step": 3092 }, { "epoch": 0.19932976735193658, "grad_norm": 0.00027986388418838086, "learning_rate": 8.89795918367347e-06, "loss": 0.0, "step": 3093 }, { "epoch": 0.19939421279886577, "grad_norm": 0.015353708675322387, "learning_rate": 8.897243107769424e-06, "loss": 0.0, "step": 3094 }, { "epoch": 0.19945865824579492, "grad_norm": 0.0026218758351446818, "learning_rate": 8.896527031865379e-06, "loss": 0.0, "step": 3095 }, { "epoch": 0.1995231036927241, "grad_norm": 0.0033979739761200935, "learning_rate": 8.895810955961333e-06, "loss": 0.0, "step": 3096 }, { "epoch": 0.1995875491396533, "grad_norm": 0.18854867217886892, "learning_rate": 8.895094880057287e-06, "loss": 0.0007, "step": 3097 }, { "epoch": 0.19965199458658245, "grad_norm": 0.20573892851899744, "learning_rate": 8.894378804153241e-06, "loss": 0.0006, "step": 3098 }, { "epoch": 0.19971644003351163, "grad_norm": 0.497828072749145, "learning_rate": 8.893662728249196e-06, "loss": 0.0014, "step": 3099 }, { "epoch": 0.19978088548044082, "grad_norm": 0.00453579094805275, "learning_rate": 8.892946652345148e-06, "loss": 0.0, "step": 3100 }, { "epoch": 0.19984533092736997, "grad_norm": 0.005429255738304247, "learning_rate": 8.892230576441103e-06, "loss": 0.0, "step": 3101 }, { "epoch": 0.19990977637429916, "grad_norm": 0.0028107191266686154, "learning_rate": 8.891514500537057e-06, "loss": 0.0, "step": 3102 }, { "epoch": 0.19997422182122834, "grad_norm": 0.001090017515992669, "learning_rate": 8.890798424633011e-06, "loss": 0.0, "step": 3103 }, { "epoch": 0.2000386672681575, "grad_norm": 0.005462728398524046, "learning_rate": 8.890082348728966e-06, "loss": 0.0001, "step": 3104 }, { "epoch": 0.20010311271508668, "grad_norm": 0.21584340658087367, "learning_rate": 8.88936627282492e-06, "loss": 0.0011, "step": 3105 }, { "epoch": 0.20016755816201584, "grad_norm": 0.026409045302834108, "learning_rate": 8.888650196920874e-06, "loss": 0.0001, "step": 3106 }, { "epoch": 0.20023200360894503, "grad_norm": 0.00022268280929360672, "learning_rate": 8.887934121016828e-06, "loss": 0.0, "step": 3107 }, { "epoch": 0.2002964490558742, "grad_norm": 0.7561233808588724, "learning_rate": 8.887218045112783e-06, "loss": 0.0056, "step": 3108 }, { "epoch": 0.20036089450280337, "grad_norm": 0.0008315596068303258, "learning_rate": 8.886501969208737e-06, "loss": 0.0, "step": 3109 }, { "epoch": 0.20042533994973255, "grad_norm": 0.03906717868442174, "learning_rate": 8.885785893304691e-06, "loss": 0.0001, "step": 3110 }, { "epoch": 0.20048978539666173, "grad_norm": 0.004649140480185962, "learning_rate": 8.885069817400646e-06, "loss": 0.0, "step": 3111 }, { "epoch": 0.2005542308435909, "grad_norm": 7.197497173441826e-05, "learning_rate": 8.8843537414966e-06, "loss": 0.0, "step": 3112 }, { "epoch": 0.20061867629052008, "grad_norm": 0.01971261560067173, "learning_rate": 8.883637665592554e-06, "loss": 0.0, "step": 3113 }, { "epoch": 0.20068312173744926, "grad_norm": 0.008505374211518214, "learning_rate": 8.882921589688509e-06, "loss": 0.0, "step": 3114 }, { "epoch": 0.20074756718437842, "grad_norm": 0.0005514684611620498, "learning_rate": 8.882205513784463e-06, "loss": 0.0, "step": 3115 }, { "epoch": 0.2008120126313076, "grad_norm": 0.3043897714692487, "learning_rate": 8.881489437880415e-06, "loss": 0.0014, "step": 3116 }, { "epoch": 0.20087645807823679, "grad_norm": 0.0006227491004178154, "learning_rate": 8.88077336197637e-06, "loss": 0.0, "step": 3117 }, { "epoch": 0.20094090352516594, "grad_norm": 0.2678906919268471, "learning_rate": 8.880057286072324e-06, "loss": 0.0005, "step": 3118 }, { "epoch": 0.20100534897209513, "grad_norm": 0.015165462236630976, "learning_rate": 8.879341210168278e-06, "loss": 0.0, "step": 3119 }, { "epoch": 0.20106979441902428, "grad_norm": 2.0520360976719396, "learning_rate": 8.878625134264233e-06, "loss": 0.0165, "step": 3120 }, { "epoch": 0.20113423986595347, "grad_norm": 0.35805703099009856, "learning_rate": 8.877909058360187e-06, "loss": 0.0012, "step": 3121 }, { "epoch": 0.20119868531288265, "grad_norm": 0.17738543831285275, "learning_rate": 8.877192982456141e-06, "loss": 0.0019, "step": 3122 }, { "epoch": 0.2012631307598118, "grad_norm": 0.00017845454458807985, "learning_rate": 8.876476906552096e-06, "loss": 0.0, "step": 3123 }, { "epoch": 0.201327576206741, "grad_norm": 0.1073608615304861, "learning_rate": 8.87576083064805e-06, "loss": 0.0004, "step": 3124 }, { "epoch": 0.20139202165367018, "grad_norm": 0.0004145412500641842, "learning_rate": 8.875044754744002e-06, "loss": 0.0, "step": 3125 }, { "epoch": 0.20145646710059933, "grad_norm": 0.003809234403064291, "learning_rate": 8.874328678839957e-06, "loss": 0.0001, "step": 3126 }, { "epoch": 0.20152091254752852, "grad_norm": 0.00033783976806183987, "learning_rate": 8.873612602935911e-06, "loss": 0.0, "step": 3127 }, { "epoch": 0.2015853579944577, "grad_norm": 0.013849292965756455, "learning_rate": 8.872896527031865e-06, "loss": 0.0, "step": 3128 }, { "epoch": 0.20164980344138686, "grad_norm": 0.045313850949965165, "learning_rate": 8.87218045112782e-06, "loss": 0.0002, "step": 3129 }, { "epoch": 0.20171424888831604, "grad_norm": 0.236568006038209, "learning_rate": 8.871464375223774e-06, "loss": 0.0003, "step": 3130 }, { "epoch": 0.2017786943352452, "grad_norm": 0.0038200420844738477, "learning_rate": 8.87074829931973e-06, "loss": 0.0, "step": 3131 }, { "epoch": 0.20184313978217439, "grad_norm": 0.014478036074284993, "learning_rate": 8.870032223415683e-06, "loss": 0.0001, "step": 3132 }, { "epoch": 0.20190758522910357, "grad_norm": 0.015151909350715245, "learning_rate": 8.869316147511637e-06, "loss": 0.0, "step": 3133 }, { "epoch": 0.20197203067603273, "grad_norm": 0.009762932361485831, "learning_rate": 8.868600071607591e-06, "loss": 0.0001, "step": 3134 }, { "epoch": 0.2020364761229619, "grad_norm": 0.00021053239735446382, "learning_rate": 8.867883995703545e-06, "loss": 0.0, "step": 3135 }, { "epoch": 0.2021009215698911, "grad_norm": 0.44029179548069913, "learning_rate": 8.8671679197995e-06, "loss": 0.0016, "step": 3136 }, { "epoch": 0.20216536701682025, "grad_norm": 0.0019125566904534606, "learning_rate": 8.866451843895454e-06, "loss": 0.0, "step": 3137 }, { "epoch": 0.20222981246374944, "grad_norm": 0.0030594121784513393, "learning_rate": 8.865735767991408e-06, "loss": 0.0, "step": 3138 }, { "epoch": 0.20229425791067862, "grad_norm": 0.002322376722079305, "learning_rate": 8.865019692087363e-06, "loss": 0.0, "step": 3139 }, { "epoch": 0.20235870335760778, "grad_norm": 0.025830919381848335, "learning_rate": 8.864303616183317e-06, "loss": 0.0001, "step": 3140 }, { "epoch": 0.20242314880453696, "grad_norm": 0.004267439275289735, "learning_rate": 8.86358754027927e-06, "loss": 0.0, "step": 3141 }, { "epoch": 0.20248759425146615, "grad_norm": 0.0005659775357358226, "learning_rate": 8.862871464375224e-06, "loss": 0.0, "step": 3142 }, { "epoch": 0.2025520396983953, "grad_norm": 0.03815054914874814, "learning_rate": 8.862155388471178e-06, "loss": 0.0003, "step": 3143 }, { "epoch": 0.2026164851453245, "grad_norm": 0.3565567469487967, "learning_rate": 8.861439312567132e-06, "loss": 0.0031, "step": 3144 }, { "epoch": 0.20268093059225364, "grad_norm": 0.040210932144915466, "learning_rate": 8.860723236663087e-06, "loss": 0.0002, "step": 3145 }, { "epoch": 0.20274537603918283, "grad_norm": 0.41041854464235217, "learning_rate": 8.860007160759041e-06, "loss": 0.0008, "step": 3146 }, { "epoch": 0.202809821486112, "grad_norm": 0.25541055057218365, "learning_rate": 8.859291084854995e-06, "loss": 0.001, "step": 3147 }, { "epoch": 0.20287426693304117, "grad_norm": 0.010501430279528583, "learning_rate": 8.85857500895095e-06, "loss": 0.0001, "step": 3148 }, { "epoch": 0.20293871237997035, "grad_norm": 0.003696393179009786, "learning_rate": 8.857858933046904e-06, "loss": 0.0, "step": 3149 }, { "epoch": 0.20300315782689954, "grad_norm": 0.003631618092946609, "learning_rate": 8.857142857142858e-06, "loss": 0.0, "step": 3150 }, { "epoch": 0.2030676032738287, "grad_norm": 0.00258595354685201, "learning_rate": 8.856426781238811e-06, "loss": 0.0, "step": 3151 }, { "epoch": 0.20313204872075788, "grad_norm": 0.0008821024753706213, "learning_rate": 8.855710705334765e-06, "loss": 0.0, "step": 3152 }, { "epoch": 0.20319649416768706, "grad_norm": 0.3704561075782924, "learning_rate": 8.85499462943072e-06, "loss": 0.0043, "step": 3153 }, { "epoch": 0.20326093961461622, "grad_norm": 0.014684063583619683, "learning_rate": 8.854278553526675e-06, "loss": 0.0, "step": 3154 }, { "epoch": 0.2033253850615454, "grad_norm": 0.006794537516212787, "learning_rate": 8.85356247762263e-06, "loss": 0.0, "step": 3155 }, { "epoch": 0.2033898305084746, "grad_norm": 1.0445839023372976, "learning_rate": 8.852846401718584e-06, "loss": 0.0022, "step": 3156 }, { "epoch": 0.20345427595540375, "grad_norm": 0.004375746475512265, "learning_rate": 8.852130325814537e-06, "loss": 0.0, "step": 3157 }, { "epoch": 0.20351872140233293, "grad_norm": 0.15032337592505507, "learning_rate": 8.851414249910491e-06, "loss": 0.0002, "step": 3158 }, { "epoch": 0.2035831668492621, "grad_norm": 0.0040612406359037175, "learning_rate": 8.850698174006445e-06, "loss": 0.0, "step": 3159 }, { "epoch": 0.20364761229619127, "grad_norm": 0.0018185464603422453, "learning_rate": 8.8499820981024e-06, "loss": 0.0, "step": 3160 }, { "epoch": 0.20371205774312046, "grad_norm": 0.11170526669906726, "learning_rate": 8.849266022198354e-06, "loss": 0.0005, "step": 3161 }, { "epoch": 0.2037765031900496, "grad_norm": 0.1659282005347964, "learning_rate": 8.848549946294308e-06, "loss": 0.0016, "step": 3162 }, { "epoch": 0.2038409486369788, "grad_norm": 0.016492687346802256, "learning_rate": 8.847833870390262e-06, "loss": 0.0001, "step": 3163 }, { "epoch": 0.20390539408390798, "grad_norm": 0.9390481563748461, "learning_rate": 8.847117794486217e-06, "loss": 0.0031, "step": 3164 }, { "epoch": 0.20396983953083714, "grad_norm": 0.0022533603744609204, "learning_rate": 8.846401718582171e-06, "loss": 0.0, "step": 3165 }, { "epoch": 0.20403428497776632, "grad_norm": 0.07041904205568279, "learning_rate": 8.845685642678125e-06, "loss": 0.0001, "step": 3166 }, { "epoch": 0.2040987304246955, "grad_norm": 0.2668981613989854, "learning_rate": 8.844969566774078e-06, "loss": 0.0052, "step": 3167 }, { "epoch": 0.20416317587162466, "grad_norm": 0.027716524548335512, "learning_rate": 8.844253490870032e-06, "loss": 0.0, "step": 3168 }, { "epoch": 0.20422762131855385, "grad_norm": 0.003601240274471943, "learning_rate": 8.843537414965987e-06, "loss": 0.0, "step": 3169 }, { "epoch": 0.20429206676548303, "grad_norm": 0.012017656279619291, "learning_rate": 8.84282133906194e-06, "loss": 0.0, "step": 3170 }, { "epoch": 0.2043565122124122, "grad_norm": 0.02095705244991158, "learning_rate": 8.842105263157895e-06, "loss": 0.0, "step": 3171 }, { "epoch": 0.20442095765934137, "grad_norm": 0.040138181546046925, "learning_rate": 8.84138918725385e-06, "loss": 0.0, "step": 3172 }, { "epoch": 0.20448540310627053, "grad_norm": 0.009901321405853671, "learning_rate": 8.840673111349804e-06, "loss": 0.0, "step": 3173 }, { "epoch": 0.20454984855319971, "grad_norm": 0.007040968629763532, "learning_rate": 8.839957035445758e-06, "loss": 0.0, "step": 3174 }, { "epoch": 0.2046142940001289, "grad_norm": 0.014848915765067815, "learning_rate": 8.839240959541712e-06, "loss": 0.0, "step": 3175 }, { "epoch": 0.20467873944705806, "grad_norm": 0.23642315018145912, "learning_rate": 8.838524883637667e-06, "loss": 0.0008, "step": 3176 }, { "epoch": 0.20474318489398724, "grad_norm": 0.0029172454636086998, "learning_rate": 8.83780880773362e-06, "loss": 0.0, "step": 3177 }, { "epoch": 0.20480763034091642, "grad_norm": 0.10179572652957916, "learning_rate": 8.837092731829575e-06, "loss": 0.0002, "step": 3178 }, { "epoch": 0.20487207578784558, "grad_norm": 0.24513314487750718, "learning_rate": 8.83637665592553e-06, "loss": 0.0041, "step": 3179 }, { "epoch": 0.20493652123477477, "grad_norm": 0.012036311721826174, "learning_rate": 8.835660580021484e-06, "loss": 0.0, "step": 3180 }, { "epoch": 0.20500096668170395, "grad_norm": 0.2696918479020835, "learning_rate": 8.834944504117438e-06, "loss": 0.0005, "step": 3181 }, { "epoch": 0.2050654121286331, "grad_norm": 0.0003936673405125266, "learning_rate": 8.834228428213392e-06, "loss": 0.0, "step": 3182 }, { "epoch": 0.2051298575755623, "grad_norm": 0.058986318316246796, "learning_rate": 8.833512352309345e-06, "loss": 0.0, "step": 3183 }, { "epoch": 0.20519430302249145, "grad_norm": 0.13323453158189, "learning_rate": 8.8327962764053e-06, "loss": 0.0013, "step": 3184 }, { "epoch": 0.20525874846942063, "grad_norm": 1.0706027674551744, "learning_rate": 8.832080200501254e-06, "loss": 0.0052, "step": 3185 }, { "epoch": 0.20532319391634982, "grad_norm": 0.04365196867916086, "learning_rate": 8.831364124597208e-06, "loss": 0.0001, "step": 3186 }, { "epoch": 0.20538763936327897, "grad_norm": 0.00400724211170088, "learning_rate": 8.830648048693162e-06, "loss": 0.0, "step": 3187 }, { "epoch": 0.20545208481020816, "grad_norm": 0.25079876728614287, "learning_rate": 8.829931972789117e-06, "loss": 0.0051, "step": 3188 }, { "epoch": 0.20551653025713734, "grad_norm": 0.6116119554626651, "learning_rate": 8.82921589688507e-06, "loss": 0.002, "step": 3189 }, { "epoch": 0.2055809757040665, "grad_norm": 0.9003768555752997, "learning_rate": 8.828499820981025e-06, "loss": 0.0081, "step": 3190 }, { "epoch": 0.20564542115099568, "grad_norm": 0.14866784067739025, "learning_rate": 8.82778374507698e-06, "loss": 0.0005, "step": 3191 }, { "epoch": 0.20570986659792487, "grad_norm": 0.004681531754656857, "learning_rate": 8.827067669172934e-06, "loss": 0.0001, "step": 3192 }, { "epoch": 0.20577431204485402, "grad_norm": 0.0016474154499777846, "learning_rate": 8.826351593268886e-06, "loss": 0.0, "step": 3193 }, { "epoch": 0.2058387574917832, "grad_norm": 0.7216092334678865, "learning_rate": 8.82563551736484e-06, "loss": 0.0047, "step": 3194 }, { "epoch": 0.2059032029387124, "grad_norm": 0.046842534027833414, "learning_rate": 8.824919441460795e-06, "loss": 0.0, "step": 3195 }, { "epoch": 0.20596764838564155, "grad_norm": 0.008319201874511975, "learning_rate": 8.82420336555675e-06, "loss": 0.0, "step": 3196 }, { "epoch": 0.20603209383257073, "grad_norm": 0.07627724121336131, "learning_rate": 8.823487289652704e-06, "loss": 0.0001, "step": 3197 }, { "epoch": 0.2060965392794999, "grad_norm": 0.04090758410395465, "learning_rate": 8.822771213748658e-06, "loss": 0.0001, "step": 3198 }, { "epoch": 0.20616098472642908, "grad_norm": 0.004577226024324245, "learning_rate": 8.822055137844612e-06, "loss": 0.0, "step": 3199 }, { "epoch": 0.20622543017335826, "grad_norm": 0.5695110071506763, "learning_rate": 8.821339061940566e-06, "loss": 0.0015, "step": 3200 }, { "epoch": 0.20628987562028742, "grad_norm": 0.20783832970335034, "learning_rate": 8.82062298603652e-06, "loss": 0.0007, "step": 3201 }, { "epoch": 0.2063543210672166, "grad_norm": 0.07589570440082365, "learning_rate": 8.819906910132475e-06, "loss": 0.0003, "step": 3202 }, { "epoch": 0.20641876651414579, "grad_norm": 0.025595976228796117, "learning_rate": 8.81919083422843e-06, "loss": 0.0001, "step": 3203 }, { "epoch": 0.20648321196107494, "grad_norm": 0.008649726581106711, "learning_rate": 8.818474758324384e-06, "loss": 0.0001, "step": 3204 }, { "epoch": 0.20654765740800413, "grad_norm": 0.019401712300670126, "learning_rate": 8.817758682420338e-06, "loss": 0.0001, "step": 3205 }, { "epoch": 0.2066121028549333, "grad_norm": 0.018649523956299447, "learning_rate": 8.817042606516292e-06, "loss": 0.0, "step": 3206 }, { "epoch": 0.20667654830186247, "grad_norm": 0.004175074407233036, "learning_rate": 8.816326530612247e-06, "loss": 0.0, "step": 3207 }, { "epoch": 0.20674099374879165, "grad_norm": 0.001537868439082998, "learning_rate": 8.8156104547082e-06, "loss": 0.0, "step": 3208 }, { "epoch": 0.20680543919572084, "grad_norm": 0.004367193650683198, "learning_rate": 8.814894378804153e-06, "loss": 0.0, "step": 3209 }, { "epoch": 0.20686988464265, "grad_norm": 0.0008591282411366313, "learning_rate": 8.814178302900108e-06, "loss": 0.0, "step": 3210 }, { "epoch": 0.20693433008957918, "grad_norm": 0.029196901412854644, "learning_rate": 8.813462226996062e-06, "loss": 0.0001, "step": 3211 }, { "epoch": 0.20699877553650833, "grad_norm": 0.014151467748943703, "learning_rate": 8.812746151092016e-06, "loss": 0.0, "step": 3212 }, { "epoch": 0.20706322098343752, "grad_norm": 0.0005395895476555126, "learning_rate": 8.81203007518797e-06, "loss": 0.0, "step": 3213 }, { "epoch": 0.2071276664303667, "grad_norm": 0.1063896173481837, "learning_rate": 8.811313999283925e-06, "loss": 0.0008, "step": 3214 }, { "epoch": 0.20719211187729586, "grad_norm": 0.04907908321108309, "learning_rate": 8.81059792337988e-06, "loss": 0.0001, "step": 3215 }, { "epoch": 0.20725655732422504, "grad_norm": 0.0009487456876486122, "learning_rate": 8.809881847475833e-06, "loss": 0.0, "step": 3216 }, { "epoch": 0.20732100277115423, "grad_norm": 0.012295272892583854, "learning_rate": 8.809165771571788e-06, "loss": 0.0, "step": 3217 }, { "epoch": 0.20738544821808338, "grad_norm": 0.003048664850237199, "learning_rate": 8.80844969566774e-06, "loss": 0.0, "step": 3218 }, { "epoch": 0.20744989366501257, "grad_norm": 0.007495688176306054, "learning_rate": 8.807733619763695e-06, "loss": 0.0001, "step": 3219 }, { "epoch": 0.20751433911194175, "grad_norm": 0.0035060227618026463, "learning_rate": 8.807017543859649e-06, "loss": 0.0, "step": 3220 }, { "epoch": 0.2075787845588709, "grad_norm": 0.03656436499960002, "learning_rate": 8.806301467955603e-06, "loss": 0.0002, "step": 3221 }, { "epoch": 0.2076432300058001, "grad_norm": 0.29249599460157977, "learning_rate": 8.805585392051558e-06, "loss": 0.0006, "step": 3222 }, { "epoch": 0.20770767545272925, "grad_norm": 0.011994526837623365, "learning_rate": 8.804869316147512e-06, "loss": 0.0, "step": 3223 }, { "epoch": 0.20777212089965844, "grad_norm": 0.004782641801488116, "learning_rate": 8.804153240243468e-06, "loss": 0.0, "step": 3224 }, { "epoch": 0.20783656634658762, "grad_norm": 0.04384486055700967, "learning_rate": 8.80343716433942e-06, "loss": 0.0001, "step": 3225 }, { "epoch": 0.20790101179351678, "grad_norm": 0.0033315971745620817, "learning_rate": 8.802721088435375e-06, "loss": 0.0, "step": 3226 }, { "epoch": 0.20796545724044596, "grad_norm": 0.016471005833125225, "learning_rate": 8.802005012531329e-06, "loss": 0.0, "step": 3227 }, { "epoch": 0.20802990268737515, "grad_norm": 0.008197529248738967, "learning_rate": 8.801288936627283e-06, "loss": 0.0001, "step": 3228 }, { "epoch": 0.2080943481343043, "grad_norm": 0.012575679376179708, "learning_rate": 8.800572860723238e-06, "loss": 0.0, "step": 3229 }, { "epoch": 0.2081587935812335, "grad_norm": 0.0986802487508774, "learning_rate": 8.799856784819192e-06, "loss": 0.0021, "step": 3230 }, { "epoch": 0.20822323902816267, "grad_norm": 0.0008527289712501518, "learning_rate": 8.799140708915146e-06, "loss": 0.0, "step": 3231 }, { "epoch": 0.20828768447509183, "grad_norm": 0.9886804912648364, "learning_rate": 8.7984246330111e-06, "loss": 0.0021, "step": 3232 }, { "epoch": 0.208352129922021, "grad_norm": 0.016484567955490047, "learning_rate": 8.797708557107055e-06, "loss": 0.0002, "step": 3233 }, { "epoch": 0.2084165753689502, "grad_norm": 0.28071698703188924, "learning_rate": 8.796992481203007e-06, "loss": 0.0022, "step": 3234 }, { "epoch": 0.20848102081587935, "grad_norm": 0.021459780780179235, "learning_rate": 8.796276405298962e-06, "loss": 0.0002, "step": 3235 }, { "epoch": 0.20854546626280854, "grad_norm": 0.058684126375482114, "learning_rate": 8.795560329394916e-06, "loss": 0.0017, "step": 3236 }, { "epoch": 0.2086099117097377, "grad_norm": 0.0011527392584699309, "learning_rate": 8.79484425349087e-06, "loss": 0.0, "step": 3237 }, { "epoch": 0.20867435715666688, "grad_norm": 0.0020617141823999893, "learning_rate": 8.794128177586825e-06, "loss": 0.0, "step": 3238 }, { "epoch": 0.20873880260359606, "grad_norm": 0.2818069506547426, "learning_rate": 8.793412101682779e-06, "loss": 0.0003, "step": 3239 }, { "epoch": 0.20880324805052522, "grad_norm": 0.0039035938963142623, "learning_rate": 8.792696025778733e-06, "loss": 0.0, "step": 3240 }, { "epoch": 0.2088676934974544, "grad_norm": 0.2977870433530856, "learning_rate": 8.791979949874688e-06, "loss": 0.0011, "step": 3241 }, { "epoch": 0.2089321389443836, "grad_norm": 0.0770880788983326, "learning_rate": 8.791263873970642e-06, "loss": 0.0003, "step": 3242 }, { "epoch": 0.20899658439131275, "grad_norm": 2.4195809605252174, "learning_rate": 8.790547798066596e-06, "loss": 0.0161, "step": 3243 }, { "epoch": 0.20906102983824193, "grad_norm": 0.0061660864423312214, "learning_rate": 8.789831722162549e-06, "loss": 0.0, "step": 3244 }, { "epoch": 0.20912547528517111, "grad_norm": 0.12889428516693582, "learning_rate": 8.789115646258503e-06, "loss": 0.0001, "step": 3245 }, { "epoch": 0.20918992073210027, "grad_norm": 0.09151028984381641, "learning_rate": 8.788399570354457e-06, "loss": 0.0001, "step": 3246 }, { "epoch": 0.20925436617902946, "grad_norm": 0.009779283910463333, "learning_rate": 8.787683494450412e-06, "loss": 0.0, "step": 3247 }, { "epoch": 0.20931881162595864, "grad_norm": 0.006560546257411386, "learning_rate": 8.786967418546368e-06, "loss": 0.0, "step": 3248 }, { "epoch": 0.2093832570728878, "grad_norm": 0.0038019288472966287, "learning_rate": 8.786251342642322e-06, "loss": 0.0, "step": 3249 }, { "epoch": 0.20944770251981698, "grad_norm": 0.09019723577914596, "learning_rate": 8.785535266738275e-06, "loss": 0.0029, "step": 3250 }, { "epoch": 0.20951214796674614, "grad_norm": 0.050902314027495524, "learning_rate": 8.784819190834229e-06, "loss": 0.0001, "step": 3251 }, { "epoch": 0.20957659341367532, "grad_norm": 0.006432140117759434, "learning_rate": 8.784103114930183e-06, "loss": 0.0001, "step": 3252 }, { "epoch": 0.2096410388606045, "grad_norm": 0.006941493151334226, "learning_rate": 8.783387039026137e-06, "loss": 0.0, "step": 3253 }, { "epoch": 0.20970548430753366, "grad_norm": 0.13453681844557985, "learning_rate": 8.782670963122092e-06, "loss": 0.0007, "step": 3254 }, { "epoch": 0.20976992975446285, "grad_norm": 0.16346047845287467, "learning_rate": 8.781954887218046e-06, "loss": 0.0008, "step": 3255 }, { "epoch": 0.20983437520139203, "grad_norm": 0.0006596375563727548, "learning_rate": 8.781238811314e-06, "loss": 0.0, "step": 3256 }, { "epoch": 0.2098988206483212, "grad_norm": 0.004409057604210094, "learning_rate": 8.780522735409955e-06, "loss": 0.0, "step": 3257 }, { "epoch": 0.20996326609525037, "grad_norm": 0.003006464597436745, "learning_rate": 8.779806659505909e-06, "loss": 0.0, "step": 3258 }, { "epoch": 0.21002771154217956, "grad_norm": 2.8408788922178685, "learning_rate": 8.779090583601863e-06, "loss": 0.0237, "step": 3259 }, { "epoch": 0.2100921569891087, "grad_norm": 0.01016413326544834, "learning_rate": 8.778374507697816e-06, "loss": 0.0, "step": 3260 }, { "epoch": 0.2101566024360379, "grad_norm": 0.01772877014468529, "learning_rate": 8.77765843179377e-06, "loss": 0.0, "step": 3261 }, { "epoch": 0.21022104788296705, "grad_norm": 0.0024843355972861882, "learning_rate": 8.776942355889724e-06, "loss": 0.0, "step": 3262 }, { "epoch": 0.21028549332989624, "grad_norm": 0.016083136451063594, "learning_rate": 8.776226279985679e-06, "loss": 0.0001, "step": 3263 }, { "epoch": 0.21034993877682542, "grad_norm": 0.006143627681656301, "learning_rate": 8.775510204081633e-06, "loss": 0.0, "step": 3264 }, { "epoch": 0.21041438422375458, "grad_norm": 0.6915285034718149, "learning_rate": 8.774794128177587e-06, "loss": 0.0009, "step": 3265 }, { "epoch": 0.21047882967068376, "grad_norm": 0.003211719254704211, "learning_rate": 8.774078052273542e-06, "loss": 0.0, "step": 3266 }, { "epoch": 0.21054327511761295, "grad_norm": 0.04889199609424895, "learning_rate": 8.773361976369496e-06, "loss": 0.0003, "step": 3267 }, { "epoch": 0.2106077205645421, "grad_norm": 0.005836996667072234, "learning_rate": 8.77264590046545e-06, "loss": 0.0, "step": 3268 }, { "epoch": 0.2106721660114713, "grad_norm": 0.04504886589983157, "learning_rate": 8.771929824561405e-06, "loss": 0.0002, "step": 3269 }, { "epoch": 0.21073661145840047, "grad_norm": 0.007049011440276931, "learning_rate": 8.771213748657357e-06, "loss": 0.0, "step": 3270 }, { "epoch": 0.21080105690532963, "grad_norm": 0.01300277264543531, "learning_rate": 8.770497672753313e-06, "loss": 0.0, "step": 3271 }, { "epoch": 0.21086550235225882, "grad_norm": 0.09114491995853359, "learning_rate": 8.769781596849267e-06, "loss": 0.0004, "step": 3272 }, { "epoch": 0.210929947799188, "grad_norm": 0.20353056693551833, "learning_rate": 8.769065520945222e-06, "loss": 0.0003, "step": 3273 }, { "epoch": 0.21099439324611716, "grad_norm": 0.7785442417855192, "learning_rate": 8.768349445041176e-06, "loss": 0.0016, "step": 3274 }, { "epoch": 0.21105883869304634, "grad_norm": 0.979300330893555, "learning_rate": 8.76763336913713e-06, "loss": 0.0006, "step": 3275 }, { "epoch": 0.2111232841399755, "grad_norm": 0.004026414319159687, "learning_rate": 8.766917293233083e-06, "loss": 0.0, "step": 3276 }, { "epoch": 0.21118772958690468, "grad_norm": 1.9066745097770412, "learning_rate": 8.766201217329037e-06, "loss": 0.0091, "step": 3277 }, { "epoch": 0.21125217503383387, "grad_norm": 0.02170203435620058, "learning_rate": 8.765485141424992e-06, "loss": 0.0001, "step": 3278 }, { "epoch": 0.21131662048076302, "grad_norm": 0.001396143696547249, "learning_rate": 8.764769065520946e-06, "loss": 0.0, "step": 3279 }, { "epoch": 0.2113810659276922, "grad_norm": 0.07585580675491908, "learning_rate": 8.7640529896169e-06, "loss": 0.0002, "step": 3280 }, { "epoch": 0.2114455113746214, "grad_norm": 0.05296037073165522, "learning_rate": 8.763336913712854e-06, "loss": 0.0001, "step": 3281 }, { "epoch": 0.21150995682155055, "grad_norm": 0.00801547689700731, "learning_rate": 8.762620837808809e-06, "loss": 0.0, "step": 3282 }, { "epoch": 0.21157440226847973, "grad_norm": 0.05558661669894557, "learning_rate": 8.761904761904763e-06, "loss": 0.0001, "step": 3283 }, { "epoch": 0.21163884771540892, "grad_norm": 0.03368207035083663, "learning_rate": 8.761188686000717e-06, "loss": 0.0, "step": 3284 }, { "epoch": 0.21170329316233807, "grad_norm": 0.16717958253313203, "learning_rate": 8.760472610096672e-06, "loss": 0.0005, "step": 3285 }, { "epoch": 0.21176773860926726, "grad_norm": 0.20278401364321336, "learning_rate": 8.759756534192624e-06, "loss": 0.002, "step": 3286 }, { "epoch": 0.21183218405619644, "grad_norm": 0.03134582958365876, "learning_rate": 8.759040458288579e-06, "loss": 0.0002, "step": 3287 }, { "epoch": 0.2118966295031256, "grad_norm": 0.003064029577034968, "learning_rate": 8.758324382384533e-06, "loss": 0.0, "step": 3288 }, { "epoch": 0.21196107495005478, "grad_norm": 0.013964800171850807, "learning_rate": 8.757608306480487e-06, "loss": 0.0001, "step": 3289 }, { "epoch": 0.21202552039698394, "grad_norm": 0.3455218362415116, "learning_rate": 8.756892230576441e-06, "loss": 0.0008, "step": 3290 }, { "epoch": 0.21208996584391313, "grad_norm": 0.1274534928593735, "learning_rate": 8.756176154672396e-06, "loss": 0.0003, "step": 3291 }, { "epoch": 0.2121544112908423, "grad_norm": 0.02204250914795714, "learning_rate": 8.75546007876835e-06, "loss": 0.0001, "step": 3292 }, { "epoch": 0.21221885673777147, "grad_norm": 0.0020332618231904947, "learning_rate": 8.754744002864304e-06, "loss": 0.0, "step": 3293 }, { "epoch": 0.21228330218470065, "grad_norm": 0.010261200257673677, "learning_rate": 8.754027926960259e-06, "loss": 0.0, "step": 3294 }, { "epoch": 0.21234774763162984, "grad_norm": 0.005997398083936662, "learning_rate": 8.753311851056213e-06, "loss": 0.0, "step": 3295 }, { "epoch": 0.212412193078559, "grad_norm": 0.007660184481136306, "learning_rate": 8.752595775152167e-06, "loss": 0.0, "step": 3296 }, { "epoch": 0.21247663852548818, "grad_norm": 0.038827415225720155, "learning_rate": 8.751879699248122e-06, "loss": 0.0, "step": 3297 }, { "epoch": 0.21254108397241736, "grad_norm": 0.33644458291898005, "learning_rate": 8.751163623344076e-06, "loss": 0.0024, "step": 3298 }, { "epoch": 0.21260552941934652, "grad_norm": 0.017198641204569848, "learning_rate": 8.75044754744003e-06, "loss": 0.0001, "step": 3299 }, { "epoch": 0.2126699748662757, "grad_norm": 0.15987451125906116, "learning_rate": 8.749731471535984e-06, "loss": 0.0014, "step": 3300 }, { "epoch": 0.21273442031320486, "grad_norm": 0.007898780452521827, "learning_rate": 8.749015395631939e-06, "loss": 0.0, "step": 3301 }, { "epoch": 0.21279886576013404, "grad_norm": 0.2562249607552893, "learning_rate": 8.748299319727891e-06, "loss": 0.003, "step": 3302 }, { "epoch": 0.21286331120706323, "grad_norm": 0.0024193017311194913, "learning_rate": 8.747583243823846e-06, "loss": 0.0, "step": 3303 }, { "epoch": 0.21292775665399238, "grad_norm": 0.005256035814212943, "learning_rate": 8.7468671679198e-06, "loss": 0.0, "step": 3304 }, { "epoch": 0.21299220210092157, "grad_norm": 0.012534387630057374, "learning_rate": 8.746151092015754e-06, "loss": 0.0, "step": 3305 }, { "epoch": 0.21305664754785075, "grad_norm": 0.002335593572439098, "learning_rate": 8.745435016111709e-06, "loss": 0.0, "step": 3306 }, { "epoch": 0.2131210929947799, "grad_norm": 3.6138689872499303, "learning_rate": 8.744718940207663e-06, "loss": 0.0088, "step": 3307 }, { "epoch": 0.2131855384417091, "grad_norm": 0.021828677739768283, "learning_rate": 8.744002864303617e-06, "loss": 0.0001, "step": 3308 }, { "epoch": 0.21324998388863828, "grad_norm": 0.008283227528941124, "learning_rate": 8.743286788399571e-06, "loss": 0.0, "step": 3309 }, { "epoch": 0.21331442933556743, "grad_norm": 0.05812750326426742, "learning_rate": 8.742570712495526e-06, "loss": 0.0006, "step": 3310 }, { "epoch": 0.21337887478249662, "grad_norm": 0.016621163486587685, "learning_rate": 8.741854636591478e-06, "loss": 0.0, "step": 3311 }, { "epoch": 0.2134433202294258, "grad_norm": 0.00017105550687940947, "learning_rate": 8.741138560687433e-06, "loss": 0.0, "step": 3312 }, { "epoch": 0.21350776567635496, "grad_norm": 0.6441706949390169, "learning_rate": 8.740422484783387e-06, "loss": 0.0009, "step": 3313 }, { "epoch": 0.21357221112328414, "grad_norm": 0.04369000739798586, "learning_rate": 8.739706408879341e-06, "loss": 0.0, "step": 3314 }, { "epoch": 0.2136366565702133, "grad_norm": 0.09402632719574942, "learning_rate": 8.738990332975296e-06, "loss": 0.0003, "step": 3315 }, { "epoch": 0.21370110201714249, "grad_norm": 0.05016807446108145, "learning_rate": 8.73827425707125e-06, "loss": 0.0001, "step": 3316 }, { "epoch": 0.21376554746407167, "grad_norm": 0.010129704308617241, "learning_rate": 8.737558181167204e-06, "loss": 0.0, "step": 3317 }, { "epoch": 0.21382999291100083, "grad_norm": 0.06499619789764004, "learning_rate": 8.736842105263158e-06, "loss": 0.0001, "step": 3318 }, { "epoch": 0.21389443835793, "grad_norm": 0.005310078119373596, "learning_rate": 8.736126029359113e-06, "loss": 0.0, "step": 3319 }, { "epoch": 0.2139588838048592, "grad_norm": 0.0006348497178582359, "learning_rate": 8.735409953455067e-06, "loss": 0.0, "step": 3320 }, { "epoch": 0.21402332925178835, "grad_norm": 0.00472366162278607, "learning_rate": 8.734693877551021e-06, "loss": 0.0, "step": 3321 }, { "epoch": 0.21408777469871754, "grad_norm": 0.015531686896680753, "learning_rate": 8.733977801646976e-06, "loss": 0.0002, "step": 3322 }, { "epoch": 0.21415222014564672, "grad_norm": 0.02151993293429727, "learning_rate": 8.73326172574293e-06, "loss": 0.0002, "step": 3323 }, { "epoch": 0.21421666559257588, "grad_norm": 0.010721432144462448, "learning_rate": 8.732545649838884e-06, "loss": 0.0, "step": 3324 }, { "epoch": 0.21428111103950506, "grad_norm": 0.07421909194164812, "learning_rate": 8.731829573934839e-06, "loss": 0.0034, "step": 3325 }, { "epoch": 0.21434555648643425, "grad_norm": 0.003490125792385133, "learning_rate": 8.731113498030793e-06, "loss": 0.0, "step": 3326 }, { "epoch": 0.2144100019333634, "grad_norm": 0.6003774389582377, "learning_rate": 8.730397422126745e-06, "loss": 0.003, "step": 3327 }, { "epoch": 0.2144744473802926, "grad_norm": 0.0031710861298442842, "learning_rate": 8.7296813462227e-06, "loss": 0.0, "step": 3328 }, { "epoch": 0.21453889282722174, "grad_norm": 0.006062084484470189, "learning_rate": 8.728965270318654e-06, "loss": 0.0, "step": 3329 }, { "epoch": 0.21460333827415093, "grad_norm": 0.0046390183140608615, "learning_rate": 8.728249194414608e-06, "loss": 0.0, "step": 3330 }, { "epoch": 0.2146677837210801, "grad_norm": 0.0028126984205885065, "learning_rate": 8.727533118510563e-06, "loss": 0.0, "step": 3331 }, { "epoch": 0.21473222916800927, "grad_norm": 0.33433982271918034, "learning_rate": 8.726817042606517e-06, "loss": 0.0026, "step": 3332 }, { "epoch": 0.21479667461493845, "grad_norm": 0.09134759936800937, "learning_rate": 8.726100966702471e-06, "loss": 0.0004, "step": 3333 }, { "epoch": 0.21486112006186764, "grad_norm": 0.10058142364892066, "learning_rate": 8.725384890798425e-06, "loss": 0.0001, "step": 3334 }, { "epoch": 0.2149255655087968, "grad_norm": 0.0016853936002382036, "learning_rate": 8.72466881489438e-06, "loss": 0.0, "step": 3335 }, { "epoch": 0.21499001095572598, "grad_norm": 0.15968984634818126, "learning_rate": 8.723952738990334e-06, "loss": 0.0004, "step": 3336 }, { "epoch": 0.21505445640265516, "grad_norm": 0.014139192268035285, "learning_rate": 8.723236663086287e-06, "loss": 0.0, "step": 3337 }, { "epoch": 0.21511890184958432, "grad_norm": 0.0609662983623595, "learning_rate": 8.722520587182241e-06, "loss": 0.0001, "step": 3338 }, { "epoch": 0.2151833472965135, "grad_norm": 0.00030584942007946705, "learning_rate": 8.721804511278195e-06, "loss": 0.0, "step": 3339 }, { "epoch": 0.2152477927434427, "grad_norm": 0.010851115075375938, "learning_rate": 8.72108843537415e-06, "loss": 0.0, "step": 3340 }, { "epoch": 0.21531223819037185, "grad_norm": 0.0025828937431415975, "learning_rate": 8.720372359470106e-06, "loss": 0.0, "step": 3341 }, { "epoch": 0.21537668363730103, "grad_norm": 0.27412838166825926, "learning_rate": 8.71965628356606e-06, "loss": 0.0002, "step": 3342 }, { "epoch": 0.2154411290842302, "grad_norm": 0.0014394433601773147, "learning_rate": 8.718940207662012e-06, "loss": 0.0, "step": 3343 }, { "epoch": 0.21550557453115937, "grad_norm": 0.03302873013551384, "learning_rate": 8.718224131757967e-06, "loss": 0.0001, "step": 3344 }, { "epoch": 0.21557001997808856, "grad_norm": 0.003994157454168405, "learning_rate": 8.717508055853921e-06, "loss": 0.0, "step": 3345 }, { "epoch": 0.2156344654250177, "grad_norm": 0.03668763155929853, "learning_rate": 8.716791979949875e-06, "loss": 0.0001, "step": 3346 }, { "epoch": 0.2156989108719469, "grad_norm": 0.0035984155267912584, "learning_rate": 8.71607590404583e-06, "loss": 0.0, "step": 3347 }, { "epoch": 0.21576335631887608, "grad_norm": 0.000645077782702887, "learning_rate": 8.715359828141784e-06, "loss": 0.0, "step": 3348 }, { "epoch": 0.21582780176580524, "grad_norm": 0.0007157999193808737, "learning_rate": 8.714643752237738e-06, "loss": 0.0, "step": 3349 }, { "epoch": 0.21589224721273442, "grad_norm": 0.08779469924341486, "learning_rate": 8.713927676333693e-06, "loss": 0.0007, "step": 3350 }, { "epoch": 0.2159566926596636, "grad_norm": 0.0009734634672225255, "learning_rate": 8.713211600429647e-06, "loss": 0.0, "step": 3351 }, { "epoch": 0.21602113810659276, "grad_norm": 0.009129763667514236, "learning_rate": 8.712495524525601e-06, "loss": 0.0001, "step": 3352 }, { "epoch": 0.21608558355352195, "grad_norm": 0.0015141456217384983, "learning_rate": 8.711779448621554e-06, "loss": 0.0, "step": 3353 }, { "epoch": 0.2161500290004511, "grad_norm": 0.009750178906905728, "learning_rate": 8.711063372717508e-06, "loss": 0.0, "step": 3354 }, { "epoch": 0.2162144744473803, "grad_norm": 0.7822016070286729, "learning_rate": 8.710347296813462e-06, "loss": 0.0092, "step": 3355 }, { "epoch": 0.21627891989430947, "grad_norm": 0.0008353497719524981, "learning_rate": 8.709631220909417e-06, "loss": 0.0, "step": 3356 }, { "epoch": 0.21634336534123863, "grad_norm": 0.0837409934184648, "learning_rate": 8.708915145005371e-06, "loss": 0.0001, "step": 3357 }, { "epoch": 0.21640781078816781, "grad_norm": 0.022414822248022617, "learning_rate": 8.708199069101325e-06, "loss": 0.0001, "step": 3358 }, { "epoch": 0.216472256235097, "grad_norm": 0.11345686946837005, "learning_rate": 8.70748299319728e-06, "loss": 0.0001, "step": 3359 }, { "epoch": 0.21653670168202616, "grad_norm": 0.08448661016414556, "learning_rate": 8.706766917293234e-06, "loss": 0.0009, "step": 3360 }, { "epoch": 0.21660114712895534, "grad_norm": 0.0220006539202053, "learning_rate": 8.706050841389188e-06, "loss": 0.0, "step": 3361 }, { "epoch": 0.21666559257588452, "grad_norm": 0.08631784358443949, "learning_rate": 8.705334765485142e-06, "loss": 0.0001, "step": 3362 }, { "epoch": 0.21673003802281368, "grad_norm": 0.029033055171980918, "learning_rate": 8.704618689581095e-06, "loss": 0.0001, "step": 3363 }, { "epoch": 0.21679448346974287, "grad_norm": 0.4350533533262354, "learning_rate": 8.703902613677051e-06, "loss": 0.0039, "step": 3364 }, { "epoch": 0.21685892891667205, "grad_norm": 0.001498889728926459, "learning_rate": 8.703186537773005e-06, "loss": 0.0, "step": 3365 }, { "epoch": 0.2169233743636012, "grad_norm": 0.010148400387620866, "learning_rate": 8.70247046186896e-06, "loss": 0.0, "step": 3366 }, { "epoch": 0.2169878198105304, "grad_norm": 0.007033440864080341, "learning_rate": 8.701754385964914e-06, "loss": 0.0, "step": 3367 }, { "epoch": 0.21705226525745955, "grad_norm": 0.05006730273660623, "learning_rate": 8.701038310060868e-06, "loss": 0.0001, "step": 3368 }, { "epoch": 0.21711671070438873, "grad_norm": 0.007402992324705263, "learning_rate": 8.700322234156821e-06, "loss": 0.0, "step": 3369 }, { "epoch": 0.21718115615131792, "grad_norm": 0.0057464255085265354, "learning_rate": 8.699606158252775e-06, "loss": 0.0, "step": 3370 }, { "epoch": 0.21724560159824707, "grad_norm": 0.05918935218428574, "learning_rate": 8.69889008234873e-06, "loss": 0.0003, "step": 3371 }, { "epoch": 0.21731004704517626, "grad_norm": 0.025653915972399525, "learning_rate": 8.698174006444684e-06, "loss": 0.0002, "step": 3372 }, { "epoch": 0.21737449249210544, "grad_norm": 0.012820261677368707, "learning_rate": 8.697457930540638e-06, "loss": 0.0002, "step": 3373 }, { "epoch": 0.2174389379390346, "grad_norm": 0.003222630195440105, "learning_rate": 8.696741854636592e-06, "loss": 0.0, "step": 3374 }, { "epoch": 0.21750338338596378, "grad_norm": 0.005536295847970089, "learning_rate": 8.696025778732547e-06, "loss": 0.0, "step": 3375 }, { "epoch": 0.21756782883289297, "grad_norm": 0.0475632780772114, "learning_rate": 8.695309702828501e-06, "loss": 0.0001, "step": 3376 }, { "epoch": 0.21763227427982212, "grad_norm": 0.013851927429286565, "learning_rate": 8.694593626924455e-06, "loss": 0.0, "step": 3377 }, { "epoch": 0.2176967197267513, "grad_norm": 0.006665526372697656, "learning_rate": 8.69387755102041e-06, "loss": 0.0, "step": 3378 }, { "epoch": 0.2177611651736805, "grad_norm": 0.0003302123249551213, "learning_rate": 8.693161475116362e-06, "loss": 0.0, "step": 3379 }, { "epoch": 0.21782561062060965, "grad_norm": 0.04042210473255954, "learning_rate": 8.692445399212316e-06, "loss": 0.0, "step": 3380 }, { "epoch": 0.21789005606753883, "grad_norm": 0.032240357119762836, "learning_rate": 8.69172932330827e-06, "loss": 0.0001, "step": 3381 }, { "epoch": 0.217954501514468, "grad_norm": 0.028356624417666038, "learning_rate": 8.691013247404225e-06, "loss": 0.0001, "step": 3382 }, { "epoch": 0.21801894696139718, "grad_norm": 0.24312603511442854, "learning_rate": 8.69029717150018e-06, "loss": 0.0007, "step": 3383 }, { "epoch": 0.21808339240832636, "grad_norm": 0.06490065794875387, "learning_rate": 8.689581095596134e-06, "loss": 0.0001, "step": 3384 }, { "epoch": 0.21814783785525552, "grad_norm": 0.0003474384346755857, "learning_rate": 8.688865019692088e-06, "loss": 0.0, "step": 3385 }, { "epoch": 0.2182122833021847, "grad_norm": 0.010435808402401832, "learning_rate": 8.688148943788042e-06, "loss": 0.0001, "step": 3386 }, { "epoch": 0.21827672874911389, "grad_norm": 0.10267596869027987, "learning_rate": 8.687432867883997e-06, "loss": 0.0004, "step": 3387 }, { "epoch": 0.21834117419604304, "grad_norm": 0.00680259145453217, "learning_rate": 8.686716791979951e-06, "loss": 0.0, "step": 3388 }, { "epoch": 0.21840561964297223, "grad_norm": 0.0007357830666917632, "learning_rate": 8.686000716075905e-06, "loss": 0.0, "step": 3389 }, { "epoch": 0.2184700650899014, "grad_norm": 0.017961030447330713, "learning_rate": 8.68528464017186e-06, "loss": 0.0, "step": 3390 }, { "epoch": 0.21853451053683057, "grad_norm": 0.015245958178808318, "learning_rate": 8.684568564267814e-06, "loss": 0.0, "step": 3391 }, { "epoch": 0.21859895598375975, "grad_norm": 0.2522366576990323, "learning_rate": 8.683852488363768e-06, "loss": 0.0013, "step": 3392 }, { "epoch": 0.2186634014306889, "grad_norm": 0.004980891866569009, "learning_rate": 8.683136412459722e-06, "loss": 0.0, "step": 3393 }, { "epoch": 0.2187278468776181, "grad_norm": 0.005101799462129407, "learning_rate": 8.682420336555677e-06, "loss": 0.0, "step": 3394 }, { "epoch": 0.21879229232454728, "grad_norm": 0.0034149603184703468, "learning_rate": 8.68170426065163e-06, "loss": 0.0, "step": 3395 }, { "epoch": 0.21885673777147643, "grad_norm": 0.03652058159275285, "learning_rate": 8.680988184747584e-06, "loss": 0.0002, "step": 3396 }, { "epoch": 0.21892118321840562, "grad_norm": 0.007019698589223098, "learning_rate": 8.680272108843538e-06, "loss": 0.0, "step": 3397 }, { "epoch": 0.2189856286653348, "grad_norm": 0.0005208419070086863, "learning_rate": 8.679556032939492e-06, "loss": 0.0, "step": 3398 }, { "epoch": 0.21905007411226396, "grad_norm": 0.022129151887111145, "learning_rate": 8.678839957035446e-06, "loss": 0.0, "step": 3399 }, { "epoch": 0.21911451955919314, "grad_norm": 0.008741674890565838, "learning_rate": 8.6781238811314e-06, "loss": 0.0, "step": 3400 }, { "epoch": 0.21917896500612233, "grad_norm": 0.0028274312509968943, "learning_rate": 8.677407805227355e-06, "loss": 0.0, "step": 3401 }, { "epoch": 0.21924341045305148, "grad_norm": 0.027357096054692588, "learning_rate": 8.67669172932331e-06, "loss": 0.0001, "step": 3402 }, { "epoch": 0.21930785589998067, "grad_norm": 9.549562120148108e-05, "learning_rate": 8.675975653419264e-06, "loss": 0.0, "step": 3403 }, { "epoch": 0.21937230134690985, "grad_norm": 0.00035971321640775695, "learning_rate": 8.675259577515216e-06, "loss": 0.0, "step": 3404 }, { "epoch": 0.219436746793839, "grad_norm": 0.001148672309388114, "learning_rate": 8.67454350161117e-06, "loss": 0.0, "step": 3405 }, { "epoch": 0.2195011922407682, "grad_norm": 0.0022274004349375665, "learning_rate": 8.673827425707125e-06, "loss": 0.0, "step": 3406 }, { "epoch": 0.21956563768769735, "grad_norm": 0.001988663302904296, "learning_rate": 8.673111349803079e-06, "loss": 0.0, "step": 3407 }, { "epoch": 0.21963008313462654, "grad_norm": 0.004393998501194764, "learning_rate": 8.672395273899033e-06, "loss": 0.0, "step": 3408 }, { "epoch": 0.21969452858155572, "grad_norm": 0.026292318215849746, "learning_rate": 8.671679197994988e-06, "loss": 0.0, "step": 3409 }, { "epoch": 0.21975897402848488, "grad_norm": 0.037110821168728526, "learning_rate": 8.670963122090942e-06, "loss": 0.0004, "step": 3410 }, { "epoch": 0.21982341947541406, "grad_norm": 0.0004869931471118294, "learning_rate": 8.670247046186896e-06, "loss": 0.0, "step": 3411 }, { "epoch": 0.21988786492234325, "grad_norm": 0.035249077579844044, "learning_rate": 8.66953097028285e-06, "loss": 0.0003, "step": 3412 }, { "epoch": 0.2199523103692724, "grad_norm": 0.001970382648606595, "learning_rate": 8.668814894378805e-06, "loss": 0.0, "step": 3413 }, { "epoch": 0.2200167558162016, "grad_norm": 0.011623401973596743, "learning_rate": 8.66809881847476e-06, "loss": 0.0001, "step": 3414 }, { "epoch": 0.22008120126313077, "grad_norm": 0.00034764643982887803, "learning_rate": 8.667382742570714e-06, "loss": 0.0, "step": 3415 }, { "epoch": 0.22014564671005993, "grad_norm": 8.975251794623639e-05, "learning_rate": 8.666666666666668e-06, "loss": 0.0, "step": 3416 }, { "epoch": 0.2202100921569891, "grad_norm": 0.07822764627206331, "learning_rate": 8.665950590762622e-06, "loss": 0.0007, "step": 3417 }, { "epoch": 0.2202745376039183, "grad_norm": 0.020361744152594136, "learning_rate": 8.665234514858576e-06, "loss": 0.0001, "step": 3418 }, { "epoch": 0.22033898305084745, "grad_norm": 0.031516272508776165, "learning_rate": 8.66451843895453e-06, "loss": 0.0001, "step": 3419 }, { "epoch": 0.22040342849777664, "grad_norm": 0.0008974531050145935, "learning_rate": 8.663802363050483e-06, "loss": 0.0, "step": 3420 }, { "epoch": 0.2204678739447058, "grad_norm": 0.16912508093612774, "learning_rate": 8.663086287146438e-06, "loss": 0.0003, "step": 3421 }, { "epoch": 0.22053231939163498, "grad_norm": 0.00018829353512949963, "learning_rate": 8.662370211242392e-06, "loss": 0.0, "step": 3422 }, { "epoch": 0.22059676483856416, "grad_norm": 0.00521659484946839, "learning_rate": 8.661654135338346e-06, "loss": 0.0, "step": 3423 }, { "epoch": 0.22066121028549332, "grad_norm": 0.35939284985206355, "learning_rate": 8.6609380594343e-06, "loss": 0.0027, "step": 3424 }, { "epoch": 0.2207256557324225, "grad_norm": 0.07403550301307933, "learning_rate": 8.660221983530255e-06, "loss": 0.0009, "step": 3425 }, { "epoch": 0.2207901011793517, "grad_norm": 0.004889930625470678, "learning_rate": 8.659505907626209e-06, "loss": 0.0, "step": 3426 }, { "epoch": 0.22085454662628085, "grad_norm": 0.003421520565400845, "learning_rate": 8.658789831722163e-06, "loss": 0.0, "step": 3427 }, { "epoch": 0.22091899207321003, "grad_norm": 0.0053065978274310565, "learning_rate": 8.658073755818118e-06, "loss": 0.0, "step": 3428 }, { "epoch": 0.22098343752013921, "grad_norm": 0.013211383273678805, "learning_rate": 8.657357679914072e-06, "loss": 0.0001, "step": 3429 }, { "epoch": 0.22104788296706837, "grad_norm": 0.0009982449528573667, "learning_rate": 8.656641604010025e-06, "loss": 0.0, "step": 3430 }, { "epoch": 0.22111232841399756, "grad_norm": 0.0015086585130352167, "learning_rate": 8.655925528105979e-06, "loss": 0.0, "step": 3431 }, { "epoch": 0.2211767738609267, "grad_norm": 0.02614516921378581, "learning_rate": 8.655209452201933e-06, "loss": 0.0001, "step": 3432 }, { "epoch": 0.2212412193078559, "grad_norm": 0.018234096510467625, "learning_rate": 8.654493376297888e-06, "loss": 0.0001, "step": 3433 }, { "epoch": 0.22130566475478508, "grad_norm": 0.0011630213225405184, "learning_rate": 8.653777300393844e-06, "loss": 0.0, "step": 3434 }, { "epoch": 0.22137011020171424, "grad_norm": 0.7110074187051323, "learning_rate": 8.653061224489798e-06, "loss": 0.0061, "step": 3435 }, { "epoch": 0.22143455564864342, "grad_norm": 0.00033229507672846427, "learning_rate": 8.65234514858575e-06, "loss": 0.0, "step": 3436 }, { "epoch": 0.2214990010955726, "grad_norm": 0.002534487956548706, "learning_rate": 8.651629072681705e-06, "loss": 0.0, "step": 3437 }, { "epoch": 0.22156344654250176, "grad_norm": 0.17261034087608992, "learning_rate": 8.650912996777659e-06, "loss": 0.0003, "step": 3438 }, { "epoch": 0.22162789198943095, "grad_norm": 0.002774698501918162, "learning_rate": 8.650196920873613e-06, "loss": 0.0, "step": 3439 }, { "epoch": 0.22169233743636013, "grad_norm": 0.008339194776038943, "learning_rate": 8.649480844969568e-06, "loss": 0.0, "step": 3440 }, { "epoch": 0.2217567828832893, "grad_norm": 0.004585672715958108, "learning_rate": 8.648764769065522e-06, "loss": 0.0, "step": 3441 }, { "epoch": 0.22182122833021847, "grad_norm": 0.0030527512602284202, "learning_rate": 8.648048693161476e-06, "loss": 0.0, "step": 3442 }, { "epoch": 0.22188567377714766, "grad_norm": 0.0939224861790215, "learning_rate": 8.64733261725743e-06, "loss": 0.0001, "step": 3443 }, { "epoch": 0.22195011922407681, "grad_norm": 0.003773801458241794, "learning_rate": 8.646616541353385e-06, "loss": 0.0, "step": 3444 }, { "epoch": 0.222014564671006, "grad_norm": 0.019486883556046962, "learning_rate": 8.645900465449339e-06, "loss": 0.0, "step": 3445 }, { "epoch": 0.22207901011793515, "grad_norm": 0.07323750163029638, "learning_rate": 8.645184389545292e-06, "loss": 0.0005, "step": 3446 }, { "epoch": 0.22214345556486434, "grad_norm": 0.00814745306731119, "learning_rate": 8.644468313641246e-06, "loss": 0.0, "step": 3447 }, { "epoch": 0.22220790101179352, "grad_norm": 0.29745805104008016, "learning_rate": 8.6437522377372e-06, "loss": 0.0002, "step": 3448 }, { "epoch": 0.22227234645872268, "grad_norm": 0.010189280976991384, "learning_rate": 8.643036161833155e-06, "loss": 0.0, "step": 3449 }, { "epoch": 0.22233679190565186, "grad_norm": 0.350625219243027, "learning_rate": 8.642320085929109e-06, "loss": 0.0011, "step": 3450 }, { "epoch": 0.22240123735258105, "grad_norm": 0.09586779868305924, "learning_rate": 8.641604010025063e-06, "loss": 0.0014, "step": 3451 }, { "epoch": 0.2224656827995102, "grad_norm": 0.002273792676528464, "learning_rate": 8.640887934121017e-06, "loss": 0.0, "step": 3452 }, { "epoch": 0.2225301282464394, "grad_norm": 0.0036304762924196463, "learning_rate": 8.640171858216972e-06, "loss": 0.0, "step": 3453 }, { "epoch": 0.22259457369336857, "grad_norm": 0.24947871309025466, "learning_rate": 8.639455782312926e-06, "loss": 0.0037, "step": 3454 }, { "epoch": 0.22265901914029773, "grad_norm": 0.00028613456653341616, "learning_rate": 8.63873970640888e-06, "loss": 0.0, "step": 3455 }, { "epoch": 0.22272346458722692, "grad_norm": 7.548515570305155e-05, "learning_rate": 8.638023630504833e-06, "loss": 0.0, "step": 3456 }, { "epoch": 0.2227879100341561, "grad_norm": 0.07466611177857914, "learning_rate": 8.637307554600787e-06, "loss": 0.0002, "step": 3457 }, { "epoch": 0.22285235548108526, "grad_norm": 0.0011913602007814256, "learning_rate": 8.636591478696743e-06, "loss": 0.0, "step": 3458 }, { "epoch": 0.22291680092801444, "grad_norm": 0.0005066447257724092, "learning_rate": 8.635875402792698e-06, "loss": 0.0, "step": 3459 }, { "epoch": 0.2229812463749436, "grad_norm": 0.00195445982760958, "learning_rate": 8.635159326888652e-06, "loss": 0.0, "step": 3460 }, { "epoch": 0.22304569182187278, "grad_norm": 0.00460945396853242, "learning_rate": 8.634443250984606e-06, "loss": 0.0, "step": 3461 }, { "epoch": 0.22311013726880197, "grad_norm": 0.22631486151336772, "learning_rate": 8.633727175080559e-06, "loss": 0.0005, "step": 3462 }, { "epoch": 0.22317458271573112, "grad_norm": 1.5542394097752636, "learning_rate": 8.633011099176513e-06, "loss": 0.0056, "step": 3463 }, { "epoch": 0.2232390281626603, "grad_norm": 0.0003889234814934518, "learning_rate": 8.632295023272467e-06, "loss": 0.0, "step": 3464 }, { "epoch": 0.2233034736095895, "grad_norm": 0.014474523316253513, "learning_rate": 8.631578947368422e-06, "loss": 0.0001, "step": 3465 }, { "epoch": 0.22336791905651865, "grad_norm": 0.000770039970009564, "learning_rate": 8.630862871464376e-06, "loss": 0.0, "step": 3466 }, { "epoch": 0.22343236450344783, "grad_norm": 0.02406613506883519, "learning_rate": 8.63014679556033e-06, "loss": 0.0002, "step": 3467 }, { "epoch": 0.22349680995037702, "grad_norm": 0.8807060632696435, "learning_rate": 8.629430719656285e-06, "loss": 0.0038, "step": 3468 }, { "epoch": 0.22356125539730617, "grad_norm": 0.010200052529701138, "learning_rate": 8.628714643752239e-06, "loss": 0.0001, "step": 3469 }, { "epoch": 0.22362570084423536, "grad_norm": 0.004726596178967311, "learning_rate": 8.627998567848193e-06, "loss": 0.0001, "step": 3470 }, { "epoch": 0.22369014629116452, "grad_norm": 0.0035062183509143477, "learning_rate": 8.627282491944147e-06, "loss": 0.0, "step": 3471 }, { "epoch": 0.2237545917380937, "grad_norm": 0.006413737365501235, "learning_rate": 8.6265664160401e-06, "loss": 0.0, "step": 3472 }, { "epoch": 0.22381903718502288, "grad_norm": 0.17002202513660772, "learning_rate": 8.625850340136054e-06, "loss": 0.002, "step": 3473 }, { "epoch": 0.22388348263195204, "grad_norm": 0.000330794695075691, "learning_rate": 8.625134264232009e-06, "loss": 0.0, "step": 3474 }, { "epoch": 0.22394792807888123, "grad_norm": 0.2625006482706694, "learning_rate": 8.624418188327963e-06, "loss": 0.0024, "step": 3475 }, { "epoch": 0.2240123735258104, "grad_norm": 0.00048218474812608737, "learning_rate": 8.623702112423917e-06, "loss": 0.0, "step": 3476 }, { "epoch": 0.22407681897273957, "grad_norm": 1.2268953297000005, "learning_rate": 8.622986036519872e-06, "loss": 0.0004, "step": 3477 }, { "epoch": 0.22414126441966875, "grad_norm": 0.03838107792094849, "learning_rate": 8.622269960615826e-06, "loss": 0.0, "step": 3478 }, { "epoch": 0.22420570986659794, "grad_norm": 0.0007060704934618854, "learning_rate": 8.62155388471178e-06, "loss": 0.0, "step": 3479 }, { "epoch": 0.2242701553135271, "grad_norm": 0.01650770562641032, "learning_rate": 8.620837808807734e-06, "loss": 0.0, "step": 3480 }, { "epoch": 0.22433460076045628, "grad_norm": 0.18594924064241675, "learning_rate": 8.620121732903689e-06, "loss": 0.0019, "step": 3481 }, { "epoch": 0.22439904620738546, "grad_norm": 0.0001218993788027957, "learning_rate": 8.619405656999643e-06, "loss": 0.0, "step": 3482 }, { "epoch": 0.22446349165431462, "grad_norm": 0.012246611324123326, "learning_rate": 8.618689581095597e-06, "loss": 0.0, "step": 3483 }, { "epoch": 0.2245279371012438, "grad_norm": 0.0007658951102295896, "learning_rate": 8.617973505191552e-06, "loss": 0.0, "step": 3484 }, { "epoch": 0.22459238254817296, "grad_norm": 0.024163193440480502, "learning_rate": 8.617257429287506e-06, "loss": 0.0002, "step": 3485 }, { "epoch": 0.22465682799510214, "grad_norm": 0.00452679815357398, "learning_rate": 8.61654135338346e-06, "loss": 0.0, "step": 3486 }, { "epoch": 0.22472127344203133, "grad_norm": 0.11612035923873511, "learning_rate": 8.615825277479415e-06, "loss": 0.0006, "step": 3487 }, { "epoch": 0.22478571888896048, "grad_norm": 0.040972251040244025, "learning_rate": 8.615109201575367e-06, "loss": 0.0, "step": 3488 }, { "epoch": 0.22485016433588967, "grad_norm": 0.001675342434363501, "learning_rate": 8.614393125671321e-06, "loss": 0.0, "step": 3489 }, { "epoch": 0.22491460978281885, "grad_norm": 0.002689598627667504, "learning_rate": 8.613677049767276e-06, "loss": 0.0, "step": 3490 }, { "epoch": 0.224979055229748, "grad_norm": 0.0001554998765619099, "learning_rate": 8.61296097386323e-06, "loss": 0.0, "step": 3491 }, { "epoch": 0.2250435006766772, "grad_norm": 0.0007973969882462369, "learning_rate": 8.612244897959184e-06, "loss": 0.0, "step": 3492 }, { "epoch": 0.22510794612360638, "grad_norm": 0.00010330225671837098, "learning_rate": 8.611528822055139e-06, "loss": 0.0, "step": 3493 }, { "epoch": 0.22517239157053553, "grad_norm": 0.00965204197384073, "learning_rate": 8.610812746151093e-06, "loss": 0.0, "step": 3494 }, { "epoch": 0.22523683701746472, "grad_norm": 0.28994619764491436, "learning_rate": 8.610096670247047e-06, "loss": 0.0006, "step": 3495 }, { "epoch": 0.2253012824643939, "grad_norm": 0.45118794268115947, "learning_rate": 8.609380594343002e-06, "loss": 0.0011, "step": 3496 }, { "epoch": 0.22536572791132306, "grad_norm": 0.056652606321714157, "learning_rate": 8.608664518438954e-06, "loss": 0.0001, "step": 3497 }, { "epoch": 0.22543017335825224, "grad_norm": 0.002017892148967793, "learning_rate": 8.607948442534908e-06, "loss": 0.0, "step": 3498 }, { "epoch": 0.2254946188051814, "grad_norm": 0.005166001572256319, "learning_rate": 8.607232366630863e-06, "loss": 0.0, "step": 3499 }, { "epoch": 0.22555906425211059, "grad_norm": 0.002040933558769715, "learning_rate": 8.606516290726817e-06, "loss": 0.0, "step": 3500 }, { "epoch": 0.22562350969903977, "grad_norm": 0.0007145762792561211, "learning_rate": 8.605800214822771e-06, "loss": 0.0, "step": 3501 }, { "epoch": 0.22568795514596893, "grad_norm": 0.0016717906006389666, "learning_rate": 8.605084138918726e-06, "loss": 0.0, "step": 3502 }, { "epoch": 0.2257524005928981, "grad_norm": 0.05478180477274337, "learning_rate": 8.60436806301468e-06, "loss": 0.0001, "step": 3503 }, { "epoch": 0.2258168460398273, "grad_norm": 0.009749687123417545, "learning_rate": 8.603651987110634e-06, "loss": 0.0, "step": 3504 }, { "epoch": 0.22588129148675645, "grad_norm": 0.023990730276947616, "learning_rate": 8.602935911206589e-06, "loss": 0.0, "step": 3505 }, { "epoch": 0.22594573693368564, "grad_norm": 0.07996477268825748, "learning_rate": 8.602219835302543e-06, "loss": 0.0016, "step": 3506 }, { "epoch": 0.22601018238061482, "grad_norm": 0.0019798777230370587, "learning_rate": 8.601503759398497e-06, "loss": 0.0, "step": 3507 }, { "epoch": 0.22607462782754398, "grad_norm": 0.0003005708285917906, "learning_rate": 8.600787683494451e-06, "loss": 0.0, "step": 3508 }, { "epoch": 0.22613907327447316, "grad_norm": 0.05497608294365255, "learning_rate": 8.600071607590406e-06, "loss": 0.0001, "step": 3509 }, { "epoch": 0.22620351872140232, "grad_norm": 0.00847252292242908, "learning_rate": 8.59935553168636e-06, "loss": 0.0, "step": 3510 }, { "epoch": 0.2262679641683315, "grad_norm": 0.0018614056583017003, "learning_rate": 8.598639455782314e-06, "loss": 0.0, "step": 3511 }, { "epoch": 0.2263324096152607, "grad_norm": 0.005697144685217808, "learning_rate": 8.597923379878269e-06, "loss": 0.0, "step": 3512 }, { "epoch": 0.22639685506218984, "grad_norm": 0.01307997836978058, "learning_rate": 8.597207303974223e-06, "loss": 0.0001, "step": 3513 }, { "epoch": 0.22646130050911903, "grad_norm": 0.1307518360599829, "learning_rate": 8.596491228070176e-06, "loss": 0.0004, "step": 3514 }, { "epoch": 0.2265257459560482, "grad_norm": 0.016693761174956682, "learning_rate": 8.59577515216613e-06, "loss": 0.0, "step": 3515 }, { "epoch": 0.22659019140297737, "grad_norm": 0.033633965786489854, "learning_rate": 8.595059076262084e-06, "loss": 0.0001, "step": 3516 }, { "epoch": 0.22665463684990655, "grad_norm": 0.034370924494014785, "learning_rate": 8.594343000358038e-06, "loss": 0.0, "step": 3517 }, { "epoch": 0.22671908229683574, "grad_norm": 0.4692173535367172, "learning_rate": 8.593626924453993e-06, "loss": 0.002, "step": 3518 }, { "epoch": 0.2267835277437649, "grad_norm": 0.16348664515859737, "learning_rate": 8.592910848549947e-06, "loss": 0.0003, "step": 3519 }, { "epoch": 0.22684797319069408, "grad_norm": 0.02663825689310431, "learning_rate": 8.592194772645901e-06, "loss": 0.0002, "step": 3520 }, { "epoch": 0.22691241863762326, "grad_norm": 0.23243357524753472, "learning_rate": 8.591478696741856e-06, "loss": 0.0002, "step": 3521 }, { "epoch": 0.22697686408455242, "grad_norm": 0.00017325221716987894, "learning_rate": 8.59076262083781e-06, "loss": 0.0, "step": 3522 }, { "epoch": 0.2270413095314816, "grad_norm": 0.0010475463938276882, "learning_rate": 8.590046544933763e-06, "loss": 0.0, "step": 3523 }, { "epoch": 0.22710575497841076, "grad_norm": 0.0006956761215428136, "learning_rate": 8.589330469029717e-06, "loss": 0.0, "step": 3524 }, { "epoch": 0.22717020042533995, "grad_norm": 0.026205720830062656, "learning_rate": 8.588614393125671e-06, "loss": 0.0003, "step": 3525 }, { "epoch": 0.22723464587226913, "grad_norm": 0.007105951163493245, "learning_rate": 8.587898317221625e-06, "loss": 0.0, "step": 3526 }, { "epoch": 0.2272990913191983, "grad_norm": 0.0004057145569787648, "learning_rate": 8.58718224131758e-06, "loss": 0.0, "step": 3527 }, { "epoch": 0.22736353676612747, "grad_norm": 0.02197380725177132, "learning_rate": 8.586466165413536e-06, "loss": 0.0001, "step": 3528 }, { "epoch": 0.22742798221305666, "grad_norm": 0.001348719214099292, "learning_rate": 8.58575008950949e-06, "loss": 0.0, "step": 3529 }, { "epoch": 0.2274924276599858, "grad_norm": 0.010772461928954678, "learning_rate": 8.585034013605443e-06, "loss": 0.0001, "step": 3530 }, { "epoch": 0.227556873106915, "grad_norm": 0.08330517966318159, "learning_rate": 8.584317937701397e-06, "loss": 0.0019, "step": 3531 }, { "epoch": 0.22762131855384418, "grad_norm": 0.19010628847131272, "learning_rate": 8.583601861797351e-06, "loss": 0.0027, "step": 3532 }, { "epoch": 0.22768576400077334, "grad_norm": 0.0011382736201224818, "learning_rate": 8.582885785893306e-06, "loss": 0.0, "step": 3533 }, { "epoch": 0.22775020944770252, "grad_norm": 0.01510820616315032, "learning_rate": 8.58216970998926e-06, "loss": 0.0001, "step": 3534 }, { "epoch": 0.2278146548946317, "grad_norm": 0.0022582360408176425, "learning_rate": 8.581453634085214e-06, "loss": 0.0, "step": 3535 }, { "epoch": 0.22787910034156086, "grad_norm": 0.0024001167201978904, "learning_rate": 8.580737558181168e-06, "loss": 0.0, "step": 3536 }, { "epoch": 0.22794354578849005, "grad_norm": 0.00014460807293517424, "learning_rate": 8.580021482277123e-06, "loss": 0.0, "step": 3537 }, { "epoch": 0.2280079912354192, "grad_norm": 0.004371144356167792, "learning_rate": 8.579305406373077e-06, "loss": 0.0, "step": 3538 }, { "epoch": 0.2280724366823484, "grad_norm": 0.12752316946926834, "learning_rate": 8.57858933046903e-06, "loss": 0.0012, "step": 3539 }, { "epoch": 0.22813688212927757, "grad_norm": 6.635877081708909e-05, "learning_rate": 8.577873254564984e-06, "loss": 0.0, "step": 3540 }, { "epoch": 0.22820132757620673, "grad_norm": 0.035709400460215875, "learning_rate": 8.577157178660938e-06, "loss": 0.0, "step": 3541 }, { "epoch": 0.22826577302313591, "grad_norm": 0.043633021594940764, "learning_rate": 8.576441102756893e-06, "loss": 0.0, "step": 3542 }, { "epoch": 0.2283302184700651, "grad_norm": 0.198884992954439, "learning_rate": 8.575725026852847e-06, "loss": 0.0002, "step": 3543 }, { "epoch": 0.22839466391699426, "grad_norm": 0.03614280375678468, "learning_rate": 8.575008950948801e-06, "loss": 0.0001, "step": 3544 }, { "epoch": 0.22845910936392344, "grad_norm": 0.0005592311449027554, "learning_rate": 8.574292875044755e-06, "loss": 0.0, "step": 3545 }, { "epoch": 0.22852355481085262, "grad_norm": 0.0407157248262875, "learning_rate": 8.57357679914071e-06, "loss": 0.0001, "step": 3546 }, { "epoch": 0.22858800025778178, "grad_norm": 0.04273274208048958, "learning_rate": 8.572860723236664e-06, "loss": 0.0001, "step": 3547 }, { "epoch": 0.22865244570471097, "grad_norm": 0.00011219605860635525, "learning_rate": 8.572144647332618e-06, "loss": 0.0, "step": 3548 }, { "epoch": 0.22871689115164015, "grad_norm": 0.0011935116431631536, "learning_rate": 8.571428571428571e-06, "loss": 0.0, "step": 3549 }, { "epoch": 0.2287813365985693, "grad_norm": 0.041032109467459874, "learning_rate": 8.570712495524525e-06, "loss": 0.0005, "step": 3550 }, { "epoch": 0.2288457820454985, "grad_norm": 0.0033587200385650448, "learning_rate": 8.569996419620481e-06, "loss": 0.0, "step": 3551 }, { "epoch": 0.22891022749242765, "grad_norm": 0.25359755604274065, "learning_rate": 8.569280343716436e-06, "loss": 0.0005, "step": 3552 }, { "epoch": 0.22897467293935683, "grad_norm": 0.00030970704197332206, "learning_rate": 8.56856426781239e-06, "loss": 0.0, "step": 3553 }, { "epoch": 0.22903911838628602, "grad_norm": 0.003074699240476695, "learning_rate": 8.567848191908344e-06, "loss": 0.0, "step": 3554 }, { "epoch": 0.22910356383321517, "grad_norm": 0.04174109869575229, "learning_rate": 8.567132116004297e-06, "loss": 0.0001, "step": 3555 }, { "epoch": 0.22916800928014436, "grad_norm": 0.00035973275162782593, "learning_rate": 8.566416040100251e-06, "loss": 0.0, "step": 3556 }, { "epoch": 0.22923245472707354, "grad_norm": 0.17487531971851106, "learning_rate": 8.565699964196205e-06, "loss": 0.0078, "step": 3557 }, { "epoch": 0.2292969001740027, "grad_norm": 0.0021126300294926044, "learning_rate": 8.56498388829216e-06, "loss": 0.0, "step": 3558 }, { "epoch": 0.22936134562093188, "grad_norm": 0.0005282738112423214, "learning_rate": 8.564267812388114e-06, "loss": 0.0, "step": 3559 }, { "epoch": 0.22942579106786107, "grad_norm": 0.09196037584755035, "learning_rate": 8.563551736484068e-06, "loss": 0.0008, "step": 3560 }, { "epoch": 0.22949023651479022, "grad_norm": 0.0020641696227278188, "learning_rate": 8.562835660580023e-06, "loss": 0.0, "step": 3561 }, { "epoch": 0.2295546819617194, "grad_norm": 0.0005468917117046053, "learning_rate": 8.562119584675977e-06, "loss": 0.0, "step": 3562 }, { "epoch": 0.22961912740864857, "grad_norm": 0.1214281701890994, "learning_rate": 8.561403508771931e-06, "loss": 0.0002, "step": 3563 }, { "epoch": 0.22968357285557775, "grad_norm": 0.027091342936798053, "learning_rate": 8.560687432867885e-06, "loss": 0.0001, "step": 3564 }, { "epoch": 0.22974801830250693, "grad_norm": 0.003703475196088253, "learning_rate": 8.559971356963838e-06, "loss": 0.0, "step": 3565 }, { "epoch": 0.2298124637494361, "grad_norm": 0.0004199587784974058, "learning_rate": 8.559255281059792e-06, "loss": 0.0, "step": 3566 }, { "epoch": 0.22987690919636528, "grad_norm": 2.4043078821272714, "learning_rate": 8.558539205155747e-06, "loss": 0.0121, "step": 3567 }, { "epoch": 0.22994135464329446, "grad_norm": 0.00202259349133905, "learning_rate": 8.557823129251701e-06, "loss": 0.0, "step": 3568 }, { "epoch": 0.23000580009022362, "grad_norm": 0.3955692008143024, "learning_rate": 8.557107053347655e-06, "loss": 0.0015, "step": 3569 }, { "epoch": 0.2300702455371528, "grad_norm": 0.10374625174088346, "learning_rate": 8.55639097744361e-06, "loss": 0.0004, "step": 3570 }, { "epoch": 0.23013469098408199, "grad_norm": 0.0004160911240873582, "learning_rate": 8.555674901539564e-06, "loss": 0.0, "step": 3571 }, { "epoch": 0.23019913643101114, "grad_norm": 0.38638667095974427, "learning_rate": 8.554958825635518e-06, "loss": 0.0014, "step": 3572 }, { "epoch": 0.23026358187794033, "grad_norm": 0.0017868331589009291, "learning_rate": 8.554242749731472e-06, "loss": 0.0, "step": 3573 }, { "epoch": 0.2303280273248695, "grad_norm": 0.0008067980443222023, "learning_rate": 8.553526673827427e-06, "loss": 0.0, "step": 3574 }, { "epoch": 0.23039247277179867, "grad_norm": 0.14120487685539893, "learning_rate": 8.552810597923381e-06, "loss": 0.0002, "step": 3575 }, { "epoch": 0.23045691821872785, "grad_norm": 0.01688292958076883, "learning_rate": 8.552094522019335e-06, "loss": 0.0001, "step": 3576 }, { "epoch": 0.230521363665657, "grad_norm": 0.016395282165946354, "learning_rate": 8.55137844611529e-06, "loss": 0.0, "step": 3577 }, { "epoch": 0.2305858091125862, "grad_norm": 0.0002731364490621126, "learning_rate": 8.550662370211244e-06, "loss": 0.0, "step": 3578 }, { "epoch": 0.23065025455951538, "grad_norm": 0.0024728902830418137, "learning_rate": 8.549946294307198e-06, "loss": 0.0, "step": 3579 }, { "epoch": 0.23071470000644453, "grad_norm": 0.0009555167639653805, "learning_rate": 8.549230218403152e-06, "loss": 0.0, "step": 3580 }, { "epoch": 0.23077914545337372, "grad_norm": 0.10064114749816049, "learning_rate": 8.548514142499105e-06, "loss": 0.0001, "step": 3581 }, { "epoch": 0.2308435909003029, "grad_norm": 0.10534812510748648, "learning_rate": 8.54779806659506e-06, "loss": 0.0002, "step": 3582 }, { "epoch": 0.23090803634723206, "grad_norm": 0.003143158927519917, "learning_rate": 8.547081990691014e-06, "loss": 0.0, "step": 3583 }, { "epoch": 0.23097248179416124, "grad_norm": 0.0013283154727609836, "learning_rate": 8.546365914786968e-06, "loss": 0.0, "step": 3584 }, { "epoch": 0.23103692724109043, "grad_norm": 0.0009492733557983951, "learning_rate": 8.545649838882922e-06, "loss": 0.0, "step": 3585 }, { "epoch": 0.23110137268801959, "grad_norm": 0.000137084553934003, "learning_rate": 8.544933762978877e-06, "loss": 0.0, "step": 3586 }, { "epoch": 0.23116581813494877, "grad_norm": 0.013543951428487857, "learning_rate": 8.544217687074831e-06, "loss": 0.0001, "step": 3587 }, { "epoch": 0.23123026358187795, "grad_norm": 0.004368855218247446, "learning_rate": 8.543501611170785e-06, "loss": 0.0, "step": 3588 }, { "epoch": 0.2312947090288071, "grad_norm": 0.08383096778560901, "learning_rate": 8.54278553526674e-06, "loss": 0.0001, "step": 3589 }, { "epoch": 0.2313591544757363, "grad_norm": 0.014596576080533902, "learning_rate": 8.542069459362694e-06, "loss": 0.0001, "step": 3590 }, { "epoch": 0.23142359992266545, "grad_norm": 0.003054285641641687, "learning_rate": 8.541353383458646e-06, "loss": 0.0, "step": 3591 }, { "epoch": 0.23148804536959464, "grad_norm": 0.010529457621076216, "learning_rate": 8.5406373075546e-06, "loss": 0.0001, "step": 3592 }, { "epoch": 0.23155249081652382, "grad_norm": 0.03386443048252686, "learning_rate": 8.539921231650555e-06, "loss": 0.0001, "step": 3593 }, { "epoch": 0.23161693626345298, "grad_norm": 0.000382320509569171, "learning_rate": 8.53920515574651e-06, "loss": 0.0, "step": 3594 }, { "epoch": 0.23168138171038216, "grad_norm": 0.03725659818404199, "learning_rate": 8.538489079842464e-06, "loss": 0.0, "step": 3595 }, { "epoch": 0.23174582715731135, "grad_norm": 0.002715356890662232, "learning_rate": 8.537773003938418e-06, "loss": 0.0, "step": 3596 }, { "epoch": 0.2318102726042405, "grad_norm": 0.0005005922412607236, "learning_rate": 8.537056928034372e-06, "loss": 0.0, "step": 3597 }, { "epoch": 0.2318747180511697, "grad_norm": 0.23895372802173373, "learning_rate": 8.536340852130326e-06, "loss": 0.0005, "step": 3598 }, { "epoch": 0.23193916349809887, "grad_norm": 0.0035535297376758264, "learning_rate": 8.53562477622628e-06, "loss": 0.0, "step": 3599 }, { "epoch": 0.23200360894502803, "grad_norm": 1.192982066141991, "learning_rate": 8.534908700322235e-06, "loss": 0.0016, "step": 3600 }, { "epoch": 0.2320680543919572, "grad_norm": 0.0006909154871493346, "learning_rate": 8.53419262441819e-06, "loss": 0.0, "step": 3601 }, { "epoch": 0.23213249983888637, "grad_norm": 0.003394634640801154, "learning_rate": 8.533476548514144e-06, "loss": 0.0, "step": 3602 }, { "epoch": 0.23219694528581555, "grad_norm": 0.16565397364526332, "learning_rate": 8.532760472610098e-06, "loss": 0.0029, "step": 3603 }, { "epoch": 0.23226139073274474, "grad_norm": 0.0007924288655583169, "learning_rate": 8.532044396706052e-06, "loss": 0.0, "step": 3604 }, { "epoch": 0.2323258361796739, "grad_norm": 0.0008002846613981609, "learning_rate": 8.531328320802007e-06, "loss": 0.0, "step": 3605 }, { "epoch": 0.23239028162660308, "grad_norm": 0.028553978107853775, "learning_rate": 8.530612244897961e-06, "loss": 0.0001, "step": 3606 }, { "epoch": 0.23245472707353226, "grad_norm": 0.018178990149575903, "learning_rate": 8.529896168993913e-06, "loss": 0.0001, "step": 3607 }, { "epoch": 0.23251917252046142, "grad_norm": 0.026175255165739573, "learning_rate": 8.529180093089868e-06, "loss": 0.0, "step": 3608 }, { "epoch": 0.2325836179673906, "grad_norm": 0.00863108323449065, "learning_rate": 8.528464017185822e-06, "loss": 0.0, "step": 3609 }, { "epoch": 0.2326480634143198, "grad_norm": 0.00367795649645988, "learning_rate": 8.527747941281776e-06, "loss": 0.0, "step": 3610 }, { "epoch": 0.23271250886124895, "grad_norm": 0.023703811777085085, "learning_rate": 8.52703186537773e-06, "loss": 0.0001, "step": 3611 }, { "epoch": 0.23277695430817813, "grad_norm": 0.41825952843291414, "learning_rate": 8.526315789473685e-06, "loss": 0.0023, "step": 3612 }, { "epoch": 0.23284139975510731, "grad_norm": 0.0002994700953356274, "learning_rate": 8.52559971356964e-06, "loss": 0.0, "step": 3613 }, { "epoch": 0.23290584520203647, "grad_norm": 0.03925458351539499, "learning_rate": 8.524883637665594e-06, "loss": 0.0004, "step": 3614 }, { "epoch": 0.23297029064896566, "grad_norm": 0.010272126599741443, "learning_rate": 8.524167561761548e-06, "loss": 0.0001, "step": 3615 }, { "epoch": 0.2330347360958948, "grad_norm": 0.0008049833577190571, "learning_rate": 8.5234514858575e-06, "loss": 0.0, "step": 3616 }, { "epoch": 0.233099181542824, "grad_norm": 0.00012036471779414499, "learning_rate": 8.522735409953455e-06, "loss": 0.0, "step": 3617 }, { "epoch": 0.23316362698975318, "grad_norm": 0.0011984086577387977, "learning_rate": 8.522019334049409e-06, "loss": 0.0, "step": 3618 }, { "epoch": 0.23322807243668234, "grad_norm": 0.0835934790491009, "learning_rate": 8.521303258145363e-06, "loss": 0.0006, "step": 3619 }, { "epoch": 0.23329251788361152, "grad_norm": 0.08836004941212419, "learning_rate": 8.520587182241318e-06, "loss": 0.0002, "step": 3620 }, { "epoch": 0.2333569633305407, "grad_norm": 0.18029412205686446, "learning_rate": 8.519871106337274e-06, "loss": 0.0038, "step": 3621 }, { "epoch": 0.23342140877746986, "grad_norm": 0.00026738501476959226, "learning_rate": 8.519155030433228e-06, "loss": 0.0, "step": 3622 }, { "epoch": 0.23348585422439905, "grad_norm": 0.005370139991002869, "learning_rate": 8.51843895452918e-06, "loss": 0.0001, "step": 3623 }, { "epoch": 0.23355029967132823, "grad_norm": 0.2452207171892442, "learning_rate": 8.517722878625135e-06, "loss": 0.0005, "step": 3624 }, { "epoch": 0.2336147451182574, "grad_norm": 0.0010621327473918718, "learning_rate": 8.517006802721089e-06, "loss": 0.0, "step": 3625 }, { "epoch": 0.23367919056518657, "grad_norm": 0.00016653019881507862, "learning_rate": 8.516290726817043e-06, "loss": 0.0, "step": 3626 }, { "epoch": 0.23374363601211576, "grad_norm": 0.06785625871955342, "learning_rate": 8.515574650912998e-06, "loss": 0.0005, "step": 3627 }, { "epoch": 0.23380808145904491, "grad_norm": 1.7781309962381047, "learning_rate": 8.514858575008952e-06, "loss": 0.0043, "step": 3628 }, { "epoch": 0.2338725269059741, "grad_norm": 0.00018354352976572647, "learning_rate": 8.514142499104906e-06, "loss": 0.0, "step": 3629 }, { "epoch": 0.23393697235290326, "grad_norm": 0.0003587856822891483, "learning_rate": 8.51342642320086e-06, "loss": 0.0, "step": 3630 }, { "epoch": 0.23400141779983244, "grad_norm": 0.003481401644091111, "learning_rate": 8.512710347296815e-06, "loss": 0.0, "step": 3631 }, { "epoch": 0.23406586324676162, "grad_norm": 0.0014548914336011787, "learning_rate": 8.511994271392768e-06, "loss": 0.0, "step": 3632 }, { "epoch": 0.23413030869369078, "grad_norm": 0.02106593887574402, "learning_rate": 8.511278195488722e-06, "loss": 0.0001, "step": 3633 }, { "epoch": 0.23419475414061997, "grad_norm": 0.1377033136626864, "learning_rate": 8.510562119584676e-06, "loss": 0.002, "step": 3634 }, { "epoch": 0.23425919958754915, "grad_norm": 0.04813451217798247, "learning_rate": 8.50984604368063e-06, "loss": 0.0002, "step": 3635 }, { "epoch": 0.2343236450344783, "grad_norm": 0.012981266824628336, "learning_rate": 8.509129967776585e-06, "loss": 0.0, "step": 3636 }, { "epoch": 0.2343880904814075, "grad_norm": 0.12003216797220177, "learning_rate": 8.508413891872539e-06, "loss": 0.0001, "step": 3637 }, { "epoch": 0.23445253592833667, "grad_norm": 0.003240766907488695, "learning_rate": 8.507697815968493e-06, "loss": 0.0, "step": 3638 }, { "epoch": 0.23451698137526583, "grad_norm": 0.07065803673474678, "learning_rate": 8.506981740064448e-06, "loss": 0.0003, "step": 3639 }, { "epoch": 0.23458142682219502, "grad_norm": 0.8096147071804877, "learning_rate": 8.506265664160402e-06, "loss": 0.0021, "step": 3640 }, { "epoch": 0.23464587226912417, "grad_norm": 0.00021237418484407522, "learning_rate": 8.505549588256356e-06, "loss": 0.0, "step": 3641 }, { "epoch": 0.23471031771605336, "grad_norm": 0.002760565859295997, "learning_rate": 8.504833512352309e-06, "loss": 0.0, "step": 3642 }, { "epoch": 0.23477476316298254, "grad_norm": 0.004335590145388964, "learning_rate": 8.504117436448263e-06, "loss": 0.0001, "step": 3643 }, { "epoch": 0.2348392086099117, "grad_norm": 0.3139593026011861, "learning_rate": 8.503401360544217e-06, "loss": 0.0005, "step": 3644 }, { "epoch": 0.23490365405684088, "grad_norm": 0.15569056856024935, "learning_rate": 8.502685284640173e-06, "loss": 0.0021, "step": 3645 }, { "epoch": 0.23496809950377007, "grad_norm": 0.0906748991919936, "learning_rate": 8.501969208736128e-06, "loss": 0.0001, "step": 3646 }, { "epoch": 0.23503254495069922, "grad_norm": 0.014439834483945311, "learning_rate": 8.501253132832082e-06, "loss": 0.0001, "step": 3647 }, { "epoch": 0.2350969903976284, "grad_norm": 0.019942436114608407, "learning_rate": 8.500537056928035e-06, "loss": 0.0001, "step": 3648 }, { "epoch": 0.2351614358445576, "grad_norm": 0.0053793905060603225, "learning_rate": 8.499820981023989e-06, "loss": 0.0, "step": 3649 }, { "epoch": 0.23522588129148675, "grad_norm": 0.00658721510723763, "learning_rate": 8.499104905119943e-06, "loss": 0.0001, "step": 3650 }, { "epoch": 0.23529032673841593, "grad_norm": 0.0024141843857270616, "learning_rate": 8.498388829215898e-06, "loss": 0.0, "step": 3651 }, { "epoch": 0.23535477218534512, "grad_norm": 0.00412149132628881, "learning_rate": 8.497672753311852e-06, "loss": 0.0, "step": 3652 }, { "epoch": 0.23541921763227427, "grad_norm": 0.0017195526767637631, "learning_rate": 8.496956677407806e-06, "loss": 0.0, "step": 3653 }, { "epoch": 0.23548366307920346, "grad_norm": 0.0001600318501965816, "learning_rate": 8.49624060150376e-06, "loss": 0.0, "step": 3654 }, { "epoch": 0.23554810852613262, "grad_norm": 0.0015997134786524389, "learning_rate": 8.495524525599715e-06, "loss": 0.0, "step": 3655 }, { "epoch": 0.2356125539730618, "grad_norm": 0.00019996158198574133, "learning_rate": 8.494808449695669e-06, "loss": 0.0, "step": 3656 }, { "epoch": 0.23567699941999098, "grad_norm": 0.022086918620967465, "learning_rate": 8.494092373791623e-06, "loss": 0.0001, "step": 3657 }, { "epoch": 0.23574144486692014, "grad_norm": 0.010196585178414033, "learning_rate": 8.493376297887576e-06, "loss": 0.0, "step": 3658 }, { "epoch": 0.23580589031384933, "grad_norm": 0.006909177117970666, "learning_rate": 8.49266022198353e-06, "loss": 0.0, "step": 3659 }, { "epoch": 0.2358703357607785, "grad_norm": 2.2550082099579014, "learning_rate": 8.491944146079485e-06, "loss": 0.015, "step": 3660 }, { "epoch": 0.23593478120770767, "grad_norm": 0.023382369159834774, "learning_rate": 8.491228070175439e-06, "loss": 0.0002, "step": 3661 }, { "epoch": 0.23599922665463685, "grad_norm": 0.6735246278908694, "learning_rate": 8.490511994271393e-06, "loss": 0.0021, "step": 3662 }, { "epoch": 0.23606367210156604, "grad_norm": 0.0016992829250492205, "learning_rate": 8.489795918367347e-06, "loss": 0.0, "step": 3663 }, { "epoch": 0.2361281175484952, "grad_norm": 0.14514165931232095, "learning_rate": 8.489079842463302e-06, "loss": 0.0003, "step": 3664 }, { "epoch": 0.23619256299542438, "grad_norm": 0.056902135808290705, "learning_rate": 8.488363766559256e-06, "loss": 0.0006, "step": 3665 }, { "epoch": 0.23625700844235356, "grad_norm": 0.00035760265499879115, "learning_rate": 8.48764769065521e-06, "loss": 0.0, "step": 3666 }, { "epoch": 0.23632145388928272, "grad_norm": 0.015011496814008625, "learning_rate": 8.486931614751165e-06, "loss": 0.0001, "step": 3667 }, { "epoch": 0.2363858993362119, "grad_norm": 0.00026337167364298936, "learning_rate": 8.486215538847119e-06, "loss": 0.0, "step": 3668 }, { "epoch": 0.23645034478314106, "grad_norm": 0.0029167551721233755, "learning_rate": 8.485499462943073e-06, "loss": 0.0, "step": 3669 }, { "epoch": 0.23651479023007024, "grad_norm": 0.01064145374267631, "learning_rate": 8.484783387039028e-06, "loss": 0.0, "step": 3670 }, { "epoch": 0.23657923567699943, "grad_norm": 0.002452641484903878, "learning_rate": 8.484067311134982e-06, "loss": 0.0, "step": 3671 }, { "epoch": 0.23664368112392858, "grad_norm": 0.009032862405537249, "learning_rate": 8.483351235230936e-06, "loss": 0.0001, "step": 3672 }, { "epoch": 0.23670812657085777, "grad_norm": 0.000362237704258534, "learning_rate": 8.48263515932689e-06, "loss": 0.0, "step": 3673 }, { "epoch": 0.23677257201778695, "grad_norm": 0.0007830415697379507, "learning_rate": 8.481919083422843e-06, "loss": 0.0, "step": 3674 }, { "epoch": 0.2368370174647161, "grad_norm": 0.0008006848452688169, "learning_rate": 8.481203007518797e-06, "loss": 0.0, "step": 3675 }, { "epoch": 0.2369014629116453, "grad_norm": 1.025715803628338, "learning_rate": 8.480486931614752e-06, "loss": 0.005, "step": 3676 }, { "epoch": 0.23696590835857448, "grad_norm": 0.06137980239797591, "learning_rate": 8.479770855710706e-06, "loss": 0.0002, "step": 3677 }, { "epoch": 0.23703035380550364, "grad_norm": 0.37055178769912805, "learning_rate": 8.47905477980666e-06, "loss": 0.0035, "step": 3678 }, { "epoch": 0.23709479925243282, "grad_norm": 0.00016133238319453695, "learning_rate": 8.478338703902615e-06, "loss": 0.0, "step": 3679 }, { "epoch": 0.23715924469936198, "grad_norm": 0.45889901524037224, "learning_rate": 8.477622627998569e-06, "loss": 0.0006, "step": 3680 }, { "epoch": 0.23722369014629116, "grad_norm": 0.04829375159957358, "learning_rate": 8.476906552094523e-06, "loss": 0.0001, "step": 3681 }, { "epoch": 0.23728813559322035, "grad_norm": 0.04483302025827112, "learning_rate": 8.476190476190477e-06, "loss": 0.0, "step": 3682 }, { "epoch": 0.2373525810401495, "grad_norm": 0.006915118970124431, "learning_rate": 8.475474400286432e-06, "loss": 0.0015, "step": 3683 }, { "epoch": 0.2374170264870787, "grad_norm": 0.029743070065202375, "learning_rate": 8.474758324382384e-06, "loss": 0.0001, "step": 3684 }, { "epoch": 0.23748147193400787, "grad_norm": 0.010346678082478153, "learning_rate": 8.474042248478339e-06, "loss": 0.0001, "step": 3685 }, { "epoch": 0.23754591738093703, "grad_norm": 0.003808709571335096, "learning_rate": 8.473326172574293e-06, "loss": 0.0, "step": 3686 }, { "epoch": 0.2376103628278662, "grad_norm": 0.03439312438102102, "learning_rate": 8.472610096670247e-06, "loss": 0.0001, "step": 3687 }, { "epoch": 0.2376748082747954, "grad_norm": 0.0006371055023281539, "learning_rate": 8.471894020766201e-06, "loss": 0.0, "step": 3688 }, { "epoch": 0.23773925372172455, "grad_norm": 0.008961421343413054, "learning_rate": 8.471177944862156e-06, "loss": 0.0, "step": 3689 }, { "epoch": 0.23780369916865374, "grad_norm": 0.01069256581449394, "learning_rate": 8.47046186895811e-06, "loss": 0.0, "step": 3690 }, { "epoch": 0.23786814461558292, "grad_norm": 0.07071569525956135, "learning_rate": 8.469745793054064e-06, "loss": 0.0005, "step": 3691 }, { "epoch": 0.23793259006251208, "grad_norm": 0.001481707000711012, "learning_rate": 8.469029717150019e-06, "loss": 0.0, "step": 3692 }, { "epoch": 0.23799703550944126, "grad_norm": 0.02562734064927472, "learning_rate": 8.468313641245973e-06, "loss": 0.0004, "step": 3693 }, { "epoch": 0.23806148095637042, "grad_norm": 0.009852533383060125, "learning_rate": 8.467597565341927e-06, "loss": 0.0, "step": 3694 }, { "epoch": 0.2381259264032996, "grad_norm": 0.024841071931428666, "learning_rate": 8.466881489437882e-06, "loss": 0.0, "step": 3695 }, { "epoch": 0.2381903718502288, "grad_norm": 0.0019675953712899397, "learning_rate": 8.466165413533836e-06, "loss": 0.0, "step": 3696 }, { "epoch": 0.23825481729715794, "grad_norm": 0.005357394982959551, "learning_rate": 8.46544933762979e-06, "loss": 0.0001, "step": 3697 }, { "epoch": 0.23831926274408713, "grad_norm": 0.018464397355722775, "learning_rate": 8.464733261725744e-06, "loss": 0.0002, "step": 3698 }, { "epoch": 0.2383837081910163, "grad_norm": 0.030809908730295932, "learning_rate": 8.464017185821699e-06, "loss": 0.0004, "step": 3699 }, { "epoch": 0.23844815363794547, "grad_norm": 0.0016479319719454062, "learning_rate": 8.463301109917651e-06, "loss": 0.0, "step": 3700 }, { "epoch": 0.23851259908487465, "grad_norm": 0.0053921943411062425, "learning_rate": 8.462585034013606e-06, "loss": 0.0, "step": 3701 }, { "epoch": 0.23857704453180384, "grad_norm": 0.010372835272828807, "learning_rate": 8.46186895810956e-06, "loss": 0.0, "step": 3702 }, { "epoch": 0.238641489978733, "grad_norm": 0.009654674728061838, "learning_rate": 8.461152882205514e-06, "loss": 0.0, "step": 3703 }, { "epoch": 0.23870593542566218, "grad_norm": 0.0007935941029292266, "learning_rate": 8.460436806301469e-06, "loss": 0.0, "step": 3704 }, { "epoch": 0.23877038087259136, "grad_norm": 0.0005360464652635607, "learning_rate": 8.459720730397423e-06, "loss": 0.0, "step": 3705 }, { "epoch": 0.23883482631952052, "grad_norm": 0.0045528072264818704, "learning_rate": 8.459004654493377e-06, "loss": 0.0, "step": 3706 }, { "epoch": 0.2388992717664497, "grad_norm": 0.0004918566041346934, "learning_rate": 8.458288578589331e-06, "loss": 0.0, "step": 3707 }, { "epoch": 0.23896371721337886, "grad_norm": 0.01280604837715533, "learning_rate": 8.457572502685286e-06, "loss": 0.0, "step": 3708 }, { "epoch": 0.23902816266030805, "grad_norm": 0.014364710962126858, "learning_rate": 8.456856426781238e-06, "loss": 0.0001, "step": 3709 }, { "epoch": 0.23909260810723723, "grad_norm": 0.002629268415838148, "learning_rate": 8.456140350877193e-06, "loss": 0.0, "step": 3710 }, { "epoch": 0.2391570535541664, "grad_norm": 0.07447493721494977, "learning_rate": 8.455424274973147e-06, "loss": 0.0008, "step": 3711 }, { "epoch": 0.23922149900109557, "grad_norm": 0.5730290447183728, "learning_rate": 8.454708199069101e-06, "loss": 0.0059, "step": 3712 }, { "epoch": 0.23928594444802476, "grad_norm": 0.0012635572436632849, "learning_rate": 8.453992123165056e-06, "loss": 0.0, "step": 3713 }, { "epoch": 0.2393503898949539, "grad_norm": 0.024047417080047616, "learning_rate": 8.45327604726101e-06, "loss": 0.0, "step": 3714 }, { "epoch": 0.2394148353418831, "grad_norm": 0.2183628402735261, "learning_rate": 8.452559971356966e-06, "loss": 0.0009, "step": 3715 }, { "epoch": 0.23947928078881228, "grad_norm": 0.0006938746252782697, "learning_rate": 8.451843895452918e-06, "loss": 0.0, "step": 3716 }, { "epoch": 0.23954372623574144, "grad_norm": 0.0038200120856466476, "learning_rate": 8.451127819548873e-06, "loss": 0.0, "step": 3717 }, { "epoch": 0.23960817168267062, "grad_norm": 0.21408100848688394, "learning_rate": 8.450411743644827e-06, "loss": 0.0116, "step": 3718 }, { "epoch": 0.2396726171295998, "grad_norm": 0.0025670123515598884, "learning_rate": 8.449695667740781e-06, "loss": 0.0, "step": 3719 }, { "epoch": 0.23973706257652896, "grad_norm": 0.017562171034058886, "learning_rate": 8.448979591836736e-06, "loss": 0.0, "step": 3720 }, { "epoch": 0.23980150802345815, "grad_norm": 0.0006715029546785951, "learning_rate": 8.44826351593269e-06, "loss": 0.0, "step": 3721 }, { "epoch": 0.2398659534703873, "grad_norm": 0.03602618355161491, "learning_rate": 8.447547440028644e-06, "loss": 0.0001, "step": 3722 }, { "epoch": 0.2399303989173165, "grad_norm": 0.018207445749766946, "learning_rate": 8.446831364124599e-06, "loss": 0.0001, "step": 3723 }, { "epoch": 0.23999484436424567, "grad_norm": 0.12231537824269463, "learning_rate": 8.446115288220553e-06, "loss": 0.0002, "step": 3724 }, { "epoch": 0.24005928981117483, "grad_norm": 0.11532271765071904, "learning_rate": 8.445399212316505e-06, "loss": 0.0019, "step": 3725 }, { "epoch": 0.24012373525810402, "grad_norm": 2.264262664560703, "learning_rate": 8.44468313641246e-06, "loss": 0.0274, "step": 3726 }, { "epoch": 0.2401881807050332, "grad_norm": 0.0012899088975698734, "learning_rate": 8.443967060508414e-06, "loss": 0.0, "step": 3727 }, { "epoch": 0.24025262615196236, "grad_norm": 0.01366843452761101, "learning_rate": 8.443250984604368e-06, "loss": 0.0, "step": 3728 }, { "epoch": 0.24031707159889154, "grad_norm": 0.15249607222806652, "learning_rate": 8.442534908700323e-06, "loss": 0.0006, "step": 3729 }, { "epoch": 0.24038151704582073, "grad_norm": 0.021124642403800617, "learning_rate": 8.441818832796277e-06, "loss": 0.0, "step": 3730 }, { "epoch": 0.24044596249274988, "grad_norm": 0.12230660647504508, "learning_rate": 8.441102756892231e-06, "loss": 0.0011, "step": 3731 }, { "epoch": 0.24051040793967907, "grad_norm": 0.9040274496763755, "learning_rate": 8.440386680988186e-06, "loss": 0.0071, "step": 3732 }, { "epoch": 0.24057485338660822, "grad_norm": 0.0016440136482255797, "learning_rate": 8.43967060508414e-06, "loss": 0.0, "step": 3733 }, { "epoch": 0.2406392988335374, "grad_norm": 0.003047586958538098, "learning_rate": 8.438954529180094e-06, "loss": 0.0, "step": 3734 }, { "epoch": 0.2407037442804666, "grad_norm": 0.0009673710376870563, "learning_rate": 8.438238453276047e-06, "loss": 0.0, "step": 3735 }, { "epoch": 0.24076818972739575, "grad_norm": 0.016850687581852576, "learning_rate": 8.437522377372001e-06, "loss": 0.0001, "step": 3736 }, { "epoch": 0.24083263517432493, "grad_norm": 0.007526774476757326, "learning_rate": 8.436806301467955e-06, "loss": 0.0001, "step": 3737 }, { "epoch": 0.24089708062125412, "grad_norm": 0.13067564235859247, "learning_rate": 8.436090225563911e-06, "loss": 0.0003, "step": 3738 }, { "epoch": 0.24096152606818327, "grad_norm": 0.10502113619162073, "learning_rate": 8.435374149659866e-06, "loss": 0.0004, "step": 3739 }, { "epoch": 0.24102597151511246, "grad_norm": 0.05960794534318693, "learning_rate": 8.43465807375582e-06, "loss": 0.0001, "step": 3740 }, { "epoch": 0.24109041696204164, "grad_norm": 0.04238541013913969, "learning_rate": 8.433941997851773e-06, "loss": 0.0003, "step": 3741 }, { "epoch": 0.2411548624089708, "grad_norm": 0.018773961960864012, "learning_rate": 8.433225921947727e-06, "loss": 0.0016, "step": 3742 }, { "epoch": 0.24121930785589998, "grad_norm": 0.002479640335606827, "learning_rate": 8.432509846043681e-06, "loss": 0.0, "step": 3743 }, { "epoch": 0.24128375330282917, "grad_norm": 0.06248413383738307, "learning_rate": 8.431793770139635e-06, "loss": 0.0001, "step": 3744 }, { "epoch": 0.24134819874975832, "grad_norm": 0.0005678648591910123, "learning_rate": 8.43107769423559e-06, "loss": 0.0, "step": 3745 }, { "epoch": 0.2414126441966875, "grad_norm": 0.0013594156974299353, "learning_rate": 8.430361618331544e-06, "loss": 0.0, "step": 3746 }, { "epoch": 0.24147708964361667, "grad_norm": 0.0005139733092100992, "learning_rate": 8.429645542427498e-06, "loss": 0.0, "step": 3747 }, { "epoch": 0.24154153509054585, "grad_norm": 0.08670038037296418, "learning_rate": 8.428929466523453e-06, "loss": 0.0001, "step": 3748 }, { "epoch": 0.24160598053747503, "grad_norm": 0.8153278218675962, "learning_rate": 8.428213390619407e-06, "loss": 0.0049, "step": 3749 }, { "epoch": 0.2416704259844042, "grad_norm": 0.0035986476689472626, "learning_rate": 8.427497314715361e-06, "loss": 0.0, "step": 3750 }, { "epoch": 0.24173487143133338, "grad_norm": 0.00044391004078189354, "learning_rate": 8.426781238811314e-06, "loss": 0.0, "step": 3751 }, { "epoch": 0.24179931687826256, "grad_norm": 0.01262247356549068, "learning_rate": 8.426065162907268e-06, "loss": 0.0001, "step": 3752 }, { "epoch": 0.24186376232519172, "grad_norm": 0.02262652431529532, "learning_rate": 8.425349087003222e-06, "loss": 0.0, "step": 3753 }, { "epoch": 0.2419282077721209, "grad_norm": 0.0008362116585841875, "learning_rate": 8.424633011099177e-06, "loss": 0.0, "step": 3754 }, { "epoch": 0.24199265321905009, "grad_norm": 0.005311226408071963, "learning_rate": 8.423916935195131e-06, "loss": 0.0, "step": 3755 }, { "epoch": 0.24205709866597924, "grad_norm": 7.897391349834373e-05, "learning_rate": 8.423200859291085e-06, "loss": 0.0, "step": 3756 }, { "epoch": 0.24212154411290843, "grad_norm": 0.007048575324203542, "learning_rate": 8.42248478338704e-06, "loss": 0.0001, "step": 3757 }, { "epoch": 0.2421859895598376, "grad_norm": 0.002582242938985846, "learning_rate": 8.421768707482994e-06, "loss": 0.0, "step": 3758 }, { "epoch": 0.24225043500676677, "grad_norm": 0.0048435534680228, "learning_rate": 8.421052631578948e-06, "loss": 0.0, "step": 3759 }, { "epoch": 0.24231488045369595, "grad_norm": 0.4563282507589461, "learning_rate": 8.420336555674903e-06, "loss": 0.0009, "step": 3760 }, { "epoch": 0.2423793259006251, "grad_norm": 0.002779418637291888, "learning_rate": 8.419620479770857e-06, "loss": 0.0, "step": 3761 }, { "epoch": 0.2424437713475543, "grad_norm": 0.0027071330977519806, "learning_rate": 8.418904403866811e-06, "loss": 0.0, "step": 3762 }, { "epoch": 0.24250821679448348, "grad_norm": 0.010813946842943904, "learning_rate": 8.418188327962765e-06, "loss": 0.0001, "step": 3763 }, { "epoch": 0.24257266224141263, "grad_norm": 0.4170509314868395, "learning_rate": 8.41747225205872e-06, "loss": 0.0019, "step": 3764 }, { "epoch": 0.24263710768834182, "grad_norm": 0.005691707930402684, "learning_rate": 8.416756176154674e-06, "loss": 0.0, "step": 3765 }, { "epoch": 0.242701553135271, "grad_norm": 0.03813354713701013, "learning_rate": 8.416040100250628e-06, "loss": 0.0004, "step": 3766 }, { "epoch": 0.24276599858220016, "grad_norm": 0.032524933022695664, "learning_rate": 8.415324024346581e-06, "loss": 0.0003, "step": 3767 }, { "epoch": 0.24283044402912934, "grad_norm": 0.004065155547747669, "learning_rate": 8.414607948442535e-06, "loss": 0.0, "step": 3768 }, { "epoch": 0.24289488947605853, "grad_norm": 0.25707170231896603, "learning_rate": 8.41389187253849e-06, "loss": 0.0003, "step": 3769 }, { "epoch": 0.24295933492298769, "grad_norm": 0.9013399427931535, "learning_rate": 8.413175796634444e-06, "loss": 0.0075, "step": 3770 }, { "epoch": 0.24302378036991687, "grad_norm": 0.0021442310344541708, "learning_rate": 8.412459720730398e-06, "loss": 0.0, "step": 3771 }, { "epoch": 0.24308822581684603, "grad_norm": 0.10145635743766157, "learning_rate": 8.411743644826352e-06, "loss": 0.0005, "step": 3772 }, { "epoch": 0.2431526712637752, "grad_norm": 0.0016105101427803788, "learning_rate": 8.411027568922307e-06, "loss": 0.0, "step": 3773 }, { "epoch": 0.2432171167107044, "grad_norm": 0.00012493315542511565, "learning_rate": 8.410311493018261e-06, "loss": 0.0, "step": 3774 }, { "epoch": 0.24328156215763355, "grad_norm": 0.019223677016477023, "learning_rate": 8.409595417114215e-06, "loss": 0.0001, "step": 3775 }, { "epoch": 0.24334600760456274, "grad_norm": 0.005873850051817299, "learning_rate": 8.40887934121017e-06, "loss": 0.0001, "step": 3776 }, { "epoch": 0.24341045305149192, "grad_norm": 0.011179190813958233, "learning_rate": 8.408163265306122e-06, "loss": 0.0001, "step": 3777 }, { "epoch": 0.24347489849842108, "grad_norm": 0.0015477369522274402, "learning_rate": 8.407447189402077e-06, "loss": 0.0, "step": 3778 }, { "epoch": 0.24353934394535026, "grad_norm": 0.0017133007265412168, "learning_rate": 8.40673111349803e-06, "loss": 0.0, "step": 3779 }, { "epoch": 0.24360378939227945, "grad_norm": 0.0008757344192784564, "learning_rate": 8.406015037593985e-06, "loss": 0.0, "step": 3780 }, { "epoch": 0.2436682348392086, "grad_norm": 0.007403871914327963, "learning_rate": 8.40529896168994e-06, "loss": 0.0, "step": 3781 }, { "epoch": 0.2437326802861378, "grad_norm": 0.010201067541391566, "learning_rate": 8.404582885785894e-06, "loss": 0.0, "step": 3782 }, { "epoch": 0.24379712573306697, "grad_norm": 0.015937982305306662, "learning_rate": 8.403866809881848e-06, "loss": 0.0, "step": 3783 }, { "epoch": 0.24386157117999613, "grad_norm": 0.028991708253557905, "learning_rate": 8.403150733977802e-06, "loss": 0.0002, "step": 3784 }, { "epoch": 0.2439260166269253, "grad_norm": 0.04578634280546854, "learning_rate": 8.402434658073757e-06, "loss": 0.0003, "step": 3785 }, { "epoch": 0.24399046207385447, "grad_norm": 0.003052409221744512, "learning_rate": 8.401718582169711e-06, "loss": 0.0, "step": 3786 }, { "epoch": 0.24405490752078365, "grad_norm": 0.010366611251185617, "learning_rate": 8.401002506265665e-06, "loss": 0.0, "step": 3787 }, { "epoch": 0.24411935296771284, "grad_norm": 0.012989847995797284, "learning_rate": 8.40028643036162e-06, "loss": 0.0001, "step": 3788 }, { "epoch": 0.244183798414642, "grad_norm": 1.509528493086036, "learning_rate": 8.399570354457574e-06, "loss": 0.0121, "step": 3789 }, { "epoch": 0.24424824386157118, "grad_norm": 0.13255768762843395, "learning_rate": 8.398854278553528e-06, "loss": 0.0015, "step": 3790 }, { "epoch": 0.24431268930850036, "grad_norm": 0.07067210243967802, "learning_rate": 8.398138202649482e-06, "loss": 0.0003, "step": 3791 }, { "epoch": 0.24437713475542952, "grad_norm": 0.014960046766105765, "learning_rate": 8.397422126745437e-06, "loss": 0.0, "step": 3792 }, { "epoch": 0.2444415802023587, "grad_norm": 0.008710688093756856, "learning_rate": 8.39670605084139e-06, "loss": 0.0, "step": 3793 }, { "epoch": 0.2445060256492879, "grad_norm": 0.255980528473747, "learning_rate": 8.395989974937344e-06, "loss": 0.0017, "step": 3794 }, { "epoch": 0.24457047109621705, "grad_norm": 0.00026442442359102594, "learning_rate": 8.395273899033298e-06, "loss": 0.0, "step": 3795 }, { "epoch": 0.24463491654314623, "grad_norm": 0.3572974900567217, "learning_rate": 8.394557823129252e-06, "loss": 0.0004, "step": 3796 }, { "epoch": 0.24469936199007541, "grad_norm": 0.04774931035227209, "learning_rate": 8.393841747225207e-06, "loss": 0.0007, "step": 3797 }, { "epoch": 0.24476380743700457, "grad_norm": 0.10715923279977019, "learning_rate": 8.39312567132116e-06, "loss": 0.0005, "step": 3798 }, { "epoch": 0.24482825288393376, "grad_norm": 0.0012820152532673366, "learning_rate": 8.392409595417115e-06, "loss": 0.0, "step": 3799 }, { "epoch": 0.2448926983308629, "grad_norm": 0.3422857221960594, "learning_rate": 8.39169351951307e-06, "loss": 0.0016, "step": 3800 }, { "epoch": 0.2449571437777921, "grad_norm": 0.004484513715100537, "learning_rate": 8.390977443609024e-06, "loss": 0.0, "step": 3801 }, { "epoch": 0.24502158922472128, "grad_norm": 1.0571975791421366, "learning_rate": 8.390261367704976e-06, "loss": 0.0039, "step": 3802 }, { "epoch": 0.24508603467165044, "grad_norm": 0.0014573274750767575, "learning_rate": 8.38954529180093e-06, "loss": 0.0, "step": 3803 }, { "epoch": 0.24515048011857962, "grad_norm": 0.018570197831693735, "learning_rate": 8.388829215896885e-06, "loss": 0.0, "step": 3804 }, { "epoch": 0.2452149255655088, "grad_norm": 0.0013457453855705087, "learning_rate": 8.38811313999284e-06, "loss": 0.0, "step": 3805 }, { "epoch": 0.24527937101243796, "grad_norm": 0.0009432171359150775, "learning_rate": 8.387397064088793e-06, "loss": 0.0, "step": 3806 }, { "epoch": 0.24534381645936715, "grad_norm": 0.04670797332971043, "learning_rate": 8.386680988184748e-06, "loss": 0.0003, "step": 3807 }, { "epoch": 0.24540826190629633, "grad_norm": 0.0033869906812700606, "learning_rate": 8.385964912280704e-06, "loss": 0.0, "step": 3808 }, { "epoch": 0.2454727073532255, "grad_norm": 0.001629549809419056, "learning_rate": 8.385248836376656e-06, "loss": 0.0, "step": 3809 }, { "epoch": 0.24553715280015467, "grad_norm": 0.0008565285923683144, "learning_rate": 8.38453276047261e-06, "loss": 0.0, "step": 3810 }, { "epoch": 0.24560159824708383, "grad_norm": 0.07435944046824011, "learning_rate": 8.383816684568565e-06, "loss": 0.0002, "step": 3811 }, { "epoch": 0.24566604369401301, "grad_norm": 1.455835009863066, "learning_rate": 8.38310060866452e-06, "loss": 0.0065, "step": 3812 }, { "epoch": 0.2457304891409422, "grad_norm": 0.02158769033318039, "learning_rate": 8.382384532760474e-06, "loss": 0.0, "step": 3813 }, { "epoch": 0.24579493458787136, "grad_norm": 0.009037092296314405, "learning_rate": 8.381668456856428e-06, "loss": 0.0, "step": 3814 }, { "epoch": 0.24585938003480054, "grad_norm": 0.01045907662154638, "learning_rate": 8.380952380952382e-06, "loss": 0.0, "step": 3815 }, { "epoch": 0.24592382548172972, "grad_norm": 0.2750826120917977, "learning_rate": 8.380236305048336e-06, "loss": 0.001, "step": 3816 }, { "epoch": 0.24598827092865888, "grad_norm": 0.00485543121189977, "learning_rate": 8.37952022914429e-06, "loss": 0.0, "step": 3817 }, { "epoch": 0.24605271637558807, "grad_norm": 0.0069404249703027745, "learning_rate": 8.378804153240243e-06, "loss": 0.0, "step": 3818 }, { "epoch": 0.24611716182251725, "grad_norm": 0.0008645888421194356, "learning_rate": 8.378088077336198e-06, "loss": 0.0, "step": 3819 }, { "epoch": 0.2461816072694464, "grad_norm": 0.0006523125352911542, "learning_rate": 8.377372001432152e-06, "loss": 0.0, "step": 3820 }, { "epoch": 0.2462460527163756, "grad_norm": 0.022340756216589642, "learning_rate": 8.376655925528106e-06, "loss": 0.0003, "step": 3821 }, { "epoch": 0.24631049816330478, "grad_norm": 0.02572042481418643, "learning_rate": 8.37593984962406e-06, "loss": 0.0001, "step": 3822 }, { "epoch": 0.24637494361023393, "grad_norm": 0.05793174450618309, "learning_rate": 8.375223773720015e-06, "loss": 0.0001, "step": 3823 }, { "epoch": 0.24643938905716312, "grad_norm": 0.0013580941737972847, "learning_rate": 8.37450769781597e-06, "loss": 0.0, "step": 3824 }, { "epoch": 0.24650383450409227, "grad_norm": 0.023467460744772406, "learning_rate": 8.373791621911923e-06, "loss": 0.0001, "step": 3825 }, { "epoch": 0.24656827995102146, "grad_norm": 0.002280545682215033, "learning_rate": 8.373075546007878e-06, "loss": 0.0, "step": 3826 }, { "epoch": 0.24663272539795064, "grad_norm": 0.20302869274283905, "learning_rate": 8.372359470103832e-06, "loss": 0.0019, "step": 3827 }, { "epoch": 0.2466971708448798, "grad_norm": 2.142446703431541, "learning_rate": 8.371643394199785e-06, "loss": 0.0132, "step": 3828 }, { "epoch": 0.24676161629180898, "grad_norm": 0.11213382494207665, "learning_rate": 8.370927318295739e-06, "loss": 0.0018, "step": 3829 }, { "epoch": 0.24682606173873817, "grad_norm": 0.5988444067924492, "learning_rate": 8.370211242391693e-06, "loss": 0.0016, "step": 3830 }, { "epoch": 0.24689050718566732, "grad_norm": 0.002282277636076149, "learning_rate": 8.36949516648765e-06, "loss": 0.0, "step": 3831 }, { "epoch": 0.2469549526325965, "grad_norm": 0.017599929260292843, "learning_rate": 8.368779090583604e-06, "loss": 0.0002, "step": 3832 }, { "epoch": 0.2470193980795257, "grad_norm": 8.669601616320829e-05, "learning_rate": 8.368063014679558e-06, "loss": 0.0, "step": 3833 }, { "epoch": 0.24708384352645485, "grad_norm": 1.5981672071508441, "learning_rate": 8.36734693877551e-06, "loss": 0.0037, "step": 3834 }, { "epoch": 0.24714828897338403, "grad_norm": 0.008489611550036908, "learning_rate": 8.366630862871465e-06, "loss": 0.0001, "step": 3835 }, { "epoch": 0.24721273442031322, "grad_norm": 0.003644974177941349, "learning_rate": 8.365914786967419e-06, "loss": 0.0, "step": 3836 }, { "epoch": 0.24727717986724237, "grad_norm": 0.003976308666016474, "learning_rate": 8.365198711063373e-06, "loss": 0.0, "step": 3837 }, { "epoch": 0.24734162531417156, "grad_norm": 0.27547633413259537, "learning_rate": 8.364482635159328e-06, "loss": 0.0092, "step": 3838 }, { "epoch": 0.24740607076110072, "grad_norm": 0.0015722282850001267, "learning_rate": 8.363766559255282e-06, "loss": 0.0, "step": 3839 }, { "epoch": 0.2474705162080299, "grad_norm": 0.005728081698721112, "learning_rate": 8.363050483351236e-06, "loss": 0.0, "step": 3840 }, { "epoch": 0.24753496165495908, "grad_norm": 0.019088249596522798, "learning_rate": 8.36233440744719e-06, "loss": 0.0002, "step": 3841 }, { "epoch": 0.24759940710188824, "grad_norm": 0.13053990981847335, "learning_rate": 8.361618331543145e-06, "loss": 0.0004, "step": 3842 }, { "epoch": 0.24766385254881743, "grad_norm": 0.022335887407941846, "learning_rate": 8.3609022556391e-06, "loss": 0.0002, "step": 3843 }, { "epoch": 0.2477282979957466, "grad_norm": 0.029563943186087686, "learning_rate": 8.360186179735052e-06, "loss": 0.0001, "step": 3844 }, { "epoch": 0.24779274344267577, "grad_norm": 0.009087314706417401, "learning_rate": 8.359470103831006e-06, "loss": 0.0, "step": 3845 }, { "epoch": 0.24785718888960495, "grad_norm": 0.00653539308431923, "learning_rate": 8.35875402792696e-06, "loss": 0.0, "step": 3846 }, { "epoch": 0.24792163433653414, "grad_norm": 0.4713887646308475, "learning_rate": 8.358037952022915e-06, "loss": 0.0011, "step": 3847 }, { "epoch": 0.2479860797834633, "grad_norm": 0.024816317722805743, "learning_rate": 8.357321876118869e-06, "loss": 0.0001, "step": 3848 }, { "epoch": 0.24805052523039248, "grad_norm": 0.038245842028925114, "learning_rate": 8.356605800214823e-06, "loss": 0.0005, "step": 3849 }, { "epoch": 0.24811497067732163, "grad_norm": 0.1631073794859977, "learning_rate": 8.355889724310778e-06, "loss": 0.0007, "step": 3850 }, { "epoch": 0.24817941612425082, "grad_norm": 0.12254443599798615, "learning_rate": 8.355173648406732e-06, "loss": 0.0006, "step": 3851 }, { "epoch": 0.24824386157118, "grad_norm": 0.000737590175483497, "learning_rate": 8.354457572502686e-06, "loss": 0.0, "step": 3852 }, { "epoch": 0.24830830701810916, "grad_norm": 0.0006945422121030539, "learning_rate": 8.35374149659864e-06, "loss": 0.0, "step": 3853 }, { "epoch": 0.24837275246503834, "grad_norm": 0.019502762791774902, "learning_rate": 8.353025420694593e-06, "loss": 0.0003, "step": 3854 }, { "epoch": 0.24843719791196753, "grad_norm": 0.08053178332273953, "learning_rate": 8.352309344790549e-06, "loss": 0.0009, "step": 3855 }, { "epoch": 0.24850164335889668, "grad_norm": 0.14025133875090617, "learning_rate": 8.351593268886503e-06, "loss": 0.0012, "step": 3856 }, { "epoch": 0.24856608880582587, "grad_norm": 0.006439356158143003, "learning_rate": 8.350877192982458e-06, "loss": 0.0, "step": 3857 }, { "epoch": 0.24863053425275505, "grad_norm": 0.00029574026453269123, "learning_rate": 8.350161117078412e-06, "loss": 0.0, "step": 3858 }, { "epoch": 0.2486949796996842, "grad_norm": 0.004987156108916005, "learning_rate": 8.349445041174366e-06, "loss": 0.0, "step": 3859 }, { "epoch": 0.2487594251466134, "grad_norm": 0.015322867790270124, "learning_rate": 8.348728965270319e-06, "loss": 0.0, "step": 3860 }, { "epoch": 0.24882387059354258, "grad_norm": 0.0010423756801406121, "learning_rate": 8.348012889366273e-06, "loss": 0.0, "step": 3861 }, { "epoch": 0.24888831604047174, "grad_norm": 0.38380098203053353, "learning_rate": 8.347296813462227e-06, "loss": 0.0018, "step": 3862 }, { "epoch": 0.24895276148740092, "grad_norm": 0.0022487493770144092, "learning_rate": 8.346580737558182e-06, "loss": 0.0, "step": 3863 }, { "epoch": 0.24901720693433008, "grad_norm": 0.021213837093024088, "learning_rate": 8.345864661654136e-06, "loss": 0.0001, "step": 3864 }, { "epoch": 0.24908165238125926, "grad_norm": 0.0027843664534250444, "learning_rate": 8.34514858575009e-06, "loss": 0.0, "step": 3865 }, { "epoch": 0.24914609782818845, "grad_norm": 0.13952617361567596, "learning_rate": 8.344432509846045e-06, "loss": 0.0002, "step": 3866 }, { "epoch": 0.2492105432751176, "grad_norm": 0.0010491756332098327, "learning_rate": 8.343716433941999e-06, "loss": 0.0, "step": 3867 }, { "epoch": 0.2492749887220468, "grad_norm": 0.05011367783332657, "learning_rate": 8.343000358037953e-06, "loss": 0.0002, "step": 3868 }, { "epoch": 0.24933943416897597, "grad_norm": 0.4646252671662538, "learning_rate": 8.342284282133908e-06, "loss": 0.0106, "step": 3869 }, { "epoch": 0.24940387961590513, "grad_norm": 0.00023720217135674875, "learning_rate": 8.34156820622986e-06, "loss": 0.0, "step": 3870 }, { "epoch": 0.2494683250628343, "grad_norm": 0.03269248525753471, "learning_rate": 8.340852130325814e-06, "loss": 0.0001, "step": 3871 }, { "epoch": 0.2495327705097635, "grad_norm": 0.0042010391733460955, "learning_rate": 8.340136054421769e-06, "loss": 0.0, "step": 3872 }, { "epoch": 0.24959721595669265, "grad_norm": 0.0717919685100648, "learning_rate": 8.339419978517723e-06, "loss": 0.0002, "step": 3873 }, { "epoch": 0.24966166140362184, "grad_norm": 0.00015835011294514, "learning_rate": 8.338703902613677e-06, "loss": 0.0, "step": 3874 }, { "epoch": 0.24972610685055102, "grad_norm": 0.013157551152446972, "learning_rate": 8.337987826709632e-06, "loss": 0.0, "step": 3875 }, { "epoch": 0.24979055229748018, "grad_norm": 0.0015622204880051153, "learning_rate": 8.337271750805586e-06, "loss": 0.0, "step": 3876 }, { "epoch": 0.24985499774440936, "grad_norm": 0.03520397086657459, "learning_rate": 8.33655567490154e-06, "loss": 0.0004, "step": 3877 }, { "epoch": 0.24991944319133852, "grad_norm": 0.02710640237915254, "learning_rate": 8.335839598997495e-06, "loss": 0.0001, "step": 3878 }, { "epoch": 0.2499838886382677, "grad_norm": 0.0010845842766640747, "learning_rate": 8.335123523093449e-06, "loss": 0.0, "step": 3879 }, { "epoch": 0.2500483340851969, "grad_norm": 0.18885238021353518, "learning_rate": 8.334407447189403e-06, "loss": 0.0005, "step": 3880 }, { "epoch": 0.2501127795321261, "grad_norm": 0.14326937212325316, "learning_rate": 8.333691371285357e-06, "loss": 0.0007, "step": 3881 }, { "epoch": 0.25017722497905526, "grad_norm": 0.17603865426681994, "learning_rate": 8.332975295381312e-06, "loss": 0.0005, "step": 3882 }, { "epoch": 0.2502416704259844, "grad_norm": 0.08609897712323253, "learning_rate": 8.332259219477266e-06, "loss": 0.0003, "step": 3883 }, { "epoch": 0.25030611587291357, "grad_norm": 0.007551340900007415, "learning_rate": 8.33154314357322e-06, "loss": 0.0, "step": 3884 }, { "epoch": 0.25037056131984275, "grad_norm": 0.057125706868368396, "learning_rate": 8.330827067669175e-06, "loss": 0.0001, "step": 3885 }, { "epoch": 0.25043500676677194, "grad_norm": 0.0034849557503283534, "learning_rate": 8.330110991765127e-06, "loss": 0.0, "step": 3886 }, { "epoch": 0.2504994522137011, "grad_norm": 0.0025515581732295125, "learning_rate": 8.329394915861082e-06, "loss": 0.0, "step": 3887 }, { "epoch": 0.25056389766063025, "grad_norm": 0.0006230279157326317, "learning_rate": 8.328678839957036e-06, "loss": 0.0, "step": 3888 }, { "epoch": 0.25062834310755944, "grad_norm": 0.01758640478425065, "learning_rate": 8.32796276405299e-06, "loss": 0.0002, "step": 3889 }, { "epoch": 0.2506927885544886, "grad_norm": 0.46716078933857036, "learning_rate": 8.327246688148944e-06, "loss": 0.0034, "step": 3890 }, { "epoch": 0.2507572340014178, "grad_norm": 0.067406871204759, "learning_rate": 8.326530612244899e-06, "loss": 0.0001, "step": 3891 }, { "epoch": 0.250821679448347, "grad_norm": 0.0006241835739047378, "learning_rate": 8.325814536340853e-06, "loss": 0.0, "step": 3892 }, { "epoch": 0.2508861248952762, "grad_norm": 0.31971812344868983, "learning_rate": 8.325098460436807e-06, "loss": 0.0009, "step": 3893 }, { "epoch": 0.2509505703422053, "grad_norm": 0.028947342519578857, "learning_rate": 8.324382384532762e-06, "loss": 0.0002, "step": 3894 }, { "epoch": 0.2510150157891345, "grad_norm": 0.004368482565717253, "learning_rate": 8.323666308628714e-06, "loss": 0.0, "step": 3895 }, { "epoch": 0.2510794612360637, "grad_norm": 0.010188551518575773, "learning_rate": 8.322950232724669e-06, "loss": 0.0, "step": 3896 }, { "epoch": 0.25114390668299286, "grad_norm": 0.16556811322935083, "learning_rate": 8.322234156820623e-06, "loss": 0.0008, "step": 3897 }, { "epoch": 0.25120835212992204, "grad_norm": 0.04164728894373619, "learning_rate": 8.321518080916577e-06, "loss": 0.0001, "step": 3898 }, { "epoch": 0.25127279757685117, "grad_norm": 0.002195380129568669, "learning_rate": 8.320802005012531e-06, "loss": 0.0, "step": 3899 }, { "epoch": 0.25133724302378035, "grad_norm": 0.001985210248564425, "learning_rate": 8.320085929108486e-06, "loss": 0.0, "step": 3900 }, { "epoch": 0.25140168847070954, "grad_norm": 0.001143081809185654, "learning_rate": 8.319369853204442e-06, "loss": 0.0, "step": 3901 }, { "epoch": 0.2514661339176387, "grad_norm": 0.001505042883211427, "learning_rate": 8.318653777300394e-06, "loss": 0.0, "step": 3902 }, { "epoch": 0.2515305793645679, "grad_norm": 0.07406394901749401, "learning_rate": 8.317937701396349e-06, "loss": 0.0006, "step": 3903 }, { "epoch": 0.2515950248114971, "grad_norm": 0.0056360193079743005, "learning_rate": 8.317221625492303e-06, "loss": 0.0, "step": 3904 }, { "epoch": 0.2516594702584262, "grad_norm": 0.061877072578334864, "learning_rate": 8.316505549588257e-06, "loss": 0.0002, "step": 3905 }, { "epoch": 0.2517239157053554, "grad_norm": 0.1699128915889434, "learning_rate": 8.315789473684212e-06, "loss": 0.0005, "step": 3906 }, { "epoch": 0.2517883611522846, "grad_norm": 0.00045409563031198523, "learning_rate": 8.315073397780166e-06, "loss": 0.0, "step": 3907 }, { "epoch": 0.2518528065992138, "grad_norm": 0.025290433653521596, "learning_rate": 8.31435732187612e-06, "loss": 0.0, "step": 3908 }, { "epoch": 0.25191725204614296, "grad_norm": 0.0014953556261391822, "learning_rate": 8.313641245972074e-06, "loss": 0.0, "step": 3909 }, { "epoch": 0.2519816974930721, "grad_norm": 0.8381777895944582, "learning_rate": 8.312925170068029e-06, "loss": 0.0056, "step": 3910 }, { "epoch": 0.25204614294000127, "grad_norm": 0.018405931304765668, "learning_rate": 8.312209094163981e-06, "loss": 0.0, "step": 3911 }, { "epoch": 0.25211058838693046, "grad_norm": 0.11354576666309285, "learning_rate": 8.311493018259936e-06, "loss": 0.0002, "step": 3912 }, { "epoch": 0.25217503383385964, "grad_norm": 0.053129490844862515, "learning_rate": 8.31077694235589e-06, "loss": 0.0001, "step": 3913 }, { "epoch": 0.2522394792807888, "grad_norm": 0.0058663139011346475, "learning_rate": 8.310060866451844e-06, "loss": 0.0, "step": 3914 }, { "epoch": 0.252303924727718, "grad_norm": 0.08415443369084287, "learning_rate": 8.309344790547799e-06, "loss": 0.0001, "step": 3915 }, { "epoch": 0.25236837017464714, "grad_norm": 0.47324517501410757, "learning_rate": 8.308628714643753e-06, "loss": 0.0034, "step": 3916 }, { "epoch": 0.2524328156215763, "grad_norm": 0.00036435772833055455, "learning_rate": 8.307912638739707e-06, "loss": 0.0, "step": 3917 }, { "epoch": 0.2524972610685055, "grad_norm": 0.0016095858469143312, "learning_rate": 8.307196562835661e-06, "loss": 0.0, "step": 3918 }, { "epoch": 0.2525617065154347, "grad_norm": 0.680525427646642, "learning_rate": 8.306480486931616e-06, "loss": 0.0049, "step": 3919 }, { "epoch": 0.2526261519623639, "grad_norm": 0.0160279352790546, "learning_rate": 8.30576441102757e-06, "loss": 0.0, "step": 3920 }, { "epoch": 0.25269059740929306, "grad_norm": 0.04248723436608822, "learning_rate": 8.305048335123523e-06, "loss": 0.0001, "step": 3921 }, { "epoch": 0.2527550428562222, "grad_norm": 0.009271066669047413, "learning_rate": 8.304332259219477e-06, "loss": 0.0, "step": 3922 }, { "epoch": 0.2528194883031514, "grad_norm": 0.0045926226252857045, "learning_rate": 8.303616183315431e-06, "loss": 0.0, "step": 3923 }, { "epoch": 0.25288393375008056, "grad_norm": 0.0683818303419785, "learning_rate": 8.302900107411385e-06, "loss": 0.0001, "step": 3924 }, { "epoch": 0.25294837919700974, "grad_norm": 0.4241263926590266, "learning_rate": 8.302184031507341e-06, "loss": 0.0016, "step": 3925 }, { "epoch": 0.2530128246439389, "grad_norm": 0.002698765370629597, "learning_rate": 8.301467955603296e-06, "loss": 0.0, "step": 3926 }, { "epoch": 0.25307727009086806, "grad_norm": 0.021524961969480675, "learning_rate": 8.300751879699248e-06, "loss": 0.0, "step": 3927 }, { "epoch": 0.25314171553779724, "grad_norm": 0.0322740450649284, "learning_rate": 8.300035803795203e-06, "loss": 0.0, "step": 3928 }, { "epoch": 0.2532061609847264, "grad_norm": 0.018648881771948716, "learning_rate": 8.299319727891157e-06, "loss": 0.0, "step": 3929 }, { "epoch": 0.2532706064316556, "grad_norm": 0.11561774475944939, "learning_rate": 8.298603651987111e-06, "loss": 0.0001, "step": 3930 }, { "epoch": 0.2533350518785848, "grad_norm": 0.0038557643494287752, "learning_rate": 8.297887576083066e-06, "loss": 0.0, "step": 3931 }, { "epoch": 0.253399497325514, "grad_norm": 0.010070178706289873, "learning_rate": 8.29717150017902e-06, "loss": 0.0, "step": 3932 }, { "epoch": 0.2534639427724431, "grad_norm": 1.0039694205866754, "learning_rate": 8.296455424274974e-06, "loss": 0.0032, "step": 3933 }, { "epoch": 0.2535283882193723, "grad_norm": 0.06947501096791438, "learning_rate": 8.295739348370928e-06, "loss": 0.0, "step": 3934 }, { "epoch": 0.2535928336663015, "grad_norm": 0.0042569530407342215, "learning_rate": 8.295023272466883e-06, "loss": 0.0, "step": 3935 }, { "epoch": 0.25365727911323066, "grad_norm": 0.0014290376859672905, "learning_rate": 8.294307196562837e-06, "loss": 0.0, "step": 3936 }, { "epoch": 0.25372172456015984, "grad_norm": 0.03599591192270969, "learning_rate": 8.29359112065879e-06, "loss": 0.0001, "step": 3937 }, { "epoch": 0.253786170007089, "grad_norm": 0.03868331751936182, "learning_rate": 8.292875044754744e-06, "loss": 0.0, "step": 3938 }, { "epoch": 0.25385061545401816, "grad_norm": 0.019385192551703966, "learning_rate": 8.292158968850698e-06, "loss": 0.0, "step": 3939 }, { "epoch": 0.25391506090094734, "grad_norm": 0.002924765983032503, "learning_rate": 8.291442892946653e-06, "loss": 0.0, "step": 3940 }, { "epoch": 0.2539795063478765, "grad_norm": 0.0067314597492258285, "learning_rate": 8.290726817042607e-06, "loss": 0.0, "step": 3941 }, { "epoch": 0.2540439517948057, "grad_norm": 0.004637551407635732, "learning_rate": 8.290010741138561e-06, "loss": 0.0, "step": 3942 }, { "epoch": 0.2541083972417349, "grad_norm": 0.0018488212029575274, "learning_rate": 8.289294665234515e-06, "loss": 0.0, "step": 3943 }, { "epoch": 0.254172842688664, "grad_norm": 0.060456514226417284, "learning_rate": 8.28857858933047e-06, "loss": 0.0002, "step": 3944 }, { "epoch": 0.2542372881355932, "grad_norm": 0.020665116807158542, "learning_rate": 8.287862513426424e-06, "loss": 0.0001, "step": 3945 }, { "epoch": 0.2543017335825224, "grad_norm": 0.1538017089756088, "learning_rate": 8.287146437522378e-06, "loss": 0.0014, "step": 3946 }, { "epoch": 0.2543661790294516, "grad_norm": 4.183620420739566, "learning_rate": 8.286430361618331e-06, "loss": 0.0425, "step": 3947 }, { "epoch": 0.25443062447638076, "grad_norm": 0.2073996343380915, "learning_rate": 8.285714285714287e-06, "loss": 0.0004, "step": 3948 }, { "epoch": 0.2544950699233099, "grad_norm": 0.020734861291480815, "learning_rate": 8.284998209810241e-06, "loss": 0.0016, "step": 3949 }, { "epoch": 0.2545595153702391, "grad_norm": 0.0010731652208121033, "learning_rate": 8.284282133906196e-06, "loss": 0.0, "step": 3950 }, { "epoch": 0.25462396081716826, "grad_norm": 0.0006424096037444866, "learning_rate": 8.28356605800215e-06, "loss": 0.0, "step": 3951 }, { "epoch": 0.25468840626409744, "grad_norm": 0.003079481410634368, "learning_rate": 8.282849982098104e-06, "loss": 0.0, "step": 3952 }, { "epoch": 0.25475285171102663, "grad_norm": 0.23498132479301437, "learning_rate": 8.282133906194057e-06, "loss": 0.0006, "step": 3953 }, { "epoch": 0.2548172971579558, "grad_norm": 0.013251046524471814, "learning_rate": 8.281417830290011e-06, "loss": 0.0, "step": 3954 }, { "epoch": 0.25488174260488494, "grad_norm": 0.005255771405272541, "learning_rate": 8.280701754385965e-06, "loss": 0.0, "step": 3955 }, { "epoch": 0.2549461880518141, "grad_norm": 0.005244493955979896, "learning_rate": 8.27998567848192e-06, "loss": 0.0, "step": 3956 }, { "epoch": 0.2550106334987433, "grad_norm": 0.0006158653952264748, "learning_rate": 8.279269602577874e-06, "loss": 0.0, "step": 3957 }, { "epoch": 0.2550750789456725, "grad_norm": 0.00035900625196237423, "learning_rate": 8.278553526673828e-06, "loss": 0.0, "step": 3958 }, { "epoch": 0.2551395243926017, "grad_norm": 0.008736003219196615, "learning_rate": 8.277837450769783e-06, "loss": 0.0001, "step": 3959 }, { "epoch": 0.25520396983953086, "grad_norm": 0.0017198369442972868, "learning_rate": 8.277121374865737e-06, "loss": 0.0, "step": 3960 }, { "epoch": 0.25526841528646, "grad_norm": 0.000357485762814059, "learning_rate": 8.276405298961691e-06, "loss": 0.0, "step": 3961 }, { "epoch": 0.2553328607333892, "grad_norm": 0.004416114733046556, "learning_rate": 8.275689223057645e-06, "loss": 0.0, "step": 3962 }, { "epoch": 0.25539730618031836, "grad_norm": 0.05105998267534113, "learning_rate": 8.274973147153598e-06, "loss": 0.0001, "step": 3963 }, { "epoch": 0.25546175162724755, "grad_norm": 2.68921207630397, "learning_rate": 8.274257071249552e-06, "loss": 0.0131, "step": 3964 }, { "epoch": 0.25552619707417673, "grad_norm": 0.0021671550621980688, "learning_rate": 8.273540995345507e-06, "loss": 0.0, "step": 3965 }, { "epoch": 0.25559064252110586, "grad_norm": 0.047147107595446595, "learning_rate": 8.272824919441461e-06, "loss": 0.0004, "step": 3966 }, { "epoch": 0.25565508796803504, "grad_norm": 0.0007389136439259691, "learning_rate": 8.272108843537415e-06, "loss": 0.0, "step": 3967 }, { "epoch": 0.25571953341496423, "grad_norm": 0.0011897303535312082, "learning_rate": 8.27139276763337e-06, "loss": 0.0, "step": 3968 }, { "epoch": 0.2557839788618934, "grad_norm": 0.0008199297088428887, "learning_rate": 8.270676691729324e-06, "loss": 0.0, "step": 3969 }, { "epoch": 0.2558484243088226, "grad_norm": 0.526262460456898, "learning_rate": 8.269960615825278e-06, "loss": 0.0039, "step": 3970 }, { "epoch": 0.2559128697557518, "grad_norm": 0.005483051269792115, "learning_rate": 8.269244539921232e-06, "loss": 0.0, "step": 3971 }, { "epoch": 0.2559773152026809, "grad_norm": 0.020236496196260945, "learning_rate": 8.268528464017187e-06, "loss": 0.0, "step": 3972 }, { "epoch": 0.2560417606496101, "grad_norm": 0.22438319054480052, "learning_rate": 8.267812388113141e-06, "loss": 0.0021, "step": 3973 }, { "epoch": 0.2561062060965393, "grad_norm": 0.1771382826278181, "learning_rate": 8.267096312209095e-06, "loss": 0.0002, "step": 3974 }, { "epoch": 0.25617065154346846, "grad_norm": 0.047601454923286886, "learning_rate": 8.26638023630505e-06, "loss": 0.0001, "step": 3975 }, { "epoch": 0.25623509699039765, "grad_norm": 0.06528823065675737, "learning_rate": 8.265664160401004e-06, "loss": 0.0001, "step": 3976 }, { "epoch": 0.2562995424373268, "grad_norm": 0.021962709957251546, "learning_rate": 8.264948084496958e-06, "loss": 0.0, "step": 3977 }, { "epoch": 0.25636398788425596, "grad_norm": 0.011551023631307096, "learning_rate": 8.264232008592913e-06, "loss": 0.0, "step": 3978 }, { "epoch": 0.25642843333118515, "grad_norm": 0.0026298561406031358, "learning_rate": 8.263515932688865e-06, "loss": 0.0, "step": 3979 }, { "epoch": 0.25649287877811433, "grad_norm": 0.002949573749046438, "learning_rate": 8.26279985678482e-06, "loss": 0.0, "step": 3980 }, { "epoch": 0.2565573242250435, "grad_norm": 0.0030800614041981695, "learning_rate": 8.262083780880774e-06, "loss": 0.0, "step": 3981 }, { "epoch": 0.2566217696719727, "grad_norm": 0.02935547035373371, "learning_rate": 8.261367704976728e-06, "loss": 0.0003, "step": 3982 }, { "epoch": 0.25668621511890183, "grad_norm": 0.018741440340363177, "learning_rate": 8.260651629072682e-06, "loss": 0.0001, "step": 3983 }, { "epoch": 0.256750660565831, "grad_norm": 0.002872186655947733, "learning_rate": 8.259935553168637e-06, "loss": 0.0, "step": 3984 }, { "epoch": 0.2568151060127602, "grad_norm": 0.054660745871351675, "learning_rate": 8.259219477264591e-06, "loss": 0.0002, "step": 3985 }, { "epoch": 0.2568795514596894, "grad_norm": 0.0025098992277484064, "learning_rate": 8.258503401360545e-06, "loss": 0.0, "step": 3986 }, { "epoch": 0.25694399690661857, "grad_norm": 0.11741832015214243, "learning_rate": 8.2577873254565e-06, "loss": 0.0006, "step": 3987 }, { "epoch": 0.2570084423535477, "grad_norm": 0.008282736051549501, "learning_rate": 8.257071249552452e-06, "loss": 0.0, "step": 3988 }, { "epoch": 0.2570728878004769, "grad_norm": 0.05705696246490212, "learning_rate": 8.256355173648406e-06, "loss": 0.0001, "step": 3989 }, { "epoch": 0.25713733324740606, "grad_norm": 0.0049394262891318905, "learning_rate": 8.25563909774436e-06, "loss": 0.0, "step": 3990 }, { "epoch": 0.25720177869433525, "grad_norm": 0.04868299851585092, "learning_rate": 8.254923021840315e-06, "loss": 0.0001, "step": 3991 }, { "epoch": 0.25726622414126443, "grad_norm": 0.0073026446153531, "learning_rate": 8.25420694593627e-06, "loss": 0.0, "step": 3992 }, { "epoch": 0.2573306695881936, "grad_norm": 0.006003148253976413, "learning_rate": 8.253490870032224e-06, "loss": 0.0, "step": 3993 }, { "epoch": 0.25739511503512275, "grad_norm": 0.0030958094222771513, "learning_rate": 8.252774794128178e-06, "loss": 0.0, "step": 3994 }, { "epoch": 0.25745956048205193, "grad_norm": 0.005458472731249199, "learning_rate": 8.252058718224132e-06, "loss": 0.0, "step": 3995 }, { "epoch": 0.2575240059289811, "grad_norm": 0.00035485698227203874, "learning_rate": 8.251342642320087e-06, "loss": 0.0, "step": 3996 }, { "epoch": 0.2575884513759103, "grad_norm": 0.00017725530470848552, "learning_rate": 8.25062656641604e-06, "loss": 0.0, "step": 3997 }, { "epoch": 0.2576528968228395, "grad_norm": 0.00317466520196752, "learning_rate": 8.249910490511995e-06, "loss": 0.0, "step": 3998 }, { "epoch": 0.25771734226976867, "grad_norm": 0.012355914561483782, "learning_rate": 8.24919441460795e-06, "loss": 0.0001, "step": 3999 }, { "epoch": 0.2577817877166978, "grad_norm": 0.3189945787922834, "learning_rate": 8.248478338703904e-06, "loss": 0.0054, "step": 4000 }, { "epoch": 0.257846233163627, "grad_norm": 0.0002133263309856516, "learning_rate": 8.247762262799858e-06, "loss": 0.0, "step": 4001 }, { "epoch": 0.25791067861055617, "grad_norm": 0.005477701385231475, "learning_rate": 8.247046186895812e-06, "loss": 0.0, "step": 4002 }, { "epoch": 0.25797512405748535, "grad_norm": 0.0050049522296231825, "learning_rate": 8.246330110991767e-06, "loss": 0.0, "step": 4003 }, { "epoch": 0.25803956950441453, "grad_norm": 0.12513390461850646, "learning_rate": 8.24561403508772e-06, "loss": 0.0008, "step": 4004 }, { "epoch": 0.25810401495134366, "grad_norm": 0.387589101391816, "learning_rate": 8.244897959183674e-06, "loss": 0.0008, "step": 4005 }, { "epoch": 0.25816846039827285, "grad_norm": 0.01585431965727312, "learning_rate": 8.244181883279628e-06, "loss": 0.0, "step": 4006 }, { "epoch": 0.25823290584520203, "grad_norm": 0.00618341276123621, "learning_rate": 8.243465807375582e-06, "loss": 0.0, "step": 4007 }, { "epoch": 0.2582973512921312, "grad_norm": 0.0026786699895131457, "learning_rate": 8.242749731471536e-06, "loss": 0.0, "step": 4008 }, { "epoch": 0.2583617967390604, "grad_norm": 0.0036054754536920027, "learning_rate": 8.24203365556749e-06, "loss": 0.0, "step": 4009 }, { "epoch": 0.2584262421859896, "grad_norm": 0.0007546240302236562, "learning_rate": 8.241317579663445e-06, "loss": 0.0, "step": 4010 }, { "epoch": 0.2584906876329187, "grad_norm": 0.012423956570293417, "learning_rate": 8.2406015037594e-06, "loss": 0.0001, "step": 4011 }, { "epoch": 0.2585551330798479, "grad_norm": 0.003081257517508017, "learning_rate": 8.239885427855354e-06, "loss": 0.0, "step": 4012 }, { "epoch": 0.2586195785267771, "grad_norm": 0.3443340411812879, "learning_rate": 8.239169351951308e-06, "loss": 0.0029, "step": 4013 }, { "epoch": 0.25868402397370627, "grad_norm": 0.025767518490395797, "learning_rate": 8.23845327604726e-06, "loss": 0.0002, "step": 4014 }, { "epoch": 0.25874846942063545, "grad_norm": 0.048692089363689406, "learning_rate": 8.237737200143215e-06, "loss": 0.0001, "step": 4015 }, { "epoch": 0.2588129148675646, "grad_norm": 0.9627859124434754, "learning_rate": 8.237021124239169e-06, "loss": 0.0037, "step": 4016 }, { "epoch": 0.25887736031449377, "grad_norm": 0.05837690042372548, "learning_rate": 8.236305048335123e-06, "loss": 0.0002, "step": 4017 }, { "epoch": 0.25894180576142295, "grad_norm": 0.004356229156364595, "learning_rate": 8.23558897243108e-06, "loss": 0.0, "step": 4018 }, { "epoch": 0.25900625120835213, "grad_norm": 0.03680381490022786, "learning_rate": 8.234872896527034e-06, "loss": 0.0, "step": 4019 }, { "epoch": 0.2590706966552813, "grad_norm": 0.05317580278091146, "learning_rate": 8.234156820622986e-06, "loss": 0.0017, "step": 4020 }, { "epoch": 0.2591351421022105, "grad_norm": 0.0026499589169380762, "learning_rate": 8.23344074471894e-06, "loss": 0.0, "step": 4021 }, { "epoch": 0.25919958754913963, "grad_norm": 0.06186301694838707, "learning_rate": 8.232724668814895e-06, "loss": 0.0007, "step": 4022 }, { "epoch": 0.2592640329960688, "grad_norm": 0.008444060526545723, "learning_rate": 8.23200859291085e-06, "loss": 0.0, "step": 4023 }, { "epoch": 0.259328478442998, "grad_norm": 0.0017911749821692064, "learning_rate": 8.231292517006804e-06, "loss": 0.0, "step": 4024 }, { "epoch": 0.2593929238899272, "grad_norm": 2.548784534768542, "learning_rate": 8.230576441102758e-06, "loss": 0.0384, "step": 4025 }, { "epoch": 0.25945736933685637, "grad_norm": 2.548784534768542, "learning_rate": 8.230576441102758e-06, "loss": 0.0226, "step": 4026 }, { "epoch": 0.2595218147837855, "grad_norm": 0.005779961270392248, "learning_rate": 8.229860365198712e-06, "loss": 0.0, "step": 4027 }, { "epoch": 0.2595862602307147, "grad_norm": 0.0009377977132422742, "learning_rate": 8.229144289294666e-06, "loss": 0.0, "step": 4028 }, { "epoch": 0.25965070567764387, "grad_norm": 0.03774370585557605, "learning_rate": 8.22842821339062e-06, "loss": 0.0, "step": 4029 }, { "epoch": 0.25971515112457305, "grad_norm": 0.005797201910791572, "learning_rate": 8.227712137486575e-06, "loss": 0.0, "step": 4030 }, { "epoch": 0.25977959657150224, "grad_norm": 0.06964904980649746, "learning_rate": 8.226996061582528e-06, "loss": 0.0015, "step": 4031 }, { "epoch": 0.2598440420184314, "grad_norm": 0.060964751645412936, "learning_rate": 8.226279985678482e-06, "loss": 0.0002, "step": 4032 }, { "epoch": 0.25990848746536055, "grad_norm": 0.060964751645412936, "learning_rate": 8.226279985678482e-06, "loss": 0.0187, "step": 4033 }, { "epoch": 0.25997293291228973, "grad_norm": 0.060525086543292334, "learning_rate": 8.225563909774436e-06, "loss": 0.0002, "step": 4034 }, { "epoch": 0.2600373783592189, "grad_norm": 0.19072903311402004, "learning_rate": 8.22484783387039e-06, "loss": 0.0002, "step": 4035 }, { "epoch": 0.2601018238061481, "grad_norm": 0.031776800989206204, "learning_rate": 8.224131757966345e-06, "loss": 0.0, "step": 4036 }, { "epoch": 0.2601662692530773, "grad_norm": 0.08727196607202162, "learning_rate": 8.223415682062299e-06, "loss": 0.0002, "step": 4037 }, { "epoch": 0.26023071470000647, "grad_norm": 0.08446679894310626, "learning_rate": 8.222699606158253e-06, "loss": 0.0002, "step": 4038 }, { "epoch": 0.2602951601469356, "grad_norm": 0.002980232543945297, "learning_rate": 8.221983530254208e-06, "loss": 0.0, "step": 4039 }, { "epoch": 0.2603596055938648, "grad_norm": 0.0011543615042102016, "learning_rate": 8.221267454350162e-06, "loss": 0.0, "step": 4040 }, { "epoch": 0.26042405104079397, "grad_norm": 0.41192820033215677, "learning_rate": 8.220551378446116e-06, "loss": 0.0012, "step": 4041 }, { "epoch": 0.26048849648772315, "grad_norm": 0.0010378573531816071, "learning_rate": 8.219835302542069e-06, "loss": 0.0, "step": 4042 }, { "epoch": 0.26055294193465234, "grad_norm": 0.0006749139181435023, "learning_rate": 8.219119226638023e-06, "loss": 0.0, "step": 4043 }, { "epoch": 0.26061738738158147, "grad_norm": 0.07496953795127487, "learning_rate": 8.21840315073398e-06, "loss": 0.0002, "step": 4044 }, { "epoch": 0.26068183282851065, "grad_norm": 0.004533902303890554, "learning_rate": 8.217687074829933e-06, "loss": 0.0, "step": 4045 }, { "epoch": 0.26074627827543984, "grad_norm": 0.0896993077211288, "learning_rate": 8.216970998925888e-06, "loss": 0.0001, "step": 4046 }, { "epoch": 0.260810723722369, "grad_norm": 0.0005955184982546869, "learning_rate": 8.216254923021842e-06, "loss": 0.0, "step": 4047 }, { "epoch": 0.2608751691692982, "grad_norm": 0.004027601831837156, "learning_rate": 8.215538847117795e-06, "loss": 0.0, "step": 4048 }, { "epoch": 0.2609396146162274, "grad_norm": 0.0069240323019456535, "learning_rate": 8.214822771213749e-06, "loss": 0.0001, "step": 4049 }, { "epoch": 0.2610040600631565, "grad_norm": 0.0008218871824397413, "learning_rate": 8.214106695309703e-06, "loss": 0.0, "step": 4050 }, { "epoch": 0.2610685055100857, "grad_norm": 0.0008479160595231805, "learning_rate": 8.213390619405658e-06, "loss": 0.0, "step": 4051 }, { "epoch": 0.2611329509570149, "grad_norm": 0.1893188846496353, "learning_rate": 8.212674543501612e-06, "loss": 0.0006, "step": 4052 }, { "epoch": 0.26119739640394407, "grad_norm": 0.015341010774691261, "learning_rate": 8.211958467597566e-06, "loss": 0.0001, "step": 4053 }, { "epoch": 0.26126184185087326, "grad_norm": 0.0028778119216130286, "learning_rate": 8.21124239169352e-06, "loss": 0.0, "step": 4054 }, { "epoch": 0.2613262872978024, "grad_norm": 0.07925056622743644, "learning_rate": 8.210526315789475e-06, "loss": 0.0009, "step": 4055 }, { "epoch": 0.26139073274473157, "grad_norm": 0.20684729806969399, "learning_rate": 8.209810239885429e-06, "loss": 0.0043, "step": 4056 }, { "epoch": 0.26145517819166075, "grad_norm": 0.0006323088838524224, "learning_rate": 8.209094163981383e-06, "loss": 0.0, "step": 4057 }, { "epoch": 0.26151962363858994, "grad_norm": 0.0011074759344691717, "learning_rate": 8.208378088077336e-06, "loss": 0.0, "step": 4058 }, { "epoch": 0.2615840690855191, "grad_norm": 0.00026941518339601263, "learning_rate": 8.20766201217329e-06, "loss": 0.0, "step": 4059 }, { "epoch": 0.2616485145324483, "grad_norm": 0.04375307634300663, "learning_rate": 8.206945936269245e-06, "loss": 0.0001, "step": 4060 }, { "epoch": 0.26171295997937744, "grad_norm": 0.05213391882349801, "learning_rate": 8.206229860365199e-06, "loss": 0.0001, "step": 4061 }, { "epoch": 0.2617774054263066, "grad_norm": 1.516026237311075, "learning_rate": 8.205513784461153e-06, "loss": 0.0053, "step": 4062 }, { "epoch": 0.2618418508732358, "grad_norm": 0.27357712314436866, "learning_rate": 8.204797708557107e-06, "loss": 0.0021, "step": 4063 }, { "epoch": 0.261906296320165, "grad_norm": 0.005267339834329356, "learning_rate": 8.204081632653062e-06, "loss": 0.0, "step": 4064 }, { "epoch": 0.2619707417670942, "grad_norm": 0.006003939047317733, "learning_rate": 8.203365556749016e-06, "loss": 0.0, "step": 4065 }, { "epoch": 0.2620351872140233, "grad_norm": 0.006873744433309013, "learning_rate": 8.20264948084497e-06, "loss": 0.0, "step": 4066 }, { "epoch": 0.2620996326609525, "grad_norm": 0.0010702078088623004, "learning_rate": 8.201933404940925e-06, "loss": 0.0, "step": 4067 }, { "epoch": 0.26216407810788167, "grad_norm": 0.00016045440845831096, "learning_rate": 8.201217329036879e-06, "loss": 0.0, "step": 4068 }, { "epoch": 0.26222852355481086, "grad_norm": 0.33688526859804885, "learning_rate": 8.200501253132833e-06, "loss": 0.0033, "step": 4069 }, { "epoch": 0.26229296900174004, "grad_norm": 0.005965846264202467, "learning_rate": 8.199785177228788e-06, "loss": 0.0, "step": 4070 }, { "epoch": 0.2623574144486692, "grad_norm": 1.2753300557497391, "learning_rate": 8.199069101324742e-06, "loss": 0.007, "step": 4071 }, { "epoch": 0.26242185989559835, "grad_norm": 0.42094141481840447, "learning_rate": 8.198353025420696e-06, "loss": 0.0034, "step": 4072 }, { "epoch": 0.26248630534252754, "grad_norm": 0.0007287004405519097, "learning_rate": 8.19763694951665e-06, "loss": 0.0, "step": 4073 }, { "epoch": 0.2625507507894567, "grad_norm": 0.00040297765390191563, "learning_rate": 8.196920873612603e-06, "loss": 0.0, "step": 4074 }, { "epoch": 0.2626151962363859, "grad_norm": 0.013456809470448914, "learning_rate": 8.196204797708557e-06, "loss": 0.0, "step": 4075 }, { "epoch": 0.2626796416833151, "grad_norm": 0.37379560179625704, "learning_rate": 8.195488721804512e-06, "loss": 0.0044, "step": 4076 }, { "epoch": 0.2627440871302443, "grad_norm": 0.19677700298270354, "learning_rate": 8.194772645900466e-06, "loss": 0.0001, "step": 4077 }, { "epoch": 0.2628085325771734, "grad_norm": 0.4003689829862226, "learning_rate": 8.19405656999642e-06, "loss": 0.0004, "step": 4078 }, { "epoch": 0.2628729780241026, "grad_norm": 0.004796831837730866, "learning_rate": 8.193340494092375e-06, "loss": 0.0001, "step": 4079 }, { "epoch": 0.2629374234710318, "grad_norm": 0.0019953814802931044, "learning_rate": 8.192624418188329e-06, "loss": 0.0, "step": 4080 }, { "epoch": 0.26300186891796096, "grad_norm": 0.08354990881299791, "learning_rate": 8.191908342284283e-06, "loss": 0.0017, "step": 4081 }, { "epoch": 0.26306631436489014, "grad_norm": 0.022514688404505278, "learning_rate": 8.191192266380237e-06, "loss": 0.0002, "step": 4082 }, { "epoch": 0.26313075981181927, "grad_norm": 0.029840040645768735, "learning_rate": 8.190476190476192e-06, "loss": 0.0001, "step": 4083 }, { "epoch": 0.26319520525874845, "grad_norm": 0.0012259527385874386, "learning_rate": 8.189760114572144e-06, "loss": 0.0, "step": 4084 }, { "epoch": 0.26325965070567764, "grad_norm": 0.0016036067659748598, "learning_rate": 8.189044038668099e-06, "loss": 0.0, "step": 4085 }, { "epoch": 0.2633240961526068, "grad_norm": 0.00031105862440330756, "learning_rate": 8.188327962764053e-06, "loss": 0.0, "step": 4086 }, { "epoch": 0.263388541599536, "grad_norm": 0.01397653280989205, "learning_rate": 8.187611886860007e-06, "loss": 0.0001, "step": 4087 }, { "epoch": 0.2634529870464652, "grad_norm": 0.001967928846776797, "learning_rate": 8.186895810955962e-06, "loss": 0.0, "step": 4088 }, { "epoch": 0.2635174324933943, "grad_norm": 0.00274836594875593, "learning_rate": 8.186179735051916e-06, "loss": 0.0, "step": 4089 }, { "epoch": 0.2635818779403235, "grad_norm": 0.0032691538261679083, "learning_rate": 8.18546365914787e-06, "loss": 0.0, "step": 4090 }, { "epoch": 0.2636463233872527, "grad_norm": 0.14504496675654013, "learning_rate": 8.184747583243824e-06, "loss": 0.0013, "step": 4091 }, { "epoch": 0.2637107688341819, "grad_norm": 0.5235929400677679, "learning_rate": 8.184031507339779e-06, "loss": 0.0018, "step": 4092 }, { "epoch": 0.26377521428111106, "grad_norm": 0.0067012557335894415, "learning_rate": 8.183315431435733e-06, "loss": 0.0, "step": 4093 }, { "epoch": 0.2638396597280402, "grad_norm": 0.6293055528436677, "learning_rate": 8.182599355531687e-06, "loss": 0.0015, "step": 4094 }, { "epoch": 0.2639041051749694, "grad_norm": 0.01867268633251904, "learning_rate": 8.181883279627642e-06, "loss": 0.0001, "step": 4095 }, { "epoch": 0.26396855062189856, "grad_norm": 0.24328595672003867, "learning_rate": 8.181167203723596e-06, "loss": 0.0001, "step": 4096 }, { "epoch": 0.26403299606882774, "grad_norm": 0.0032203777060472484, "learning_rate": 8.18045112781955e-06, "loss": 0.0, "step": 4097 }, { "epoch": 0.2640974415157569, "grad_norm": 0.06334367560512613, "learning_rate": 8.179735051915505e-06, "loss": 0.0001, "step": 4098 }, { "epoch": 0.2641618869626861, "grad_norm": 0.0012013743522975488, "learning_rate": 8.179018976011459e-06, "loss": 0.0, "step": 4099 }, { "epoch": 0.26422633240961524, "grad_norm": 0.0037318730977894373, "learning_rate": 8.178302900107411e-06, "loss": 0.0, "step": 4100 }, { "epoch": 0.2642907778565444, "grad_norm": 0.00883380236499844, "learning_rate": 8.177586824203366e-06, "loss": 0.0001, "step": 4101 }, { "epoch": 0.2643552233034736, "grad_norm": 0.0044728075765404236, "learning_rate": 8.17687074829932e-06, "loss": 0.0, "step": 4102 }, { "epoch": 0.2644196687504028, "grad_norm": 0.0070594660207348395, "learning_rate": 8.176154672395274e-06, "loss": 0.0001, "step": 4103 }, { "epoch": 0.264484114197332, "grad_norm": 0.10401460655221516, "learning_rate": 8.175438596491229e-06, "loss": 0.0001, "step": 4104 }, { "epoch": 0.2645485596442611, "grad_norm": 0.03844934332167701, "learning_rate": 8.174722520587183e-06, "loss": 0.0002, "step": 4105 }, { "epoch": 0.2646130050911903, "grad_norm": 1.0716477578622337, "learning_rate": 8.174006444683137e-06, "loss": 0.0032, "step": 4106 }, { "epoch": 0.2646774505381195, "grad_norm": 1.3784877152724, "learning_rate": 8.173290368779092e-06, "loss": 0.0164, "step": 4107 }, { "epoch": 0.26474189598504866, "grad_norm": 0.029587838605416625, "learning_rate": 8.172574292875046e-06, "loss": 0.0001, "step": 4108 }, { "epoch": 0.26480634143197784, "grad_norm": 0.18620288726645495, "learning_rate": 8.171858216970998e-06, "loss": 0.0019, "step": 4109 }, { "epoch": 0.264870786878907, "grad_norm": 0.0058924889771335845, "learning_rate": 8.171142141066953e-06, "loss": 0.0, "step": 4110 }, { "epoch": 0.26493523232583616, "grad_norm": 0.025689142320420872, "learning_rate": 8.170426065162907e-06, "loss": 0.0001, "step": 4111 }, { "epoch": 0.26499967777276534, "grad_norm": 0.007680422605328349, "learning_rate": 8.169709989258861e-06, "loss": 0.0, "step": 4112 }, { "epoch": 0.2650641232196945, "grad_norm": 0.025682199960681367, "learning_rate": 8.168993913354816e-06, "loss": 0.0001, "step": 4113 }, { "epoch": 0.2651285686666237, "grad_norm": 0.6048429639215468, "learning_rate": 8.168277837450772e-06, "loss": 0.0029, "step": 4114 }, { "epoch": 0.2651930141135529, "grad_norm": 0.21238982942042506, "learning_rate": 8.167561761546726e-06, "loss": 0.0002, "step": 4115 }, { "epoch": 0.2652574595604821, "grad_norm": 0.1002031654121083, "learning_rate": 8.166845685642679e-06, "loss": 0.0001, "step": 4116 }, { "epoch": 0.2653219050074112, "grad_norm": 0.07676032076806907, "learning_rate": 8.166129609738633e-06, "loss": 0.0, "step": 4117 }, { "epoch": 0.2653863504543404, "grad_norm": 0.0556255801510828, "learning_rate": 8.165413533834587e-06, "loss": 0.0001, "step": 4118 }, { "epoch": 0.2654507959012696, "grad_norm": 0.38215803137244037, "learning_rate": 8.164697457930541e-06, "loss": 0.0012, "step": 4119 }, { "epoch": 0.26551524134819876, "grad_norm": 0.367995408179826, "learning_rate": 8.163981382026496e-06, "loss": 0.0004, "step": 4120 }, { "epoch": 0.26557968679512794, "grad_norm": 0.12432520116844026, "learning_rate": 8.16326530612245e-06, "loss": 0.0002, "step": 4121 }, { "epoch": 0.2656441322420571, "grad_norm": 0.40048772983471204, "learning_rate": 8.162549230218404e-06, "loss": 0.0012, "step": 4122 }, { "epoch": 0.26570857768898626, "grad_norm": 0.028347294384729757, "learning_rate": 8.161833154314359e-06, "loss": 0.0001, "step": 4123 }, { "epoch": 0.26577302313591544, "grad_norm": 0.001553801958047865, "learning_rate": 8.161117078410313e-06, "loss": 0.0, "step": 4124 }, { "epoch": 0.2658374685828446, "grad_norm": 0.0048637148912617, "learning_rate": 8.160401002506266e-06, "loss": 0.0, "step": 4125 }, { "epoch": 0.2659019140297738, "grad_norm": 0.06001730600695284, "learning_rate": 8.15968492660222e-06, "loss": 0.0004, "step": 4126 }, { "epoch": 0.265966359476703, "grad_norm": 0.002554106564452214, "learning_rate": 8.158968850698174e-06, "loss": 0.0, "step": 4127 }, { "epoch": 0.2660308049236321, "grad_norm": 0.01876722425713162, "learning_rate": 8.158252774794128e-06, "loss": 0.0002, "step": 4128 }, { "epoch": 0.2660952503705613, "grad_norm": 0.0034013080274845862, "learning_rate": 8.157536698890083e-06, "loss": 0.0, "step": 4129 }, { "epoch": 0.2661596958174905, "grad_norm": 0.0013784421649856453, "learning_rate": 8.156820622986037e-06, "loss": 0.0, "step": 4130 }, { "epoch": 0.2662241412644197, "grad_norm": 0.002031356068066092, "learning_rate": 8.156104547081991e-06, "loss": 0.0, "step": 4131 }, { "epoch": 0.26628858671134886, "grad_norm": 0.008120560940019509, "learning_rate": 8.155388471177946e-06, "loss": 0.0, "step": 4132 }, { "epoch": 0.266353032158278, "grad_norm": 0.09823169443657727, "learning_rate": 8.1546723952739e-06, "loss": 0.0001, "step": 4133 }, { "epoch": 0.2664174776052072, "grad_norm": 0.016965823095737853, "learning_rate": 8.153956319369854e-06, "loss": 0.0001, "step": 4134 }, { "epoch": 0.26648192305213636, "grad_norm": 0.0015814950437502196, "learning_rate": 8.153240243465807e-06, "loss": 0.0, "step": 4135 }, { "epoch": 0.26654636849906554, "grad_norm": 0.05691408457785069, "learning_rate": 8.152524167561761e-06, "loss": 0.0001, "step": 4136 }, { "epoch": 0.26661081394599473, "grad_norm": 0.03462659186650775, "learning_rate": 8.151808091657717e-06, "loss": 0.0001, "step": 4137 }, { "epoch": 0.2666752593929239, "grad_norm": 0.014375004746504964, "learning_rate": 8.151092015753671e-06, "loss": 0.0001, "step": 4138 }, { "epoch": 0.26673970483985304, "grad_norm": 0.0024106756135800498, "learning_rate": 8.150375939849626e-06, "loss": 0.0, "step": 4139 }, { "epoch": 0.2668041502867822, "grad_norm": 0.007668585018300137, "learning_rate": 8.14965986394558e-06, "loss": 0.0, "step": 4140 }, { "epoch": 0.2668685957337114, "grad_norm": 0.006620185098797049, "learning_rate": 8.148943788041533e-06, "loss": 0.0, "step": 4141 }, { "epoch": 0.2669330411806406, "grad_norm": 0.001317973851116255, "learning_rate": 8.148227712137487e-06, "loss": 0.0, "step": 4142 }, { "epoch": 0.2669974866275698, "grad_norm": 0.035794016515809664, "learning_rate": 8.147511636233441e-06, "loss": 0.0004, "step": 4143 }, { "epoch": 0.2670619320744989, "grad_norm": 0.00035408094955500456, "learning_rate": 8.146795560329396e-06, "loss": 0.0, "step": 4144 }, { "epoch": 0.2671263775214281, "grad_norm": 0.008474903101516811, "learning_rate": 8.14607948442535e-06, "loss": 0.0001, "step": 4145 }, { "epoch": 0.2671908229683573, "grad_norm": 0.0012700962973577897, "learning_rate": 8.145363408521304e-06, "loss": 0.0, "step": 4146 }, { "epoch": 0.26725526841528646, "grad_norm": 0.0002888536530388627, "learning_rate": 8.144647332617258e-06, "loss": 0.0, "step": 4147 }, { "epoch": 0.26731971386221565, "grad_norm": 0.0006534653339654185, "learning_rate": 8.143931256713213e-06, "loss": 0.0, "step": 4148 }, { "epoch": 0.26738415930914483, "grad_norm": 0.07163475431684561, "learning_rate": 8.143215180809167e-06, "loss": 0.0001, "step": 4149 }, { "epoch": 0.26744860475607396, "grad_norm": 0.0025577957639277035, "learning_rate": 8.142499104905121e-06, "loss": 0.0, "step": 4150 }, { "epoch": 0.26751305020300314, "grad_norm": 0.00018299976222950143, "learning_rate": 8.141783029001074e-06, "loss": 0.0, "step": 4151 }, { "epoch": 0.26757749564993233, "grad_norm": 0.2443402427946589, "learning_rate": 8.141066953097028e-06, "loss": 0.0007, "step": 4152 }, { "epoch": 0.2676419410968615, "grad_norm": 0.4529980777695485, "learning_rate": 8.140350877192983e-06, "loss": 0.002, "step": 4153 }, { "epoch": 0.2677063865437907, "grad_norm": 0.023886939799844756, "learning_rate": 8.139634801288937e-06, "loss": 0.0003, "step": 4154 }, { "epoch": 0.2677708319907199, "grad_norm": 0.0007809454694696253, "learning_rate": 8.138918725384891e-06, "loss": 0.0, "step": 4155 }, { "epoch": 0.267835277437649, "grad_norm": 0.18457986917933467, "learning_rate": 8.138202649480845e-06, "loss": 0.0043, "step": 4156 }, { "epoch": 0.2678997228845782, "grad_norm": 0.005621835272940629, "learning_rate": 8.1374865735768e-06, "loss": 0.0001, "step": 4157 }, { "epoch": 0.2679641683315074, "grad_norm": 0.0006343240890120217, "learning_rate": 8.136770497672754e-06, "loss": 0.0, "step": 4158 }, { "epoch": 0.26802861377843656, "grad_norm": 0.07103669554810184, "learning_rate": 8.136054421768708e-06, "loss": 0.0002, "step": 4159 }, { "epoch": 0.26809305922536575, "grad_norm": 0.00017467493782411902, "learning_rate": 8.135338345864663e-06, "loss": 0.0, "step": 4160 }, { "epoch": 0.2681575046722949, "grad_norm": 0.00046530142134858303, "learning_rate": 8.134622269960617e-06, "loss": 0.0, "step": 4161 }, { "epoch": 0.26822195011922406, "grad_norm": 0.00028367250039852975, "learning_rate": 8.133906194056571e-06, "loss": 0.0, "step": 4162 }, { "epoch": 0.26828639556615325, "grad_norm": 0.07753849113407583, "learning_rate": 8.133190118152525e-06, "loss": 0.0002, "step": 4163 }, { "epoch": 0.26835084101308243, "grad_norm": 0.00028427710331693025, "learning_rate": 8.13247404224848e-06, "loss": 0.0, "step": 4164 }, { "epoch": 0.2684152864600116, "grad_norm": 0.0039621051110842725, "learning_rate": 8.131757966344434e-06, "loss": 0.0, "step": 4165 }, { "epoch": 0.2684797319069408, "grad_norm": 0.021191026836945282, "learning_rate": 8.131041890440388e-06, "loss": 0.0001, "step": 4166 }, { "epoch": 0.26854417735386993, "grad_norm": 0.0008846425877893397, "learning_rate": 8.130325814536341e-06, "loss": 0.0, "step": 4167 }, { "epoch": 0.2686086228007991, "grad_norm": 0.00047771099607565615, "learning_rate": 8.129609738632295e-06, "loss": 0.0, "step": 4168 }, { "epoch": 0.2686730682477283, "grad_norm": 0.011498115816560874, "learning_rate": 8.12889366272825e-06, "loss": 0.0, "step": 4169 }, { "epoch": 0.2687375136946575, "grad_norm": 0.00035771112664960457, "learning_rate": 8.128177586824204e-06, "loss": 0.0, "step": 4170 }, { "epoch": 0.26880195914158667, "grad_norm": 0.0011935188824502712, "learning_rate": 8.127461510920158e-06, "loss": 0.0, "step": 4171 }, { "epoch": 0.2688664045885158, "grad_norm": 0.0036977720277806395, "learning_rate": 8.126745435016112e-06, "loss": 0.0, "step": 4172 }, { "epoch": 0.268930850035445, "grad_norm": 0.0014385408918984439, "learning_rate": 8.126029359112067e-06, "loss": 0.0, "step": 4173 }, { "epoch": 0.26899529548237416, "grad_norm": 0.0024186235474048293, "learning_rate": 8.125313283208021e-06, "loss": 0.0, "step": 4174 }, { "epoch": 0.26905974092930335, "grad_norm": 0.034542698594028746, "learning_rate": 8.124597207303975e-06, "loss": 0.0001, "step": 4175 }, { "epoch": 0.26912418637623253, "grad_norm": 0.009862969678027288, "learning_rate": 8.12388113139993e-06, "loss": 0.0, "step": 4176 }, { "epoch": 0.2691886318231617, "grad_norm": 0.22107264717946118, "learning_rate": 8.123165055495882e-06, "loss": 0.002, "step": 4177 }, { "epoch": 0.26925307727009085, "grad_norm": 0.0001773784457137841, "learning_rate": 8.122448979591837e-06, "loss": 0.0, "step": 4178 }, { "epoch": 0.26931752271702003, "grad_norm": 0.7825109224501692, "learning_rate": 8.121732903687791e-06, "loss": 0.0013, "step": 4179 }, { "epoch": 0.2693819681639492, "grad_norm": 1.3505293144001949, "learning_rate": 8.121016827783745e-06, "loss": 0.0069, "step": 4180 }, { "epoch": 0.2694464136108784, "grad_norm": 0.15621311825343512, "learning_rate": 8.1203007518797e-06, "loss": 0.0, "step": 4181 }, { "epoch": 0.2695108590578076, "grad_norm": 0.007509552035810147, "learning_rate": 8.119584675975654e-06, "loss": 0.0, "step": 4182 }, { "epoch": 0.26957530450473677, "grad_norm": 0.0005021826837052551, "learning_rate": 8.118868600071608e-06, "loss": 0.0, "step": 4183 }, { "epoch": 0.2696397499516659, "grad_norm": 0.0031436756764531048, "learning_rate": 8.118152524167562e-06, "loss": 0.0, "step": 4184 }, { "epoch": 0.2697041953985951, "grad_norm": 0.9572962491351598, "learning_rate": 8.117436448263517e-06, "loss": 0.0021, "step": 4185 }, { "epoch": 0.26976864084552427, "grad_norm": 1.3736213361417184, "learning_rate": 8.116720372359471e-06, "loss": 0.0054, "step": 4186 }, { "epoch": 0.26983308629245345, "grad_norm": 0.3207216905616851, "learning_rate": 8.116004296455425e-06, "loss": 0.0012, "step": 4187 }, { "epoch": 0.26989753173938263, "grad_norm": 0.029056574864155083, "learning_rate": 8.11528822055138e-06, "loss": 0.0001, "step": 4188 }, { "epoch": 0.26996197718631176, "grad_norm": 0.0008124575126002671, "learning_rate": 8.114572144647334e-06, "loss": 0.0, "step": 4189 }, { "epoch": 0.27002642263324095, "grad_norm": 0.007161701968818947, "learning_rate": 8.113856068743288e-06, "loss": 0.0, "step": 4190 }, { "epoch": 0.27009086808017013, "grad_norm": 0.002907445352707109, "learning_rate": 8.113139992839242e-06, "loss": 0.0, "step": 4191 }, { "epoch": 0.2701553135270993, "grad_norm": 0.0014930615116478428, "learning_rate": 8.112423916935197e-06, "loss": 0.0, "step": 4192 }, { "epoch": 0.2702197589740285, "grad_norm": 0.0064780529217881066, "learning_rate": 8.11170784103115e-06, "loss": 0.0, "step": 4193 }, { "epoch": 0.2702842044209577, "grad_norm": 0.0006701570723918696, "learning_rate": 8.110991765127104e-06, "loss": 0.0, "step": 4194 }, { "epoch": 0.2703486498678868, "grad_norm": 0.17730787477319992, "learning_rate": 8.110275689223058e-06, "loss": 0.0007, "step": 4195 }, { "epoch": 0.270413095314816, "grad_norm": 0.4032809813305308, "learning_rate": 8.109559613319012e-06, "loss": 0.0016, "step": 4196 }, { "epoch": 0.2704775407617452, "grad_norm": 0.00046503311607447816, "learning_rate": 8.108843537414967e-06, "loss": 0.0, "step": 4197 }, { "epoch": 0.27054198620867437, "grad_norm": 0.0011172217789278758, "learning_rate": 8.108127461510921e-06, "loss": 0.0, "step": 4198 }, { "epoch": 0.27060643165560355, "grad_norm": 0.0008143952554526554, "learning_rate": 8.107411385606875e-06, "loss": 0.0, "step": 4199 }, { "epoch": 0.2706708771025327, "grad_norm": 0.008522984751307939, "learning_rate": 8.10669530970283e-06, "loss": 0.0001, "step": 4200 }, { "epoch": 0.27073532254946187, "grad_norm": 0.0024840219283999553, "learning_rate": 8.105979233798784e-06, "loss": 0.0, "step": 4201 }, { "epoch": 0.27079976799639105, "grad_norm": 0.00039293305919349785, "learning_rate": 8.105263157894736e-06, "loss": 0.0, "step": 4202 }, { "epoch": 0.27086421344332023, "grad_norm": 0.005540428539705254, "learning_rate": 8.10454708199069e-06, "loss": 0.0001, "step": 4203 }, { "epoch": 0.2709286588902494, "grad_norm": 0.011686594502197983, "learning_rate": 8.103831006086645e-06, "loss": 0.0, "step": 4204 }, { "epoch": 0.2709931043371786, "grad_norm": 0.16621984375021687, "learning_rate": 8.1031149301826e-06, "loss": 0.0001, "step": 4205 }, { "epoch": 0.27105754978410773, "grad_norm": 0.0004452170447672888, "learning_rate": 8.102398854278554e-06, "loss": 0.0, "step": 4206 }, { "epoch": 0.2711219952310369, "grad_norm": 0.00228342254717163, "learning_rate": 8.10168277837451e-06, "loss": 0.0, "step": 4207 }, { "epoch": 0.2711864406779661, "grad_norm": 0.00729092715793383, "learning_rate": 8.100966702470464e-06, "loss": 0.0, "step": 4208 }, { "epoch": 0.2712508861248953, "grad_norm": 0.01637368052505697, "learning_rate": 8.100250626566416e-06, "loss": 0.0001, "step": 4209 }, { "epoch": 0.27131533157182447, "grad_norm": 0.47464788147246495, "learning_rate": 8.09953455066237e-06, "loss": 0.0007, "step": 4210 }, { "epoch": 0.2713797770187536, "grad_norm": 0.017767601083250248, "learning_rate": 8.098818474758325e-06, "loss": 0.0001, "step": 4211 }, { "epoch": 0.2714442224656828, "grad_norm": 0.0002587533399039369, "learning_rate": 8.09810239885428e-06, "loss": 0.0, "step": 4212 }, { "epoch": 0.27150866791261197, "grad_norm": 1.0716687262551394, "learning_rate": 8.097386322950234e-06, "loss": 0.0001, "step": 4213 }, { "epoch": 0.27157311335954115, "grad_norm": 0.0024058953844227756, "learning_rate": 8.096670247046188e-06, "loss": 0.0, "step": 4214 }, { "epoch": 0.27163755880647034, "grad_norm": 0.00714180532387043, "learning_rate": 8.095954171142142e-06, "loss": 0.0, "step": 4215 }, { "epoch": 0.2717020042533995, "grad_norm": 0.2157260896229057, "learning_rate": 8.095238095238097e-06, "loss": 0.0004, "step": 4216 }, { "epoch": 0.27176644970032865, "grad_norm": 0.015134509852193008, "learning_rate": 8.09452201933405e-06, "loss": 0.0, "step": 4217 }, { "epoch": 0.27183089514725783, "grad_norm": 0.013603620153944285, "learning_rate": 8.093805943430003e-06, "loss": 0.0001, "step": 4218 }, { "epoch": 0.271895340594187, "grad_norm": 0.014178400987857479, "learning_rate": 8.093089867525958e-06, "loss": 0.0001, "step": 4219 }, { "epoch": 0.2719597860411162, "grad_norm": 0.00024165479162664222, "learning_rate": 8.092373791621912e-06, "loss": 0.0, "step": 4220 }, { "epoch": 0.2720242314880454, "grad_norm": 0.0020571868036936888, "learning_rate": 8.091657715717866e-06, "loss": 0.0, "step": 4221 }, { "epoch": 0.27208867693497457, "grad_norm": 0.005309349799260391, "learning_rate": 8.09094163981382e-06, "loss": 0.0, "step": 4222 }, { "epoch": 0.2721531223819037, "grad_norm": 0.30123737973572057, "learning_rate": 8.090225563909775e-06, "loss": 0.005, "step": 4223 }, { "epoch": 0.2722175678288329, "grad_norm": 0.0012302217094057092, "learning_rate": 8.08950948800573e-06, "loss": 0.0, "step": 4224 }, { "epoch": 0.27228201327576207, "grad_norm": 0.0002683422688236845, "learning_rate": 8.088793412101684e-06, "loss": 0.0, "step": 4225 }, { "epoch": 0.27234645872269125, "grad_norm": 0.0006782941113959453, "learning_rate": 8.088077336197638e-06, "loss": 0.0, "step": 4226 }, { "epoch": 0.27241090416962044, "grad_norm": 0.010038923617897768, "learning_rate": 8.087361260293592e-06, "loss": 0.0001, "step": 4227 }, { "epoch": 0.27247534961654957, "grad_norm": 0.003118605860282275, "learning_rate": 8.086645184389545e-06, "loss": 0.0, "step": 4228 }, { "epoch": 0.27253979506347875, "grad_norm": 0.01232286451285321, "learning_rate": 8.085929108485499e-06, "loss": 0.0001, "step": 4229 }, { "epoch": 0.27260424051040794, "grad_norm": 0.8026916418279838, "learning_rate": 8.085213032581455e-06, "loss": 0.005, "step": 4230 }, { "epoch": 0.2726686859573371, "grad_norm": 0.25647062712724145, "learning_rate": 8.08449695667741e-06, "loss": 0.0021, "step": 4231 }, { "epoch": 0.2727331314042663, "grad_norm": 0.0792547996356328, "learning_rate": 8.083780880773364e-06, "loss": 0.0001, "step": 4232 }, { "epoch": 0.2727975768511955, "grad_norm": 0.07577762607384415, "learning_rate": 8.083064804869318e-06, "loss": 0.0007, "step": 4233 }, { "epoch": 0.2728620222981246, "grad_norm": 0.2807311066234324, "learning_rate": 8.08234872896527e-06, "loss": 0.0008, "step": 4234 }, { "epoch": 0.2729264677450538, "grad_norm": 0.0006046875437334746, "learning_rate": 8.081632653061225e-06, "loss": 0.0, "step": 4235 }, { "epoch": 0.272990913191983, "grad_norm": 0.030921507784656604, "learning_rate": 8.080916577157179e-06, "loss": 0.0002, "step": 4236 }, { "epoch": 0.27305535863891217, "grad_norm": 0.17561644139596558, "learning_rate": 8.080200501253133e-06, "loss": 0.0018, "step": 4237 }, { "epoch": 0.27311980408584136, "grad_norm": 0.0016910747966463998, "learning_rate": 8.079484425349088e-06, "loss": 0.0, "step": 4238 }, { "epoch": 0.2731842495327705, "grad_norm": 0.017976164972360383, "learning_rate": 8.078768349445042e-06, "loss": 0.0, "step": 4239 }, { "epoch": 0.27324869497969967, "grad_norm": 0.0002662013235983584, "learning_rate": 8.078052273540996e-06, "loss": 0.0, "step": 4240 }, { "epoch": 0.27331314042662885, "grad_norm": 0.004663524159279211, "learning_rate": 8.07733619763695e-06, "loss": 0.0, "step": 4241 }, { "epoch": 0.27337758587355804, "grad_norm": 0.21042938315959228, "learning_rate": 8.076620121732905e-06, "loss": 0.0005, "step": 4242 }, { "epoch": 0.2734420313204872, "grad_norm": 0.03295338194526715, "learning_rate": 8.07590404582886e-06, "loss": 0.0001, "step": 4243 }, { "epoch": 0.2735064767674164, "grad_norm": 0.0071647418191086885, "learning_rate": 8.075187969924812e-06, "loss": 0.0, "step": 4244 }, { "epoch": 0.27357092221434554, "grad_norm": 0.09229751786692166, "learning_rate": 8.074471894020766e-06, "loss": 0.0009, "step": 4245 }, { "epoch": 0.2736353676612747, "grad_norm": 0.005337680304002737, "learning_rate": 8.07375581811672e-06, "loss": 0.0, "step": 4246 }, { "epoch": 0.2736998131082039, "grad_norm": 0.0066058648404035405, "learning_rate": 8.073039742212675e-06, "loss": 0.0, "step": 4247 }, { "epoch": 0.2737642585551331, "grad_norm": 0.018948461852127226, "learning_rate": 8.072323666308629e-06, "loss": 0.0, "step": 4248 }, { "epoch": 0.2738287040020623, "grad_norm": 0.28756954658064515, "learning_rate": 8.071607590404583e-06, "loss": 0.0011, "step": 4249 }, { "epoch": 0.2738931494489914, "grad_norm": 0.1112823266641712, "learning_rate": 8.070891514500538e-06, "loss": 0.0002, "step": 4250 }, { "epoch": 0.2739575948959206, "grad_norm": 0.0033629401611851456, "learning_rate": 8.070175438596492e-06, "loss": 0.0, "step": 4251 }, { "epoch": 0.27402204034284977, "grad_norm": 0.01336159347321719, "learning_rate": 8.069459362692446e-06, "loss": 0.0001, "step": 4252 }, { "epoch": 0.27408648578977896, "grad_norm": 0.08284823562061203, "learning_rate": 8.0687432867884e-06, "loss": 0.0005, "step": 4253 }, { "epoch": 0.27415093123670814, "grad_norm": 0.02694515727281307, "learning_rate": 8.068027210884355e-06, "loss": 0.0, "step": 4254 }, { "epoch": 0.2742153766836373, "grad_norm": 0.012616425954789283, "learning_rate": 8.067311134980309e-06, "loss": 0.0001, "step": 4255 }, { "epoch": 0.27427982213056645, "grad_norm": 0.0007318360328273589, "learning_rate": 8.066595059076263e-06, "loss": 0.0, "step": 4256 }, { "epoch": 0.27434426757749564, "grad_norm": 1.0259591406667061, "learning_rate": 8.065878983172218e-06, "loss": 0.0031, "step": 4257 }, { "epoch": 0.2744087130244248, "grad_norm": 0.006475353661629028, "learning_rate": 8.065162907268172e-06, "loss": 0.0, "step": 4258 }, { "epoch": 0.274473158471354, "grad_norm": 0.44145669902026735, "learning_rate": 8.064446831364126e-06, "loss": 0.0013, "step": 4259 }, { "epoch": 0.2745376039182832, "grad_norm": 0.0496036830400151, "learning_rate": 8.063730755460079e-06, "loss": 0.0004, "step": 4260 }, { "epoch": 0.2746020493652124, "grad_norm": 0.08654972122820045, "learning_rate": 8.063014679556033e-06, "loss": 0.0005, "step": 4261 }, { "epoch": 0.2746664948121415, "grad_norm": 0.0032045456562780393, "learning_rate": 8.062298603651988e-06, "loss": 0.0, "step": 4262 }, { "epoch": 0.2747309402590707, "grad_norm": 0.027642436666024946, "learning_rate": 8.061582527747942e-06, "loss": 0.0, "step": 4263 }, { "epoch": 0.2747953857059999, "grad_norm": 0.026773698904416023, "learning_rate": 8.060866451843896e-06, "loss": 0.0001, "step": 4264 }, { "epoch": 0.27485983115292906, "grad_norm": 0.009627766904490808, "learning_rate": 8.06015037593985e-06, "loss": 0.0, "step": 4265 }, { "epoch": 0.27492427659985824, "grad_norm": 0.0016107713773131466, "learning_rate": 8.059434300035805e-06, "loss": 0.0, "step": 4266 }, { "epoch": 0.27498872204678737, "grad_norm": 0.025908643128907324, "learning_rate": 8.058718224131759e-06, "loss": 0.0001, "step": 4267 }, { "epoch": 0.27505316749371655, "grad_norm": 0.006806098258419837, "learning_rate": 8.058002148227713e-06, "loss": 0.0, "step": 4268 }, { "epoch": 0.27511761294064574, "grad_norm": 0.007748768085792652, "learning_rate": 8.057286072323668e-06, "loss": 0.0, "step": 4269 }, { "epoch": 0.2751820583875749, "grad_norm": 0.25058081332503046, "learning_rate": 8.05656999641962e-06, "loss": 0.0007, "step": 4270 }, { "epoch": 0.2752465038345041, "grad_norm": 0.024260379358677264, "learning_rate": 8.055853920515575e-06, "loss": 0.0003, "step": 4271 }, { "epoch": 0.2753109492814333, "grad_norm": 0.007065816195131135, "learning_rate": 8.055137844611529e-06, "loss": 0.0, "step": 4272 }, { "epoch": 0.2753753947283624, "grad_norm": 0.0032001312958800823, "learning_rate": 8.054421768707483e-06, "loss": 0.0, "step": 4273 }, { "epoch": 0.2754398401752916, "grad_norm": 0.0041486393690888055, "learning_rate": 8.053705692803437e-06, "loss": 0.0, "step": 4274 }, { "epoch": 0.2755042856222208, "grad_norm": 0.002187301162407734, "learning_rate": 8.052989616899392e-06, "loss": 0.0, "step": 4275 }, { "epoch": 0.27556873106915, "grad_norm": 0.019823731982545546, "learning_rate": 8.052273540995346e-06, "loss": 0.0001, "step": 4276 }, { "epoch": 0.27563317651607916, "grad_norm": 0.03341653553794979, "learning_rate": 8.0515574650913e-06, "loss": 0.0002, "step": 4277 }, { "epoch": 0.2756976219630083, "grad_norm": 0.0022856206212265636, "learning_rate": 8.050841389187255e-06, "loss": 0.0, "step": 4278 }, { "epoch": 0.2757620674099375, "grad_norm": 0.09553208442519484, "learning_rate": 8.050125313283209e-06, "loss": 0.0002, "step": 4279 }, { "epoch": 0.27582651285686666, "grad_norm": 0.008370866194229725, "learning_rate": 8.049409237379163e-06, "loss": 0.0001, "step": 4280 }, { "epoch": 0.27589095830379584, "grad_norm": 0.0013321094427172684, "learning_rate": 8.048693161475117e-06, "loss": 0.0, "step": 4281 }, { "epoch": 0.275955403750725, "grad_norm": 0.07517994067131512, "learning_rate": 8.047977085571072e-06, "loss": 0.0001, "step": 4282 }, { "epoch": 0.2760198491976542, "grad_norm": 0.0026881408795613724, "learning_rate": 8.047261009667026e-06, "loss": 0.0, "step": 4283 }, { "epoch": 0.27608429464458334, "grad_norm": 0.5881114557634267, "learning_rate": 8.04654493376298e-06, "loss": 0.0026, "step": 4284 }, { "epoch": 0.2761487400915125, "grad_norm": 0.0552722037753931, "learning_rate": 8.045828857858935e-06, "loss": 0.0005, "step": 4285 }, { "epoch": 0.2762131855384417, "grad_norm": 0.0019059926219596923, "learning_rate": 8.045112781954887e-06, "loss": 0.0, "step": 4286 }, { "epoch": 0.2762776309853709, "grad_norm": 0.004945866811174837, "learning_rate": 8.044396706050842e-06, "loss": 0.0, "step": 4287 }, { "epoch": 0.2763420764323001, "grad_norm": 0.020964249731356712, "learning_rate": 8.043680630146796e-06, "loss": 0.0001, "step": 4288 }, { "epoch": 0.2764065218792292, "grad_norm": 2.334616909744486, "learning_rate": 8.04296455424275e-06, "loss": 0.0045, "step": 4289 }, { "epoch": 0.2764709673261584, "grad_norm": 0.001980315912694081, "learning_rate": 8.042248478338704e-06, "loss": 0.0, "step": 4290 }, { "epoch": 0.2765354127730876, "grad_norm": 0.07433033072093934, "learning_rate": 8.041532402434659e-06, "loss": 0.0003, "step": 4291 }, { "epoch": 0.27659985822001676, "grad_norm": 0.015762347565720446, "learning_rate": 8.040816326530613e-06, "loss": 0.0, "step": 4292 }, { "epoch": 0.27666430366694594, "grad_norm": 9.599061102562011e-05, "learning_rate": 8.040100250626567e-06, "loss": 0.0, "step": 4293 }, { "epoch": 0.2767287491138751, "grad_norm": 0.011378677333695802, "learning_rate": 8.039384174722522e-06, "loss": 0.0001, "step": 4294 }, { "epoch": 0.27679319456080426, "grad_norm": 0.0012122533737657336, "learning_rate": 8.038668098818474e-06, "loss": 0.0, "step": 4295 }, { "epoch": 0.27685764000773344, "grad_norm": 0.00712413864184761, "learning_rate": 8.037952022914429e-06, "loss": 0.0, "step": 4296 }, { "epoch": 0.2769220854546626, "grad_norm": 0.0021291666023090256, "learning_rate": 8.037235947010383e-06, "loss": 0.0, "step": 4297 }, { "epoch": 0.2769865309015918, "grad_norm": 0.006989701767573375, "learning_rate": 8.036519871106337e-06, "loss": 0.0, "step": 4298 }, { "epoch": 0.277050976348521, "grad_norm": 0.0030630408264887, "learning_rate": 8.035803795202291e-06, "loss": 0.0, "step": 4299 }, { "epoch": 0.2771154217954502, "grad_norm": 0.42474275334818506, "learning_rate": 8.035087719298247e-06, "loss": 0.0018, "step": 4300 }, { "epoch": 0.2771798672423793, "grad_norm": 0.0020389694047471857, "learning_rate": 8.034371643394202e-06, "loss": 0.0, "step": 4301 }, { "epoch": 0.2772443126893085, "grad_norm": 0.02280851230690095, "learning_rate": 8.033655567490154e-06, "loss": 0.0001, "step": 4302 }, { "epoch": 0.2773087581362377, "grad_norm": 0.8225653820081554, "learning_rate": 8.032939491586109e-06, "loss": 0.0045, "step": 4303 }, { "epoch": 0.27737320358316686, "grad_norm": 0.007784121116634503, "learning_rate": 8.032223415682063e-06, "loss": 0.0, "step": 4304 }, { "epoch": 0.27743764903009605, "grad_norm": 0.09095866422393512, "learning_rate": 8.031507339778017e-06, "loss": 0.0009, "step": 4305 }, { "epoch": 0.2775020944770252, "grad_norm": 0.24910336875888767, "learning_rate": 8.030791263873972e-06, "loss": 0.0003, "step": 4306 }, { "epoch": 0.27756653992395436, "grad_norm": 0.973263011252569, "learning_rate": 8.030075187969926e-06, "loss": 0.0138, "step": 4307 }, { "epoch": 0.27763098537088354, "grad_norm": 0.49882107567641787, "learning_rate": 8.02935911206588e-06, "loss": 0.0013, "step": 4308 }, { "epoch": 0.2776954308178127, "grad_norm": 0.001363859972825628, "learning_rate": 8.028643036161834e-06, "loss": 0.0, "step": 4309 }, { "epoch": 0.2777598762647419, "grad_norm": 0.021673290387127987, "learning_rate": 8.027926960257789e-06, "loss": 0.0, "step": 4310 }, { "epoch": 0.2778243217116711, "grad_norm": 0.02073911943213021, "learning_rate": 8.027210884353741e-06, "loss": 0.0001, "step": 4311 }, { "epoch": 0.2778887671586002, "grad_norm": 0.35953358592667545, "learning_rate": 8.026494808449696e-06, "loss": 0.0029, "step": 4312 }, { "epoch": 0.2779532126055294, "grad_norm": 0.004120392236843992, "learning_rate": 8.02577873254565e-06, "loss": 0.0, "step": 4313 }, { "epoch": 0.2780176580524586, "grad_norm": 0.0016659283892527326, "learning_rate": 8.025062656641604e-06, "loss": 0.0, "step": 4314 }, { "epoch": 0.2780821034993878, "grad_norm": 0.000579688410185921, "learning_rate": 8.024346580737559e-06, "loss": 0.0, "step": 4315 }, { "epoch": 0.27814654894631696, "grad_norm": 0.1932598493658158, "learning_rate": 8.023630504833513e-06, "loss": 0.0009, "step": 4316 }, { "epoch": 0.2782109943932461, "grad_norm": 0.009979227631807589, "learning_rate": 8.022914428929467e-06, "loss": 0.0, "step": 4317 }, { "epoch": 0.2782754398401753, "grad_norm": 0.5246637414184626, "learning_rate": 8.022198353025421e-06, "loss": 0.0009, "step": 4318 }, { "epoch": 0.27833988528710446, "grad_norm": 0.0019687456576445756, "learning_rate": 8.021482277121376e-06, "loss": 0.0, "step": 4319 }, { "epoch": 0.27840433073403364, "grad_norm": 0.0011185073687473165, "learning_rate": 8.02076620121733e-06, "loss": 0.0, "step": 4320 }, { "epoch": 0.27846877618096283, "grad_norm": 0.04931498752101843, "learning_rate": 8.020050125313283e-06, "loss": 0.0002, "step": 4321 }, { "epoch": 0.278533221627892, "grad_norm": 0.00027083873114944093, "learning_rate": 8.019334049409237e-06, "loss": 0.0, "step": 4322 }, { "epoch": 0.27859766707482114, "grad_norm": 0.05839166362732932, "learning_rate": 8.018617973505191e-06, "loss": 0.0008, "step": 4323 }, { "epoch": 0.2786621125217503, "grad_norm": 0.0001520224323067307, "learning_rate": 8.017901897601147e-06, "loss": 0.0, "step": 4324 }, { "epoch": 0.2787265579686795, "grad_norm": 0.0007621996242916202, "learning_rate": 8.017185821697102e-06, "loss": 0.0, "step": 4325 }, { "epoch": 0.2787910034156087, "grad_norm": 0.00018054994650765078, "learning_rate": 8.016469745793056e-06, "loss": 0.0, "step": 4326 }, { "epoch": 0.2788554488625379, "grad_norm": 0.004102618013888196, "learning_rate": 8.015753669889008e-06, "loss": 0.0, "step": 4327 }, { "epoch": 0.278919894309467, "grad_norm": 0.0015162126017946738, "learning_rate": 8.015037593984963e-06, "loss": 0.0, "step": 4328 }, { "epoch": 0.2789843397563962, "grad_norm": 0.002592029774412762, "learning_rate": 8.014321518080917e-06, "loss": 0.0, "step": 4329 }, { "epoch": 0.2790487852033254, "grad_norm": 0.0014765765187576673, "learning_rate": 8.013605442176871e-06, "loss": 0.0, "step": 4330 }, { "epoch": 0.27911323065025456, "grad_norm": 0.024509027084424147, "learning_rate": 8.012889366272826e-06, "loss": 0.0, "step": 4331 }, { "epoch": 0.27917767609718375, "grad_norm": 0.0017077207017750734, "learning_rate": 8.01217329036878e-06, "loss": 0.0, "step": 4332 }, { "epoch": 0.27924212154411293, "grad_norm": 0.0022571001131268175, "learning_rate": 8.011457214464734e-06, "loss": 0.0, "step": 4333 }, { "epoch": 0.27930656699104206, "grad_norm": 0.0006027117464969319, "learning_rate": 8.010741138560689e-06, "loss": 0.0, "step": 4334 }, { "epoch": 0.27937101243797124, "grad_norm": 0.00018672067099711776, "learning_rate": 8.010025062656643e-06, "loss": 0.0, "step": 4335 }, { "epoch": 0.27943545788490043, "grad_norm": 0.0009781388881216495, "learning_rate": 8.009308986752597e-06, "loss": 0.0, "step": 4336 }, { "epoch": 0.2794999033318296, "grad_norm": 0.004771590672136837, "learning_rate": 8.00859291084855e-06, "loss": 0.0, "step": 4337 }, { "epoch": 0.2795643487787588, "grad_norm": 0.14926098480487418, "learning_rate": 8.007876834944504e-06, "loss": 0.0005, "step": 4338 }, { "epoch": 0.279628794225688, "grad_norm": 0.486617649409908, "learning_rate": 8.007160759040458e-06, "loss": 0.0008, "step": 4339 }, { "epoch": 0.2796932396726171, "grad_norm": 0.009931994546620311, "learning_rate": 8.006444683136413e-06, "loss": 0.0001, "step": 4340 }, { "epoch": 0.2797576851195463, "grad_norm": 0.00038062467703721604, "learning_rate": 8.005728607232367e-06, "loss": 0.0, "step": 4341 }, { "epoch": 0.2798221305664755, "grad_norm": 0.0003990964290874825, "learning_rate": 8.005012531328321e-06, "loss": 0.0, "step": 4342 }, { "epoch": 0.27988657601340466, "grad_norm": 0.008947741416100167, "learning_rate": 8.004296455424276e-06, "loss": 0.0001, "step": 4343 }, { "epoch": 0.27995102146033385, "grad_norm": 0.007823324713702041, "learning_rate": 8.00358037952023e-06, "loss": 0.0, "step": 4344 }, { "epoch": 0.280015466907263, "grad_norm": 0.040479600314342654, "learning_rate": 8.002864303616184e-06, "loss": 0.0001, "step": 4345 }, { "epoch": 0.28007991235419216, "grad_norm": 0.0055250204423881845, "learning_rate": 8.002148227712138e-06, "loss": 0.0, "step": 4346 }, { "epoch": 0.28014435780112135, "grad_norm": 0.010728338781064616, "learning_rate": 8.001432151808093e-06, "loss": 0.0001, "step": 4347 }, { "epoch": 0.28020880324805053, "grad_norm": 0.00040887627223743986, "learning_rate": 8.000716075904047e-06, "loss": 0.0, "step": 4348 }, { "epoch": 0.2802732486949797, "grad_norm": 0.13571115180144624, "learning_rate": 8.000000000000001e-06, "loss": 0.0004, "step": 4349 }, { "epoch": 0.2803376941419089, "grad_norm": 0.5591548156003548, "learning_rate": 7.999283924095956e-06, "loss": 0.004, "step": 4350 }, { "epoch": 0.28040213958883803, "grad_norm": 2.2364727922981085, "learning_rate": 7.99856784819191e-06, "loss": 0.0049, "step": 4351 }, { "epoch": 0.2804665850357672, "grad_norm": 0.015054656240965792, "learning_rate": 7.997851772287864e-06, "loss": 0.0, "step": 4352 }, { "epoch": 0.2805310304826964, "grad_norm": 0.25799236381304563, "learning_rate": 7.997135696383817e-06, "loss": 0.0008, "step": 4353 }, { "epoch": 0.2805954759296256, "grad_norm": 0.047828245320771644, "learning_rate": 7.996419620479771e-06, "loss": 0.0001, "step": 4354 }, { "epoch": 0.28065992137655477, "grad_norm": 0.5221935978641291, "learning_rate": 7.995703544575725e-06, "loss": 0.0039, "step": 4355 }, { "epoch": 0.2807243668234839, "grad_norm": 0.011907581028104474, "learning_rate": 7.99498746867168e-06, "loss": 0.0001, "step": 4356 }, { "epoch": 0.2807888122704131, "grad_norm": 0.0031201953035860396, "learning_rate": 7.994271392767634e-06, "loss": 0.0, "step": 4357 }, { "epoch": 0.28085325771734226, "grad_norm": 1.5818540874230766, "learning_rate": 7.993555316863588e-06, "loss": 0.0024, "step": 4358 }, { "epoch": 0.28091770316427145, "grad_norm": 0.01792528071689751, "learning_rate": 7.992839240959543e-06, "loss": 0.0, "step": 4359 }, { "epoch": 0.28098214861120063, "grad_norm": 0.03182447723050989, "learning_rate": 7.992123165055497e-06, "loss": 0.0003, "step": 4360 }, { "epoch": 0.2810465940581298, "grad_norm": 0.0016257270435471858, "learning_rate": 7.991407089151451e-06, "loss": 0.0, "step": 4361 }, { "epoch": 0.28111103950505895, "grad_norm": 0.4480030194664001, "learning_rate": 7.990691013247406e-06, "loss": 0.0019, "step": 4362 }, { "epoch": 0.28117548495198813, "grad_norm": 0.051033991221149974, "learning_rate": 7.989974937343358e-06, "loss": 0.0001, "step": 4363 }, { "epoch": 0.2812399303989173, "grad_norm": 0.2753491742477933, "learning_rate": 7.989258861439312e-06, "loss": 0.0011, "step": 4364 }, { "epoch": 0.2813043758458465, "grad_norm": 0.009632521139026014, "learning_rate": 7.988542785535267e-06, "loss": 0.0001, "step": 4365 }, { "epoch": 0.2813688212927757, "grad_norm": 0.39363273433598595, "learning_rate": 7.987826709631221e-06, "loss": 0.0048, "step": 4366 }, { "epoch": 0.2814332667397048, "grad_norm": 0.004705003054986105, "learning_rate": 7.987110633727175e-06, "loss": 0.0, "step": 4367 }, { "epoch": 0.281497712186634, "grad_norm": 0.002300404489696207, "learning_rate": 7.98639455782313e-06, "loss": 0.0, "step": 4368 }, { "epoch": 0.2815621576335632, "grad_norm": 0.11177615728153535, "learning_rate": 7.985678481919084e-06, "loss": 0.0002, "step": 4369 }, { "epoch": 0.28162660308049237, "grad_norm": 0.004602240365965075, "learning_rate": 7.984962406015038e-06, "loss": 0.0, "step": 4370 }, { "epoch": 0.28169104852742155, "grad_norm": 0.05880574974903835, "learning_rate": 7.984246330110993e-06, "loss": 0.0017, "step": 4371 }, { "epoch": 0.28175549397435073, "grad_norm": 0.008953983911220216, "learning_rate": 7.983530254206947e-06, "loss": 0.0001, "step": 4372 }, { "epoch": 0.28181993942127986, "grad_norm": 0.007779673626552251, "learning_rate": 7.982814178302901e-06, "loss": 0.0001, "step": 4373 }, { "epoch": 0.28188438486820905, "grad_norm": 0.0016397288000667067, "learning_rate": 7.982098102398855e-06, "loss": 0.0, "step": 4374 }, { "epoch": 0.28194883031513823, "grad_norm": 0.07536983363889126, "learning_rate": 7.98138202649481e-06, "loss": 0.0005, "step": 4375 }, { "epoch": 0.2820132757620674, "grad_norm": 0.0662605672125797, "learning_rate": 7.980665950590764e-06, "loss": 0.0002, "step": 4376 }, { "epoch": 0.2820777212089966, "grad_norm": 0.001330320124680005, "learning_rate": 7.979949874686718e-06, "loss": 0.0, "step": 4377 }, { "epoch": 0.2821421666559258, "grad_norm": 0.00196304783207142, "learning_rate": 7.979233798782673e-06, "loss": 0.0, "step": 4378 }, { "epoch": 0.2822066121028549, "grad_norm": 0.037367839809333316, "learning_rate": 7.978517722878625e-06, "loss": 0.0001, "step": 4379 }, { "epoch": 0.2822710575497841, "grad_norm": 0.006716713789545665, "learning_rate": 7.97780164697458e-06, "loss": 0.0, "step": 4380 }, { "epoch": 0.2823355029967133, "grad_norm": 0.6094877187697938, "learning_rate": 7.977085571070534e-06, "loss": 0.0014, "step": 4381 }, { "epoch": 0.28239994844364247, "grad_norm": 0.06727481563265361, "learning_rate": 7.976369495166488e-06, "loss": 0.0001, "step": 4382 }, { "epoch": 0.28246439389057165, "grad_norm": 0.18548593022155066, "learning_rate": 7.975653419262442e-06, "loss": 0.0005, "step": 4383 }, { "epoch": 0.2825288393375008, "grad_norm": 0.3365746156429773, "learning_rate": 7.974937343358397e-06, "loss": 0.0024, "step": 4384 }, { "epoch": 0.28259328478442997, "grad_norm": 0.00029314361765416907, "learning_rate": 7.974221267454351e-06, "loss": 0.0, "step": 4385 }, { "epoch": 0.28265773023135915, "grad_norm": 0.16602354030813277, "learning_rate": 7.973505191550305e-06, "loss": 0.0003, "step": 4386 }, { "epoch": 0.28272217567828833, "grad_norm": 0.0002632260427845974, "learning_rate": 7.97278911564626e-06, "loss": 0.0, "step": 4387 }, { "epoch": 0.2827866211252175, "grad_norm": 0.005457814706330409, "learning_rate": 7.972073039742212e-06, "loss": 0.0, "step": 4388 }, { "epoch": 0.2828510665721467, "grad_norm": 0.3241865073108496, "learning_rate": 7.971356963838167e-06, "loss": 0.0074, "step": 4389 }, { "epoch": 0.28291551201907583, "grad_norm": 0.03676520602108709, "learning_rate": 7.97064088793412e-06, "loss": 0.0001, "step": 4390 }, { "epoch": 0.282979957466005, "grad_norm": 0.5804222704426779, "learning_rate": 7.969924812030075e-06, "loss": 0.0032, "step": 4391 }, { "epoch": 0.2830444029129342, "grad_norm": 0.0040055974290435425, "learning_rate": 7.96920873612603e-06, "loss": 0.0, "step": 4392 }, { "epoch": 0.2831088483598634, "grad_norm": 0.005350671320858573, "learning_rate": 7.968492660221984e-06, "loss": 0.0, "step": 4393 }, { "epoch": 0.28317329380679257, "grad_norm": 0.004508154975757049, "learning_rate": 7.96777658431794e-06, "loss": 0.0, "step": 4394 }, { "epoch": 0.2832377392537217, "grad_norm": 0.04889548900087098, "learning_rate": 7.967060508413892e-06, "loss": 0.0001, "step": 4395 }, { "epoch": 0.2833021847006509, "grad_norm": 0.0443628869935588, "learning_rate": 7.966344432509847e-06, "loss": 0.0002, "step": 4396 }, { "epoch": 0.28336663014758007, "grad_norm": 0.08155526279809674, "learning_rate": 7.965628356605801e-06, "loss": 0.0003, "step": 4397 }, { "epoch": 0.28343107559450925, "grad_norm": 0.18657846288264684, "learning_rate": 7.964912280701755e-06, "loss": 0.0022, "step": 4398 }, { "epoch": 0.28349552104143844, "grad_norm": 0.02187256732999685, "learning_rate": 7.96419620479771e-06, "loss": 0.0001, "step": 4399 }, { "epoch": 0.2835599664883676, "grad_norm": 0.012205506467956272, "learning_rate": 7.963480128893664e-06, "loss": 0.0, "step": 4400 }, { "epoch": 0.28362441193529675, "grad_norm": 0.037582196116246906, "learning_rate": 7.962764052989618e-06, "loss": 0.0, "step": 4401 }, { "epoch": 0.28368885738222593, "grad_norm": 0.06266569361901952, "learning_rate": 7.962047977085572e-06, "loss": 0.0002, "step": 4402 }, { "epoch": 0.2837533028291551, "grad_norm": 0.0008525386340146979, "learning_rate": 7.961331901181527e-06, "loss": 0.0, "step": 4403 }, { "epoch": 0.2838177482760843, "grad_norm": 0.005498334319090381, "learning_rate": 7.96061582527748e-06, "loss": 0.0, "step": 4404 }, { "epoch": 0.2838821937230135, "grad_norm": 0.010821442272882176, "learning_rate": 7.959899749373434e-06, "loss": 0.0, "step": 4405 }, { "epoch": 0.2839466391699426, "grad_norm": 0.24158699435794354, "learning_rate": 7.959183673469388e-06, "loss": 0.002, "step": 4406 }, { "epoch": 0.2840110846168718, "grad_norm": 0.3462137556730702, "learning_rate": 7.958467597565342e-06, "loss": 0.0012, "step": 4407 }, { "epoch": 0.284075530063801, "grad_norm": 0.2733609092257243, "learning_rate": 7.957751521661296e-06, "loss": 0.0023, "step": 4408 }, { "epoch": 0.28413997551073017, "grad_norm": 0.16384960734985596, "learning_rate": 7.95703544575725e-06, "loss": 0.0003, "step": 4409 }, { "epoch": 0.28420442095765935, "grad_norm": 0.38844305999655787, "learning_rate": 7.956319369853205e-06, "loss": 0.0028, "step": 4410 }, { "epoch": 0.28426886640458854, "grad_norm": 0.004082978805036018, "learning_rate": 7.95560329394916e-06, "loss": 0.0, "step": 4411 }, { "epoch": 0.28433331185151767, "grad_norm": 0.004148427370944359, "learning_rate": 7.954887218045114e-06, "loss": 0.0, "step": 4412 }, { "epoch": 0.28439775729844685, "grad_norm": 0.021951817193776223, "learning_rate": 7.954171142141068e-06, "loss": 0.0, "step": 4413 }, { "epoch": 0.28446220274537604, "grad_norm": 0.006462803659022571, "learning_rate": 7.95345506623702e-06, "loss": 0.0, "step": 4414 }, { "epoch": 0.2845266481923052, "grad_norm": 0.13909423337469742, "learning_rate": 7.952738990332975e-06, "loss": 0.0008, "step": 4415 }, { "epoch": 0.2845910936392344, "grad_norm": 0.11019298248775587, "learning_rate": 7.95202291442893e-06, "loss": 0.002, "step": 4416 }, { "epoch": 0.2846555390861636, "grad_norm": 0.015269646596399714, "learning_rate": 7.951306838524885e-06, "loss": 0.0, "step": 4417 }, { "epoch": 0.2847199845330927, "grad_norm": 0.021452810796437523, "learning_rate": 7.95059076262084e-06, "loss": 0.0001, "step": 4418 }, { "epoch": 0.2847844299800219, "grad_norm": 0.517738806776239, "learning_rate": 7.949874686716794e-06, "loss": 0.0012, "step": 4419 }, { "epoch": 0.2848488754269511, "grad_norm": 0.03322008169773669, "learning_rate": 7.949158610812746e-06, "loss": 0.0003, "step": 4420 }, { "epoch": 0.28491332087388027, "grad_norm": 0.009138953843116304, "learning_rate": 7.9484425349087e-06, "loss": 0.0, "step": 4421 }, { "epoch": 0.28497776632080946, "grad_norm": 0.47363823956398626, "learning_rate": 7.947726459004655e-06, "loss": 0.0039, "step": 4422 }, { "epoch": 0.2850422117677386, "grad_norm": 0.0020104430634621095, "learning_rate": 7.94701038310061e-06, "loss": 0.0, "step": 4423 }, { "epoch": 0.28510665721466777, "grad_norm": 0.053079706230507256, "learning_rate": 7.946294307196564e-06, "loss": 0.0002, "step": 4424 }, { "epoch": 0.28517110266159695, "grad_norm": 0.02456466714186916, "learning_rate": 7.945578231292518e-06, "loss": 0.0001, "step": 4425 }, { "epoch": 0.28523554810852614, "grad_norm": 0.0008166402254194522, "learning_rate": 7.944862155388472e-06, "loss": 0.0, "step": 4426 }, { "epoch": 0.2852999935554553, "grad_norm": 0.006199516264046085, "learning_rate": 7.944146079484426e-06, "loss": 0.0, "step": 4427 }, { "epoch": 0.2853644390023845, "grad_norm": 0.0008081691424822122, "learning_rate": 7.94343000358038e-06, "loss": 0.0, "step": 4428 }, { "epoch": 0.28542888444931364, "grad_norm": 0.0029845301345706203, "learning_rate": 7.942713927676335e-06, "loss": 0.0, "step": 4429 }, { "epoch": 0.2854933298962428, "grad_norm": 0.011622539974228662, "learning_rate": 7.941997851772288e-06, "loss": 0.0, "step": 4430 }, { "epoch": 0.285557775343172, "grad_norm": 0.00507248422770294, "learning_rate": 7.941281775868242e-06, "loss": 0.0, "step": 4431 }, { "epoch": 0.2856222207901012, "grad_norm": 0.004660809038102969, "learning_rate": 7.940565699964196e-06, "loss": 0.0, "step": 4432 }, { "epoch": 0.2856866662370304, "grad_norm": 0.00253746912322247, "learning_rate": 7.93984962406015e-06, "loss": 0.0, "step": 4433 }, { "epoch": 0.2857511116839595, "grad_norm": 0.061854504871958836, "learning_rate": 7.939133548156105e-06, "loss": 0.002, "step": 4434 }, { "epoch": 0.2858155571308887, "grad_norm": 0.008229746313761522, "learning_rate": 7.93841747225206e-06, "loss": 0.0, "step": 4435 }, { "epoch": 0.28588000257781787, "grad_norm": 0.003418590816872071, "learning_rate": 7.937701396348013e-06, "loss": 0.0, "step": 4436 }, { "epoch": 0.28594444802474706, "grad_norm": 0.4305533788055168, "learning_rate": 7.936985320443968e-06, "loss": 0.0026, "step": 4437 }, { "epoch": 0.28600889347167624, "grad_norm": 0.006124099355040623, "learning_rate": 7.936269244539922e-06, "loss": 0.0, "step": 4438 }, { "epoch": 0.2860733389186054, "grad_norm": 0.0031271294943862695, "learning_rate": 7.935553168635876e-06, "loss": 0.0, "step": 4439 }, { "epoch": 0.28613778436553455, "grad_norm": 0.01045592306874938, "learning_rate": 7.934837092731829e-06, "loss": 0.0001, "step": 4440 }, { "epoch": 0.28620222981246374, "grad_norm": 0.01239300328247604, "learning_rate": 7.934121016827785e-06, "loss": 0.0, "step": 4441 }, { "epoch": 0.2862666752593929, "grad_norm": 0.1479763660766948, "learning_rate": 7.93340494092374e-06, "loss": 0.0003, "step": 4442 }, { "epoch": 0.2863311207063221, "grad_norm": 0.002779436143763441, "learning_rate": 7.932688865019694e-06, "loss": 0.0, "step": 4443 }, { "epoch": 0.2863955661532513, "grad_norm": 0.009939447193650896, "learning_rate": 7.931972789115648e-06, "loss": 0.0, "step": 4444 }, { "epoch": 0.2864600116001804, "grad_norm": 0.03607640825679185, "learning_rate": 7.931256713211602e-06, "loss": 0.0001, "step": 4445 }, { "epoch": 0.2865244570471096, "grad_norm": 0.030201390696854517, "learning_rate": 7.930540637307555e-06, "loss": 0.0, "step": 4446 }, { "epoch": 0.2865889024940388, "grad_norm": 0.07210643736495957, "learning_rate": 7.929824561403509e-06, "loss": 0.0001, "step": 4447 }, { "epoch": 0.286653347940968, "grad_norm": 0.27512171988414574, "learning_rate": 7.929108485499463e-06, "loss": 0.0023, "step": 4448 }, { "epoch": 0.28671779338789716, "grad_norm": 0.007823387955755273, "learning_rate": 7.928392409595418e-06, "loss": 0.0, "step": 4449 }, { "epoch": 0.28678223883482634, "grad_norm": 0.14412453491842261, "learning_rate": 7.927676333691372e-06, "loss": 0.0003, "step": 4450 }, { "epoch": 0.28684668428175547, "grad_norm": 0.00010022605205979735, "learning_rate": 7.926960257787326e-06, "loss": 0.0, "step": 4451 }, { "epoch": 0.28691112972868466, "grad_norm": 0.00852258052113132, "learning_rate": 7.92624418188328e-06, "loss": 0.0, "step": 4452 }, { "epoch": 0.28697557517561384, "grad_norm": 0.005646042880733156, "learning_rate": 7.925528105979235e-06, "loss": 0.0, "step": 4453 }, { "epoch": 0.287040020622543, "grad_norm": 0.0008066836868218597, "learning_rate": 7.924812030075189e-06, "loss": 0.0, "step": 4454 }, { "epoch": 0.2871044660694722, "grad_norm": 0.001507509646450024, "learning_rate": 7.924095954171143e-06, "loss": 0.0, "step": 4455 }, { "epoch": 0.2871689115164014, "grad_norm": 0.002629905680913306, "learning_rate": 7.923379878267096e-06, "loss": 0.0, "step": 4456 }, { "epoch": 0.2872333569633305, "grad_norm": 0.0059772441425043265, "learning_rate": 7.92266380236305e-06, "loss": 0.0, "step": 4457 }, { "epoch": 0.2872978024102597, "grad_norm": 0.0352865641772406, "learning_rate": 7.921947726459005e-06, "loss": 0.0001, "step": 4458 }, { "epoch": 0.2873622478571889, "grad_norm": 0.0002449629222558241, "learning_rate": 7.921231650554959e-06, "loss": 0.0, "step": 4459 }, { "epoch": 0.2874266933041181, "grad_norm": 0.00020933698575602438, "learning_rate": 7.920515574650913e-06, "loss": 0.0, "step": 4460 }, { "epoch": 0.28749113875104726, "grad_norm": 0.0031949037566283696, "learning_rate": 7.919799498746868e-06, "loss": 0.0, "step": 4461 }, { "epoch": 0.2875555841979764, "grad_norm": 0.0065847528646073294, "learning_rate": 7.919083422842822e-06, "loss": 0.0, "step": 4462 }, { "epoch": 0.2876200296449056, "grad_norm": 0.007622588936978267, "learning_rate": 7.918367346938776e-06, "loss": 0.0001, "step": 4463 }, { "epoch": 0.28768447509183476, "grad_norm": 0.0017648578089107282, "learning_rate": 7.91765127103473e-06, "loss": 0.0, "step": 4464 }, { "epoch": 0.28774892053876394, "grad_norm": 0.02611680379581983, "learning_rate": 7.916935195130685e-06, "loss": 0.0001, "step": 4465 }, { "epoch": 0.2878133659856931, "grad_norm": 0.0010693996340695561, "learning_rate": 7.916219119226639e-06, "loss": 0.0, "step": 4466 }, { "epoch": 0.2878778114326223, "grad_norm": 0.05102518528801154, "learning_rate": 7.915503043322593e-06, "loss": 0.0004, "step": 4467 }, { "epoch": 0.28794225687955144, "grad_norm": 0.03612863993394471, "learning_rate": 7.914786967418548e-06, "loss": 0.0001, "step": 4468 }, { "epoch": 0.2880067023264806, "grad_norm": 0.017686609282882126, "learning_rate": 7.914070891514502e-06, "loss": 0.0001, "step": 4469 }, { "epoch": 0.2880711477734098, "grad_norm": 0.0008417679838710003, "learning_rate": 7.913354815610456e-06, "loss": 0.0, "step": 4470 }, { "epoch": 0.288135593220339, "grad_norm": 0.00041560685689716803, "learning_rate": 7.91263873970641e-06, "loss": 0.0, "step": 4471 }, { "epoch": 0.2882000386672682, "grad_norm": 0.18941918295187352, "learning_rate": 7.911922663802363e-06, "loss": 0.0004, "step": 4472 }, { "epoch": 0.2882644841141973, "grad_norm": 0.21404756925539772, "learning_rate": 7.911206587898317e-06, "loss": 0.0006, "step": 4473 }, { "epoch": 0.2883289295611265, "grad_norm": 0.013452269074163891, "learning_rate": 7.910490511994272e-06, "loss": 0.0, "step": 4474 }, { "epoch": 0.2883933750080557, "grad_norm": 0.09559103668822853, "learning_rate": 7.909774436090226e-06, "loss": 0.0004, "step": 4475 }, { "epoch": 0.28845782045498486, "grad_norm": 0.025086608219520978, "learning_rate": 7.90905836018618e-06, "loss": 0.0001, "step": 4476 }, { "epoch": 0.28852226590191404, "grad_norm": 0.006432836326093911, "learning_rate": 7.908342284282135e-06, "loss": 0.0, "step": 4477 }, { "epoch": 0.28858671134884323, "grad_norm": 0.2498886709881648, "learning_rate": 7.907626208378089e-06, "loss": 0.0004, "step": 4478 }, { "epoch": 0.28865115679577236, "grad_norm": 0.0026041772410369305, "learning_rate": 7.906910132474043e-06, "loss": 0.0, "step": 4479 }, { "epoch": 0.28871560224270154, "grad_norm": 0.009591682875869855, "learning_rate": 7.906194056569998e-06, "loss": 0.0, "step": 4480 }, { "epoch": 0.2887800476896307, "grad_norm": 0.00032052997232582793, "learning_rate": 7.90547798066595e-06, "loss": 0.0, "step": 4481 }, { "epoch": 0.2888444931365599, "grad_norm": 0.00873159164063012, "learning_rate": 7.904761904761904e-06, "loss": 0.0, "step": 4482 }, { "epoch": 0.2889089385834891, "grad_norm": 0.35534598777334087, "learning_rate": 7.904045828857859e-06, "loss": 0.0054, "step": 4483 }, { "epoch": 0.2889733840304182, "grad_norm": 0.002808476140444069, "learning_rate": 7.903329752953813e-06, "loss": 0.0, "step": 4484 }, { "epoch": 0.2890378294773474, "grad_norm": 0.007032176873919268, "learning_rate": 7.902613677049767e-06, "loss": 0.0, "step": 4485 }, { "epoch": 0.2891022749242766, "grad_norm": 0.0008951899821003473, "learning_rate": 7.901897601145722e-06, "loss": 0.0, "step": 4486 }, { "epoch": 0.2891667203712058, "grad_norm": 0.07280670081014678, "learning_rate": 7.901181525241678e-06, "loss": 0.0017, "step": 4487 }, { "epoch": 0.28923116581813496, "grad_norm": 0.31948080480773533, "learning_rate": 7.90046544933763e-06, "loss": 0.0009, "step": 4488 }, { "epoch": 0.28929561126506415, "grad_norm": 0.0005501602348737609, "learning_rate": 7.899749373433585e-06, "loss": 0.0, "step": 4489 }, { "epoch": 0.2893600567119933, "grad_norm": 0.018795969206918124, "learning_rate": 7.899033297529539e-06, "loss": 0.0, "step": 4490 }, { "epoch": 0.28942450215892246, "grad_norm": 0.0006946576011490777, "learning_rate": 7.898317221625493e-06, "loss": 0.0, "step": 4491 }, { "epoch": 0.28948894760585164, "grad_norm": 0.00017523844631172394, "learning_rate": 7.897601145721447e-06, "loss": 0.0, "step": 4492 }, { "epoch": 0.2895533930527808, "grad_norm": 0.00040692935676826147, "learning_rate": 7.896885069817402e-06, "loss": 0.0, "step": 4493 }, { "epoch": 0.28961783849971, "grad_norm": 0.002279674125137819, "learning_rate": 7.896168993913356e-06, "loss": 0.0, "step": 4494 }, { "epoch": 0.2896822839466392, "grad_norm": 0.2672872842209903, "learning_rate": 7.89545291800931e-06, "loss": 0.0027, "step": 4495 }, { "epoch": 0.2897467293935683, "grad_norm": 0.006540003647810904, "learning_rate": 7.894736842105265e-06, "loss": 0.0, "step": 4496 }, { "epoch": 0.2898111748404975, "grad_norm": 0.015930352995606616, "learning_rate": 7.894020766201217e-06, "loss": 0.0, "step": 4497 }, { "epoch": 0.2898756202874267, "grad_norm": 0.004882923326128659, "learning_rate": 7.893304690297172e-06, "loss": 0.0, "step": 4498 }, { "epoch": 0.2899400657343559, "grad_norm": 0.31889832402219837, "learning_rate": 7.892588614393126e-06, "loss": 0.0005, "step": 4499 }, { "epoch": 0.29000451118128506, "grad_norm": 0.0013620853215409057, "learning_rate": 7.89187253848908e-06, "loss": 0.0, "step": 4500 }, { "epoch": 0.2900689566282142, "grad_norm": 0.015338372594140916, "learning_rate": 7.891156462585034e-06, "loss": 0.0, "step": 4501 }, { "epoch": 0.2901334020751434, "grad_norm": 0.05778266089969748, "learning_rate": 7.890440386680989e-06, "loss": 0.0001, "step": 4502 }, { "epoch": 0.29019784752207256, "grad_norm": 0.00270977776113452, "learning_rate": 7.889724310776943e-06, "loss": 0.0, "step": 4503 }, { "epoch": 0.29026229296900175, "grad_norm": 0.0037655202102685075, "learning_rate": 7.889008234872897e-06, "loss": 0.0, "step": 4504 }, { "epoch": 0.29032673841593093, "grad_norm": 0.015389182378204158, "learning_rate": 7.888292158968852e-06, "loss": 0.0001, "step": 4505 }, { "epoch": 0.2903911838628601, "grad_norm": 0.015059754222407136, "learning_rate": 7.887576083064806e-06, "loss": 0.0, "step": 4506 }, { "epoch": 0.29045562930978924, "grad_norm": 0.03073885991750051, "learning_rate": 7.886860007160759e-06, "loss": 0.0, "step": 4507 }, { "epoch": 0.2905200747567184, "grad_norm": 0.015867032314513176, "learning_rate": 7.886143931256713e-06, "loss": 0.0001, "step": 4508 }, { "epoch": 0.2905845202036476, "grad_norm": 0.00045300063330874063, "learning_rate": 7.885427855352667e-06, "loss": 0.0, "step": 4509 }, { "epoch": 0.2906489656505768, "grad_norm": 0.0004942003374035947, "learning_rate": 7.884711779448621e-06, "loss": 0.0, "step": 4510 }, { "epoch": 0.290713411097506, "grad_norm": 0.0012337128738821176, "learning_rate": 7.883995703544577e-06, "loss": 0.0, "step": 4511 }, { "epoch": 0.2907778565444351, "grad_norm": 0.0008876628842310143, "learning_rate": 7.883279627640532e-06, "loss": 0.0, "step": 4512 }, { "epoch": 0.2908423019913643, "grad_norm": 0.007350333417431606, "learning_rate": 7.882563551736484e-06, "loss": 0.0, "step": 4513 }, { "epoch": 0.2909067474382935, "grad_norm": 0.0002106284948672585, "learning_rate": 7.881847475832439e-06, "loss": 0.0, "step": 4514 }, { "epoch": 0.29097119288522266, "grad_norm": 0.00035708859633882503, "learning_rate": 7.881131399928393e-06, "loss": 0.0, "step": 4515 }, { "epoch": 0.29103563833215185, "grad_norm": 0.029065303728135556, "learning_rate": 7.880415324024347e-06, "loss": 0.0, "step": 4516 }, { "epoch": 0.29110008377908103, "grad_norm": 0.2127117840682844, "learning_rate": 7.879699248120301e-06, "loss": 0.001, "step": 4517 }, { "epoch": 0.29116452922601016, "grad_norm": 0.0006066203387944084, "learning_rate": 7.878983172216256e-06, "loss": 0.0, "step": 4518 }, { "epoch": 0.29122897467293934, "grad_norm": 0.018112009074947217, "learning_rate": 7.87826709631221e-06, "loss": 0.0, "step": 4519 }, { "epoch": 0.29129342011986853, "grad_norm": 0.002583304637280309, "learning_rate": 7.877551020408164e-06, "loss": 0.0, "step": 4520 }, { "epoch": 0.2913578655667977, "grad_norm": 0.266955702883381, "learning_rate": 7.876834944504119e-06, "loss": 0.0019, "step": 4521 }, { "epoch": 0.2914223110137269, "grad_norm": 0.018432121396502184, "learning_rate": 7.876118868600073e-06, "loss": 0.0, "step": 4522 }, { "epoch": 0.2914867564606561, "grad_norm": 0.03903561799608369, "learning_rate": 7.875402792696026e-06, "loss": 0.0003, "step": 4523 }, { "epoch": 0.2915512019075852, "grad_norm": 0.012237118359079405, "learning_rate": 7.87468671679198e-06, "loss": 0.0, "step": 4524 }, { "epoch": 0.2916156473545144, "grad_norm": 0.0033078222190907323, "learning_rate": 7.873970640887934e-06, "loss": 0.0, "step": 4525 }, { "epoch": 0.2916800928014436, "grad_norm": 0.003472865748525989, "learning_rate": 7.873254564983888e-06, "loss": 0.0, "step": 4526 }, { "epoch": 0.29174453824837276, "grad_norm": 0.0013939818695185597, "learning_rate": 7.872538489079843e-06, "loss": 0.0, "step": 4527 }, { "epoch": 0.29180898369530195, "grad_norm": 0.00018821632106576328, "learning_rate": 7.871822413175797e-06, "loss": 0.0, "step": 4528 }, { "epoch": 0.2918734291422311, "grad_norm": 0.00306179393129678, "learning_rate": 7.871106337271751e-06, "loss": 0.0, "step": 4529 }, { "epoch": 0.29193787458916026, "grad_norm": 0.0007617039287525343, "learning_rate": 7.870390261367706e-06, "loss": 0.0, "step": 4530 }, { "epoch": 0.29200232003608945, "grad_norm": 1.2425142734099195, "learning_rate": 7.86967418546366e-06, "loss": 0.0022, "step": 4531 }, { "epoch": 0.29206676548301863, "grad_norm": 0.0022768657242729392, "learning_rate": 7.868958109559614e-06, "loss": 0.0, "step": 4532 }, { "epoch": 0.2921312109299478, "grad_norm": 0.00020864275355323165, "learning_rate": 7.868242033655567e-06, "loss": 0.0, "step": 4533 }, { "epoch": 0.292195656376877, "grad_norm": 0.000213358795277892, "learning_rate": 7.867525957751523e-06, "loss": 0.0, "step": 4534 }, { "epoch": 0.29226010182380613, "grad_norm": 0.00015255038864135203, "learning_rate": 7.866809881847477e-06, "loss": 0.0, "step": 4535 }, { "epoch": 0.2923245472707353, "grad_norm": 0.00015455752183585395, "learning_rate": 7.866093805943431e-06, "loss": 0.0, "step": 4536 }, { "epoch": 0.2923889927176645, "grad_norm": 0.12145031071190686, "learning_rate": 7.865377730039386e-06, "loss": 0.0001, "step": 4537 }, { "epoch": 0.2924534381645937, "grad_norm": 0.031089749014432462, "learning_rate": 7.86466165413534e-06, "loss": 0.0002, "step": 4538 }, { "epoch": 0.29251788361152287, "grad_norm": 0.3512590264114246, "learning_rate": 7.863945578231293e-06, "loss": 0.0025, "step": 4539 }, { "epoch": 0.292582329058452, "grad_norm": 0.02734669707799407, "learning_rate": 7.863229502327247e-06, "loss": 0.0001, "step": 4540 }, { "epoch": 0.2926467745053812, "grad_norm": 0.0010073198792487593, "learning_rate": 7.862513426423201e-06, "loss": 0.0, "step": 4541 }, { "epoch": 0.29271121995231036, "grad_norm": 0.00019868880896533006, "learning_rate": 7.861797350519156e-06, "loss": 0.0, "step": 4542 }, { "epoch": 0.29277566539923955, "grad_norm": 0.0006258420716790086, "learning_rate": 7.86108127461511e-06, "loss": 0.0, "step": 4543 }, { "epoch": 0.29284011084616873, "grad_norm": 0.0005874389946818668, "learning_rate": 7.860365198711064e-06, "loss": 0.0, "step": 4544 }, { "epoch": 0.2929045562930979, "grad_norm": 0.003848245562625219, "learning_rate": 7.859649122807018e-06, "loss": 0.0, "step": 4545 }, { "epoch": 0.29296900174002705, "grad_norm": 0.0007591827858018054, "learning_rate": 7.858933046902973e-06, "loss": 0.0, "step": 4546 }, { "epoch": 0.29303344718695623, "grad_norm": 0.0027661824546528937, "learning_rate": 7.858216970998927e-06, "loss": 0.0, "step": 4547 }, { "epoch": 0.2930978926338854, "grad_norm": 0.004813073623290894, "learning_rate": 7.857500895094881e-06, "loss": 0.0, "step": 4548 }, { "epoch": 0.2931623380808146, "grad_norm": 0.7728116364248644, "learning_rate": 7.856784819190834e-06, "loss": 0.0034, "step": 4549 }, { "epoch": 0.2932267835277438, "grad_norm": 0.014514382210677952, "learning_rate": 7.856068743286788e-06, "loss": 0.0001, "step": 4550 }, { "epoch": 0.2932912289746729, "grad_norm": 0.18422850996311158, "learning_rate": 7.855352667382743e-06, "loss": 0.0017, "step": 4551 }, { "epoch": 0.2933556744216021, "grad_norm": 0.004560710887887605, "learning_rate": 7.854636591478697e-06, "loss": 0.0, "step": 4552 }, { "epoch": 0.2934201198685313, "grad_norm": 0.03367021315660142, "learning_rate": 7.853920515574651e-06, "loss": 0.0001, "step": 4553 }, { "epoch": 0.29348456531546047, "grad_norm": 0.0755510030149236, "learning_rate": 7.853204439670605e-06, "loss": 0.0008, "step": 4554 }, { "epoch": 0.29354901076238965, "grad_norm": 0.27525725415499364, "learning_rate": 7.85248836376656e-06, "loss": 0.0008, "step": 4555 }, { "epoch": 0.29361345620931883, "grad_norm": 0.019564531699342042, "learning_rate": 7.851772287862514e-06, "loss": 0.0, "step": 4556 }, { "epoch": 0.29367790165624796, "grad_norm": 0.008114938435770352, "learning_rate": 7.851056211958468e-06, "loss": 0.0, "step": 4557 }, { "epoch": 0.29374234710317715, "grad_norm": 0.3528800646587041, "learning_rate": 7.850340136054423e-06, "loss": 0.0002, "step": 4558 }, { "epoch": 0.29380679255010633, "grad_norm": 0.0006609426373109618, "learning_rate": 7.849624060150377e-06, "loss": 0.0, "step": 4559 }, { "epoch": 0.2938712379970355, "grad_norm": 0.18983152295207498, "learning_rate": 7.848907984246331e-06, "loss": 0.0019, "step": 4560 }, { "epoch": 0.2939356834439647, "grad_norm": 0.051502671807416966, "learning_rate": 7.848191908342286e-06, "loss": 0.0001, "step": 4561 }, { "epoch": 0.2940001288908939, "grad_norm": 0.004743945694445782, "learning_rate": 7.84747583243824e-06, "loss": 0.0, "step": 4562 }, { "epoch": 0.294064574337823, "grad_norm": 0.3051454696860592, "learning_rate": 7.846759756534194e-06, "loss": 0.0007, "step": 4563 }, { "epoch": 0.2941290197847522, "grad_norm": 0.0005357593062468779, "learning_rate": 7.846043680630148e-06, "loss": 0.0, "step": 4564 }, { "epoch": 0.2941934652316814, "grad_norm": 0.0004975399317757935, "learning_rate": 7.845327604726101e-06, "loss": 0.0, "step": 4565 }, { "epoch": 0.29425791067861057, "grad_norm": 0.002784029827135192, "learning_rate": 7.844611528822055e-06, "loss": 0.0, "step": 4566 }, { "epoch": 0.29432235612553975, "grad_norm": 0.00492558169817448, "learning_rate": 7.84389545291801e-06, "loss": 0.0, "step": 4567 }, { "epoch": 0.2943868015724689, "grad_norm": 0.0001998417877650629, "learning_rate": 7.843179377013964e-06, "loss": 0.0, "step": 4568 }, { "epoch": 0.29445124701939807, "grad_norm": 0.01305385781332726, "learning_rate": 7.842463301109918e-06, "loss": 0.0001, "step": 4569 }, { "epoch": 0.29451569246632725, "grad_norm": 0.010528942460724023, "learning_rate": 7.841747225205873e-06, "loss": 0.0001, "step": 4570 }, { "epoch": 0.29458013791325643, "grad_norm": 0.00020059010498940666, "learning_rate": 7.841031149301827e-06, "loss": 0.0, "step": 4571 }, { "epoch": 0.2946445833601856, "grad_norm": 0.0032149860570045243, "learning_rate": 7.840315073397781e-06, "loss": 0.0, "step": 4572 }, { "epoch": 0.2947090288071148, "grad_norm": 0.71958861976571, "learning_rate": 7.839598997493735e-06, "loss": 0.0035, "step": 4573 }, { "epoch": 0.29477347425404393, "grad_norm": 3.0767340446139673, "learning_rate": 7.838882921589688e-06, "loss": 0.025, "step": 4574 }, { "epoch": 0.2948379197009731, "grad_norm": 0.001674350790710976, "learning_rate": 7.838166845685642e-06, "loss": 0.0, "step": 4575 }, { "epoch": 0.2949023651479023, "grad_norm": 0.012657985235324564, "learning_rate": 7.837450769781597e-06, "loss": 0.0001, "step": 4576 }, { "epoch": 0.2949668105948315, "grad_norm": 0.005560305992674056, "learning_rate": 7.836734693877551e-06, "loss": 0.0, "step": 4577 }, { "epoch": 0.29503125604176067, "grad_norm": 0.039299774754964066, "learning_rate": 7.836018617973505e-06, "loss": 0.0001, "step": 4578 }, { "epoch": 0.2950957014886898, "grad_norm": 0.00030549551142350253, "learning_rate": 7.83530254206946e-06, "loss": 0.0, "step": 4579 }, { "epoch": 0.295160146935619, "grad_norm": 0.004564936822918925, "learning_rate": 7.834586466165414e-06, "loss": 0.0, "step": 4580 }, { "epoch": 0.29522459238254817, "grad_norm": 0.001231723360405645, "learning_rate": 7.833870390261368e-06, "loss": 0.0, "step": 4581 }, { "epoch": 0.29528903782947735, "grad_norm": 0.01125057962057763, "learning_rate": 7.833154314357322e-06, "loss": 0.0, "step": 4582 }, { "epoch": 0.29535348327640654, "grad_norm": 0.025788512570669428, "learning_rate": 7.832438238453277e-06, "loss": 0.0, "step": 4583 }, { "epoch": 0.2954179287233357, "grad_norm": 0.03530924012953212, "learning_rate": 7.831722162549231e-06, "loss": 0.0, "step": 4584 }, { "epoch": 0.29548237417026485, "grad_norm": 0.03815530609659034, "learning_rate": 7.831006086645185e-06, "loss": 0.0005, "step": 4585 }, { "epoch": 0.29554681961719403, "grad_norm": 0.7514677785081176, "learning_rate": 7.83029001074114e-06, "loss": 0.0039, "step": 4586 }, { "epoch": 0.2956112650641232, "grad_norm": 0.009560576602998675, "learning_rate": 7.829573934837094e-06, "loss": 0.0001, "step": 4587 }, { "epoch": 0.2956757105110524, "grad_norm": 0.032842624976011536, "learning_rate": 7.828857858933048e-06, "loss": 0.0, "step": 4588 }, { "epoch": 0.2957401559579816, "grad_norm": 0.031006444152063366, "learning_rate": 7.828141783029003e-06, "loss": 0.0017, "step": 4589 }, { "epoch": 0.2958046014049107, "grad_norm": 0.24915392579693496, "learning_rate": 7.827425707124955e-06, "loss": 0.0007, "step": 4590 }, { "epoch": 0.2958690468518399, "grad_norm": 0.03304664469135916, "learning_rate": 7.82670963122091e-06, "loss": 0.0, "step": 4591 }, { "epoch": 0.2959334922987691, "grad_norm": 0.05655894885454582, "learning_rate": 7.825993555316864e-06, "loss": 0.0001, "step": 4592 }, { "epoch": 0.29599793774569827, "grad_norm": 0.38388087603718396, "learning_rate": 7.825277479412818e-06, "loss": 0.0011, "step": 4593 }, { "epoch": 0.29606238319262745, "grad_norm": 0.566841116400412, "learning_rate": 7.824561403508772e-06, "loss": 0.0035, "step": 4594 }, { "epoch": 0.29612682863955664, "grad_norm": 0.011698133493121187, "learning_rate": 7.823845327604727e-06, "loss": 0.0001, "step": 4595 }, { "epoch": 0.29619127408648577, "grad_norm": 0.027770648157643973, "learning_rate": 7.823129251700681e-06, "loss": 0.0, "step": 4596 }, { "epoch": 0.29625571953341495, "grad_norm": 2.278856981939525, "learning_rate": 7.822413175796635e-06, "loss": 0.0001, "step": 4597 }, { "epoch": 0.29632016498034414, "grad_norm": 0.0020159815640930436, "learning_rate": 7.82169709989259e-06, "loss": 0.0, "step": 4598 }, { "epoch": 0.2963846104272733, "grad_norm": 0.14718794658617437, "learning_rate": 7.820981023988544e-06, "loss": 0.0008, "step": 4599 }, { "epoch": 0.2964490558742025, "grad_norm": 0.0021516636260371985, "learning_rate": 7.820264948084496e-06, "loss": 0.0, "step": 4600 }, { "epoch": 0.2965135013211317, "grad_norm": 0.010642395207132137, "learning_rate": 7.81954887218045e-06, "loss": 0.0, "step": 4601 }, { "epoch": 0.2965779467680608, "grad_norm": 0.04448229876604851, "learning_rate": 7.818832796276405e-06, "loss": 0.0, "step": 4602 }, { "epoch": 0.29664239221499, "grad_norm": 0.0033751772275503853, "learning_rate": 7.81811672037236e-06, "loss": 0.0, "step": 4603 }, { "epoch": 0.2967068376619192, "grad_norm": 0.0037728265986638255, "learning_rate": 7.817400644468315e-06, "loss": 0.0, "step": 4604 }, { "epoch": 0.29677128310884837, "grad_norm": 0.0027119067769277696, "learning_rate": 7.81668456856427e-06, "loss": 0.0, "step": 4605 }, { "epoch": 0.29683572855577756, "grad_norm": 0.06407639553398353, "learning_rate": 7.815968492660222e-06, "loss": 0.0002, "step": 4606 }, { "epoch": 0.2969001740027067, "grad_norm": 0.03252517104288065, "learning_rate": 7.815252416756177e-06, "loss": 0.0001, "step": 4607 }, { "epoch": 0.29696461944963587, "grad_norm": 0.04116721890821116, "learning_rate": 7.81453634085213e-06, "loss": 0.0003, "step": 4608 }, { "epoch": 0.29702906489656505, "grad_norm": 0.0015415324517583455, "learning_rate": 7.813820264948085e-06, "loss": 0.0, "step": 4609 }, { "epoch": 0.29709351034349424, "grad_norm": 0.08608630962120033, "learning_rate": 7.81310418904404e-06, "loss": 0.0002, "step": 4610 }, { "epoch": 0.2971579557904234, "grad_norm": 0.00012577019306394426, "learning_rate": 7.812388113139994e-06, "loss": 0.0, "step": 4611 }, { "epoch": 0.2972224012373526, "grad_norm": 0.006028051862400764, "learning_rate": 7.811672037235948e-06, "loss": 0.0001, "step": 4612 }, { "epoch": 0.29728684668428174, "grad_norm": 0.10264788711136673, "learning_rate": 7.810955961331902e-06, "loss": 0.0004, "step": 4613 }, { "epoch": 0.2973512921312109, "grad_norm": 0.037061054366988276, "learning_rate": 7.810239885427857e-06, "loss": 0.0, "step": 4614 }, { "epoch": 0.2974157375781401, "grad_norm": 0.0002439607188696404, "learning_rate": 7.809523809523811e-06, "loss": 0.0, "step": 4615 }, { "epoch": 0.2974801830250693, "grad_norm": 0.014972837410916137, "learning_rate": 7.808807733619764e-06, "loss": 0.0, "step": 4616 }, { "epoch": 0.2975446284719985, "grad_norm": 0.0008584027435497128, "learning_rate": 7.808091657715718e-06, "loss": 0.0, "step": 4617 }, { "epoch": 0.2976090739189276, "grad_norm": 0.0006271920321097305, "learning_rate": 7.807375581811672e-06, "loss": 0.0, "step": 4618 }, { "epoch": 0.2976735193658568, "grad_norm": 0.005431255360080073, "learning_rate": 7.806659505907626e-06, "loss": 0.0, "step": 4619 }, { "epoch": 0.29773796481278597, "grad_norm": 0.0010751240437544664, "learning_rate": 7.80594343000358e-06, "loss": 0.0, "step": 4620 }, { "epoch": 0.29780241025971516, "grad_norm": 0.3879795137802696, "learning_rate": 7.805227354099535e-06, "loss": 0.0039, "step": 4621 }, { "epoch": 0.29786685570664434, "grad_norm": 0.0020645484028067747, "learning_rate": 7.80451127819549e-06, "loss": 0.0, "step": 4622 }, { "epoch": 0.2979313011535735, "grad_norm": 0.0010723496273858072, "learning_rate": 7.803795202291444e-06, "loss": 0.0, "step": 4623 }, { "epoch": 0.29799574660050265, "grad_norm": 0.0027411407806028714, "learning_rate": 7.803079126387398e-06, "loss": 0.0, "step": 4624 }, { "epoch": 0.29806019204743184, "grad_norm": 0.00014802318816350044, "learning_rate": 7.802363050483352e-06, "loss": 0.0, "step": 4625 }, { "epoch": 0.298124637494361, "grad_norm": 0.031220465912154896, "learning_rate": 7.801646974579305e-06, "loss": 0.0002, "step": 4626 }, { "epoch": 0.2981890829412902, "grad_norm": 0.002613055270924557, "learning_rate": 7.80093089867526e-06, "loss": 0.0, "step": 4627 }, { "epoch": 0.2982535283882194, "grad_norm": 0.002614199435021259, "learning_rate": 7.800214822771215e-06, "loss": 0.0, "step": 4628 }, { "epoch": 0.2983179738351485, "grad_norm": 0.0026466108391232086, "learning_rate": 7.79949874686717e-06, "loss": 0.0, "step": 4629 }, { "epoch": 0.2983824192820777, "grad_norm": 0.0011872838771710653, "learning_rate": 7.798782670963124e-06, "loss": 0.0, "step": 4630 }, { "epoch": 0.2984468647290069, "grad_norm": 0.46395645085834897, "learning_rate": 7.798066595059078e-06, "loss": 0.0014, "step": 4631 }, { "epoch": 0.2985113101759361, "grad_norm": 0.0013353272721039187, "learning_rate": 7.79735051915503e-06, "loss": 0.0, "step": 4632 }, { "epoch": 0.29857575562286526, "grad_norm": 0.0017705315561493894, "learning_rate": 7.796634443250985e-06, "loss": 0.0, "step": 4633 }, { "epoch": 0.29864020106979444, "grad_norm": 0.007422942160128908, "learning_rate": 7.79591836734694e-06, "loss": 0.0001, "step": 4634 }, { "epoch": 0.29870464651672357, "grad_norm": 0.0017911138589993113, "learning_rate": 7.795202291442893e-06, "loss": 0.0, "step": 4635 }, { "epoch": 0.29876909196365276, "grad_norm": 0.023585737352795046, "learning_rate": 7.794486215538848e-06, "loss": 0.0002, "step": 4636 }, { "epoch": 0.29883353741058194, "grad_norm": 0.0016310814255533401, "learning_rate": 7.793770139634802e-06, "loss": 0.0, "step": 4637 }, { "epoch": 0.2988979828575111, "grad_norm": 0.10732582740168191, "learning_rate": 7.793054063730756e-06, "loss": 0.0011, "step": 4638 }, { "epoch": 0.2989624283044403, "grad_norm": 0.15895521683864702, "learning_rate": 7.79233798782671e-06, "loss": 0.0006, "step": 4639 }, { "epoch": 0.2990268737513695, "grad_norm": 0.00880201531000441, "learning_rate": 7.791621911922665e-06, "loss": 0.0, "step": 4640 }, { "epoch": 0.2990913191982986, "grad_norm": 0.003968743597232801, "learning_rate": 7.79090583601862e-06, "loss": 0.0, "step": 4641 }, { "epoch": 0.2991557646452278, "grad_norm": 0.001546892372756122, "learning_rate": 7.790189760114572e-06, "loss": 0.0, "step": 4642 }, { "epoch": 0.299220210092157, "grad_norm": 0.003255836015276672, "learning_rate": 7.789473684210526e-06, "loss": 0.0, "step": 4643 }, { "epoch": 0.2992846555390862, "grad_norm": 0.004197248189523486, "learning_rate": 7.78875760830648e-06, "loss": 0.0, "step": 4644 }, { "epoch": 0.29934910098601536, "grad_norm": 0.0008416042110688216, "learning_rate": 7.788041532402435e-06, "loss": 0.0, "step": 4645 }, { "epoch": 0.2994135464329445, "grad_norm": 0.0043030888462125645, "learning_rate": 7.787325456498389e-06, "loss": 0.0, "step": 4646 }, { "epoch": 0.2994779918798737, "grad_norm": 0.033809603114627454, "learning_rate": 7.786609380594343e-06, "loss": 0.0002, "step": 4647 }, { "epoch": 0.29954243732680286, "grad_norm": 0.0003606526883201009, "learning_rate": 7.785893304690298e-06, "loss": 0.0, "step": 4648 }, { "epoch": 0.29960688277373204, "grad_norm": 0.014667166220949858, "learning_rate": 7.785177228786252e-06, "loss": 0.0, "step": 4649 }, { "epoch": 0.2996713282206612, "grad_norm": 0.0005881952328653598, "learning_rate": 7.784461152882206e-06, "loss": 0.0, "step": 4650 }, { "epoch": 0.2997357736675904, "grad_norm": 0.0008412704886720019, "learning_rate": 7.78374507697816e-06, "loss": 0.0, "step": 4651 }, { "epoch": 0.29980021911451954, "grad_norm": 0.04959488604565864, "learning_rate": 7.783029001074115e-06, "loss": 0.0002, "step": 4652 }, { "epoch": 0.2998646645614487, "grad_norm": 0.002363653848485576, "learning_rate": 7.78231292517007e-06, "loss": 0.0, "step": 4653 }, { "epoch": 0.2999291100083779, "grad_norm": 0.00021418820248923426, "learning_rate": 7.781596849266023e-06, "loss": 0.0, "step": 4654 }, { "epoch": 0.2999935554553071, "grad_norm": 0.0019327509427194745, "learning_rate": 7.780880773361978e-06, "loss": 0.0, "step": 4655 }, { "epoch": 0.3000580009022363, "grad_norm": 0.004825567350965219, "learning_rate": 7.780164697457932e-06, "loss": 0.0, "step": 4656 }, { "epoch": 0.3001224463491654, "grad_norm": 0.02280787046614858, "learning_rate": 7.779448621553886e-06, "loss": 0.0001, "step": 4657 }, { "epoch": 0.3001868917960946, "grad_norm": 0.000839246267523498, "learning_rate": 7.778732545649839e-06, "loss": 0.0, "step": 4658 }, { "epoch": 0.3002513372430238, "grad_norm": 7.830966965893415e-05, "learning_rate": 7.778016469745793e-06, "loss": 0.0, "step": 4659 }, { "epoch": 0.30031578268995296, "grad_norm": 0.27347772847300145, "learning_rate": 7.777300393841748e-06, "loss": 0.002, "step": 4660 }, { "epoch": 0.30038022813688214, "grad_norm": 0.004087371353731553, "learning_rate": 7.776584317937702e-06, "loss": 0.0, "step": 4661 }, { "epoch": 0.30044467358381133, "grad_norm": 0.0006904881544805334, "learning_rate": 7.775868242033656e-06, "loss": 0.0, "step": 4662 }, { "epoch": 0.30050911903074046, "grad_norm": 0.005781394579225731, "learning_rate": 7.77515216612961e-06, "loss": 0.0001, "step": 4663 }, { "epoch": 0.30057356447766964, "grad_norm": 0.10011603023948211, "learning_rate": 7.774436090225565e-06, "loss": 0.0017, "step": 4664 }, { "epoch": 0.3006380099245988, "grad_norm": 0.0003700707387533223, "learning_rate": 7.773720014321519e-06, "loss": 0.0, "step": 4665 }, { "epoch": 0.300702455371528, "grad_norm": 0.0036266337136206684, "learning_rate": 7.773003938417473e-06, "loss": 0.0, "step": 4666 }, { "epoch": 0.3007669008184572, "grad_norm": 0.006053707020341861, "learning_rate": 7.772287862513428e-06, "loss": 0.0, "step": 4667 }, { "epoch": 0.3008313462653863, "grad_norm": 0.002798110998151991, "learning_rate": 7.77157178660938e-06, "loss": 0.0, "step": 4668 }, { "epoch": 0.3008957917123155, "grad_norm": 0.002980137785368464, "learning_rate": 7.770855710705335e-06, "loss": 0.0, "step": 4669 }, { "epoch": 0.3009602371592447, "grad_norm": 0.002200906356600856, "learning_rate": 7.770139634801289e-06, "loss": 0.0, "step": 4670 }, { "epoch": 0.3010246826061739, "grad_norm": 0.3781393095729475, "learning_rate": 7.769423558897243e-06, "loss": 0.0006, "step": 4671 }, { "epoch": 0.30108912805310306, "grad_norm": 0.012304835706529734, "learning_rate": 7.768707482993197e-06, "loss": 0.0, "step": 4672 }, { "epoch": 0.30115357350003225, "grad_norm": 0.030847193216389755, "learning_rate": 7.767991407089152e-06, "loss": 0.0, "step": 4673 }, { "epoch": 0.3012180189469614, "grad_norm": 0.0013446218988361794, "learning_rate": 7.767275331185106e-06, "loss": 0.0, "step": 4674 }, { "epoch": 0.30128246439389056, "grad_norm": 0.3333385313145346, "learning_rate": 7.76655925528106e-06, "loss": 0.0004, "step": 4675 }, { "epoch": 0.30134690984081974, "grad_norm": 0.05260483629009791, "learning_rate": 7.765843179377015e-06, "loss": 0.0001, "step": 4676 }, { "epoch": 0.3014113552877489, "grad_norm": 0.012214662431553574, "learning_rate": 7.765127103472969e-06, "loss": 0.0, "step": 4677 }, { "epoch": 0.3014758007346781, "grad_norm": 0.00023572483317842684, "learning_rate": 7.764411027568923e-06, "loss": 0.0, "step": 4678 }, { "epoch": 0.3015402461816073, "grad_norm": 0.15849859996043447, "learning_rate": 7.763694951664878e-06, "loss": 0.0002, "step": 4679 }, { "epoch": 0.3016046916285364, "grad_norm": 0.028688948102707078, "learning_rate": 7.762978875760832e-06, "loss": 0.0001, "step": 4680 }, { "epoch": 0.3016691370754656, "grad_norm": 0.2802043258044797, "learning_rate": 7.762262799856786e-06, "loss": 0.0006, "step": 4681 }, { "epoch": 0.3017335825223948, "grad_norm": 0.01589553270615149, "learning_rate": 7.76154672395274e-06, "loss": 0.0002, "step": 4682 }, { "epoch": 0.301798027969324, "grad_norm": 0.005081411054152497, "learning_rate": 7.760830648048695e-06, "loss": 0.0, "step": 4683 }, { "epoch": 0.30186247341625316, "grad_norm": 0.05606253532474472, "learning_rate": 7.760114572144647e-06, "loss": 0.0001, "step": 4684 }, { "epoch": 0.3019269188631823, "grad_norm": 0.01129783697631848, "learning_rate": 7.759398496240602e-06, "loss": 0.0001, "step": 4685 }, { "epoch": 0.3019913643101115, "grad_norm": 0.0004596890283011299, "learning_rate": 7.758682420336556e-06, "loss": 0.0, "step": 4686 }, { "epoch": 0.30205580975704066, "grad_norm": 0.000857765967735999, "learning_rate": 7.75796634443251e-06, "loss": 0.0, "step": 4687 }, { "epoch": 0.30212025520396985, "grad_norm": 0.0026360514695217855, "learning_rate": 7.757250268528465e-06, "loss": 0.0, "step": 4688 }, { "epoch": 0.30218470065089903, "grad_norm": 0.019969502601820874, "learning_rate": 7.756534192624419e-06, "loss": 0.0, "step": 4689 }, { "epoch": 0.3022491460978282, "grad_norm": 0.009782519107099928, "learning_rate": 7.755818116720373e-06, "loss": 0.0, "step": 4690 }, { "epoch": 0.30231359154475734, "grad_norm": 0.001139921838035331, "learning_rate": 7.755102040816327e-06, "loss": 0.0, "step": 4691 }, { "epoch": 0.3023780369916865, "grad_norm": 0.010498980062060898, "learning_rate": 7.754385964912282e-06, "loss": 0.0, "step": 4692 }, { "epoch": 0.3024424824386157, "grad_norm": 0.209275470748402, "learning_rate": 7.753669889008234e-06, "loss": 0.0008, "step": 4693 }, { "epoch": 0.3025069278855449, "grad_norm": 0.006151414616092774, "learning_rate": 7.752953813104189e-06, "loss": 0.0, "step": 4694 }, { "epoch": 0.3025713733324741, "grad_norm": 0.2086179229931362, "learning_rate": 7.752237737200143e-06, "loss": 0.0015, "step": 4695 }, { "epoch": 0.3026358187794032, "grad_norm": 0.00013166228230322507, "learning_rate": 7.751521661296097e-06, "loss": 0.0, "step": 4696 }, { "epoch": 0.3027002642263324, "grad_norm": 9.777865384865902e-05, "learning_rate": 7.750805585392053e-06, "loss": 0.0, "step": 4697 }, { "epoch": 0.3027647096732616, "grad_norm": 0.734374634762937, "learning_rate": 7.750089509488008e-06, "loss": 0.0025, "step": 4698 }, { "epoch": 0.30282915512019076, "grad_norm": 0.006581345635300139, "learning_rate": 7.749373433583962e-06, "loss": 0.0, "step": 4699 }, { "epoch": 0.30289360056711995, "grad_norm": 0.1299164592212838, "learning_rate": 7.748657357679914e-06, "loss": 0.0009, "step": 4700 }, { "epoch": 0.30295804601404913, "grad_norm": 0.005917647420111509, "learning_rate": 7.747941281775869e-06, "loss": 0.0, "step": 4701 }, { "epoch": 0.30302249146097826, "grad_norm": 0.46501049288836965, "learning_rate": 7.747225205871823e-06, "loss": 0.0088, "step": 4702 }, { "epoch": 0.30308693690790744, "grad_norm": 0.0004601057773333485, "learning_rate": 7.746509129967777e-06, "loss": 0.0, "step": 4703 }, { "epoch": 0.30315138235483663, "grad_norm": 0.16790902384417963, "learning_rate": 7.745793054063732e-06, "loss": 0.0002, "step": 4704 }, { "epoch": 0.3032158278017658, "grad_norm": 0.009253831686704374, "learning_rate": 7.745076978159686e-06, "loss": 0.0, "step": 4705 }, { "epoch": 0.303280273248695, "grad_norm": 0.05335917375409336, "learning_rate": 7.74436090225564e-06, "loss": 0.0002, "step": 4706 }, { "epoch": 0.3033447186956241, "grad_norm": 0.0001441868512219672, "learning_rate": 7.743644826351595e-06, "loss": 0.0, "step": 4707 }, { "epoch": 0.3034091641425533, "grad_norm": 0.0002741329134754317, "learning_rate": 7.742928750447549e-06, "loss": 0.0, "step": 4708 }, { "epoch": 0.3034736095894825, "grad_norm": 0.01443467281263996, "learning_rate": 7.742212674543501e-06, "loss": 0.0001, "step": 4709 }, { "epoch": 0.3035380550364117, "grad_norm": 0.015183455470397374, "learning_rate": 7.741496598639456e-06, "loss": 0.0001, "step": 4710 }, { "epoch": 0.30360250048334086, "grad_norm": 0.023355433462827355, "learning_rate": 7.74078052273541e-06, "loss": 0.0002, "step": 4711 }, { "epoch": 0.30366694593027005, "grad_norm": 0.004497382272455204, "learning_rate": 7.740064446831364e-06, "loss": 0.0, "step": 4712 }, { "epoch": 0.3037313913771992, "grad_norm": 0.03580885399675683, "learning_rate": 7.739348370927319e-06, "loss": 0.0004, "step": 4713 }, { "epoch": 0.30379583682412836, "grad_norm": 0.00021593148826366693, "learning_rate": 7.738632295023273e-06, "loss": 0.0, "step": 4714 }, { "epoch": 0.30386028227105755, "grad_norm": 0.0006532279015124764, "learning_rate": 7.737916219119227e-06, "loss": 0.0, "step": 4715 }, { "epoch": 0.30392472771798673, "grad_norm": 0.0016168136163807064, "learning_rate": 7.737200143215182e-06, "loss": 0.0, "step": 4716 }, { "epoch": 0.3039891731649159, "grad_norm": 0.00016003900954974685, "learning_rate": 7.736484067311136e-06, "loss": 0.0, "step": 4717 }, { "epoch": 0.3040536186118451, "grad_norm": 0.04574878624670092, "learning_rate": 7.73576799140709e-06, "loss": 0.0001, "step": 4718 }, { "epoch": 0.30411806405877423, "grad_norm": 0.09921795946091631, "learning_rate": 7.735051915503043e-06, "loss": 0.0009, "step": 4719 }, { "epoch": 0.3041825095057034, "grad_norm": 0.0009618883446051705, "learning_rate": 7.734335839598997e-06, "loss": 0.0, "step": 4720 }, { "epoch": 0.3042469549526326, "grad_norm": 0.00015935827088833944, "learning_rate": 7.733619763694953e-06, "loss": 0.0, "step": 4721 }, { "epoch": 0.3043114003995618, "grad_norm": 0.004121915434638065, "learning_rate": 7.732903687790907e-06, "loss": 0.0, "step": 4722 }, { "epoch": 0.30437584584649097, "grad_norm": 1.251110299054438, "learning_rate": 7.732187611886862e-06, "loss": 0.0038, "step": 4723 }, { "epoch": 0.3044402912934201, "grad_norm": 0.0019873520206396024, "learning_rate": 7.731471535982816e-06, "loss": 0.0, "step": 4724 }, { "epoch": 0.3045047367403493, "grad_norm": 0.00038713677005660534, "learning_rate": 7.730755460078769e-06, "loss": 0.0, "step": 4725 }, { "epoch": 0.30456918218727846, "grad_norm": 0.007009648691403379, "learning_rate": 7.730039384174723e-06, "loss": 0.0001, "step": 4726 }, { "epoch": 0.30463362763420765, "grad_norm": 0.27969282933703976, "learning_rate": 7.729323308270677e-06, "loss": 0.0006, "step": 4727 }, { "epoch": 0.30469807308113683, "grad_norm": 0.0012857714808325838, "learning_rate": 7.728607232366631e-06, "loss": 0.0, "step": 4728 }, { "epoch": 0.304762518528066, "grad_norm": 0.5750549352359965, "learning_rate": 7.727891156462586e-06, "loss": 0.0035, "step": 4729 }, { "epoch": 0.30482696397499515, "grad_norm": 0.2587270122489265, "learning_rate": 7.72717508055854e-06, "loss": 0.0004, "step": 4730 }, { "epoch": 0.30489140942192433, "grad_norm": 0.002896226310871362, "learning_rate": 7.726459004654494e-06, "loss": 0.0, "step": 4731 }, { "epoch": 0.3049558548688535, "grad_norm": 0.00044256071047343225, "learning_rate": 7.725742928750449e-06, "loss": 0.0, "step": 4732 }, { "epoch": 0.3050203003157827, "grad_norm": 0.003308588148680433, "learning_rate": 7.725026852846403e-06, "loss": 0.0, "step": 4733 }, { "epoch": 0.3050847457627119, "grad_norm": 0.001083691728375002, "learning_rate": 7.724310776942357e-06, "loss": 0.0, "step": 4734 }, { "epoch": 0.305149191209641, "grad_norm": 0.0007827155588311144, "learning_rate": 7.72359470103831e-06, "loss": 0.0, "step": 4735 }, { "epoch": 0.3052136366565702, "grad_norm": 0.036329813310400326, "learning_rate": 7.722878625134264e-06, "loss": 0.0004, "step": 4736 }, { "epoch": 0.3052780821034994, "grad_norm": 0.0210308823587032, "learning_rate": 7.722162549230218e-06, "loss": 0.0, "step": 4737 }, { "epoch": 0.30534252755042857, "grad_norm": 0.16520561188861155, "learning_rate": 7.721446473326173e-06, "loss": 0.0023, "step": 4738 }, { "epoch": 0.30540697299735775, "grad_norm": 0.005275712354562943, "learning_rate": 7.720730397422127e-06, "loss": 0.0, "step": 4739 }, { "epoch": 0.30547141844428694, "grad_norm": 0.900496966853056, "learning_rate": 7.720014321518081e-06, "loss": 0.0048, "step": 4740 }, { "epoch": 0.30553586389121606, "grad_norm": 0.008231191698063514, "learning_rate": 7.719298245614036e-06, "loss": 0.0, "step": 4741 }, { "epoch": 0.30560030933814525, "grad_norm": 0.3738767171786509, "learning_rate": 7.71858216970999e-06, "loss": 0.0023, "step": 4742 }, { "epoch": 0.30566475478507443, "grad_norm": 0.3131810158747463, "learning_rate": 7.717866093805944e-06, "loss": 0.001, "step": 4743 }, { "epoch": 0.3057292002320036, "grad_norm": 0.20129595328139735, "learning_rate": 7.717150017901898e-06, "loss": 0.0007, "step": 4744 }, { "epoch": 0.3057936456789328, "grad_norm": 0.021132452629210614, "learning_rate": 7.716433941997853e-06, "loss": 0.0001, "step": 4745 }, { "epoch": 0.30585809112586193, "grad_norm": 0.037783536362330825, "learning_rate": 7.715717866093807e-06, "loss": 0.0001, "step": 4746 }, { "epoch": 0.3059225365727911, "grad_norm": 0.45276547013303264, "learning_rate": 7.715001790189761e-06, "loss": 0.0015, "step": 4747 }, { "epoch": 0.3059869820197203, "grad_norm": 0.49549389414773587, "learning_rate": 7.714285714285716e-06, "loss": 0.0024, "step": 4748 }, { "epoch": 0.3060514274666495, "grad_norm": 0.00019081945517868202, "learning_rate": 7.71356963838167e-06, "loss": 0.0, "step": 4749 }, { "epoch": 0.30611587291357867, "grad_norm": 0.0014320023238470958, "learning_rate": 7.712853562477624e-06, "loss": 0.0, "step": 4750 }, { "epoch": 0.30618031836050785, "grad_norm": 0.06193467670974739, "learning_rate": 7.712137486573577e-06, "loss": 0.0001, "step": 4751 }, { "epoch": 0.306244763807437, "grad_norm": 0.024614945322434304, "learning_rate": 7.711421410669531e-06, "loss": 0.0, "step": 4752 }, { "epoch": 0.30630920925436617, "grad_norm": 0.0035196920149812207, "learning_rate": 7.710705334765485e-06, "loss": 0.0, "step": 4753 }, { "epoch": 0.30637365470129535, "grad_norm": 0.13125440414169431, "learning_rate": 7.70998925886144e-06, "loss": 0.0002, "step": 4754 }, { "epoch": 0.30643810014822453, "grad_norm": 0.004681982640320887, "learning_rate": 7.709273182957394e-06, "loss": 0.0, "step": 4755 }, { "epoch": 0.3065025455951537, "grad_norm": 0.006949093560674066, "learning_rate": 7.708557107053348e-06, "loss": 0.0, "step": 4756 }, { "epoch": 0.3065669910420829, "grad_norm": 0.0036677102878780133, "learning_rate": 7.707841031149303e-06, "loss": 0.0, "step": 4757 }, { "epoch": 0.30663143648901203, "grad_norm": 0.12488582731538928, "learning_rate": 7.707124955245257e-06, "loss": 0.0002, "step": 4758 }, { "epoch": 0.3066958819359412, "grad_norm": 0.10194604235196442, "learning_rate": 7.706408879341211e-06, "loss": 0.002, "step": 4759 }, { "epoch": 0.3067603273828704, "grad_norm": 0.029717985220950458, "learning_rate": 7.705692803437166e-06, "loss": 0.0003, "step": 4760 }, { "epoch": 0.3068247728297996, "grad_norm": 0.29767039057667016, "learning_rate": 7.704976727533118e-06, "loss": 0.0043, "step": 4761 }, { "epoch": 0.30688921827672877, "grad_norm": 0.009892853815882239, "learning_rate": 7.704260651629072e-06, "loss": 0.0001, "step": 4762 }, { "epoch": 0.3069536637236579, "grad_norm": 0.9851313213488757, "learning_rate": 7.703544575725027e-06, "loss": 0.0037, "step": 4763 }, { "epoch": 0.3070181091705871, "grad_norm": 0.17237959830448984, "learning_rate": 7.702828499820981e-06, "loss": 0.0004, "step": 4764 }, { "epoch": 0.30708255461751627, "grad_norm": 0.20161969607969146, "learning_rate": 7.702112423916935e-06, "loss": 0.0007, "step": 4765 }, { "epoch": 0.30714700006444545, "grad_norm": 0.009081867704277675, "learning_rate": 7.70139634801289e-06, "loss": 0.0, "step": 4766 }, { "epoch": 0.30721144551137464, "grad_norm": 0.00021591472203924571, "learning_rate": 7.700680272108844e-06, "loss": 0.0, "step": 4767 }, { "epoch": 0.3072758909583038, "grad_norm": 0.1620586212402397, "learning_rate": 7.699964196204798e-06, "loss": 0.0014, "step": 4768 }, { "epoch": 0.30734033640523295, "grad_norm": 0.0013959799328598175, "learning_rate": 7.699248120300753e-06, "loss": 0.0, "step": 4769 }, { "epoch": 0.30740478185216213, "grad_norm": 0.0035084092927934003, "learning_rate": 7.698532044396707e-06, "loss": 0.0, "step": 4770 }, { "epoch": 0.3074692272990913, "grad_norm": 0.0012145909662963545, "learning_rate": 7.697815968492661e-06, "loss": 0.0, "step": 4771 }, { "epoch": 0.3075336727460205, "grad_norm": 0.0020774691234807267, "learning_rate": 7.697099892588615e-06, "loss": 0.0, "step": 4772 }, { "epoch": 0.3075981181929497, "grad_norm": 0.02511536306626666, "learning_rate": 7.69638381668457e-06, "loss": 0.0001, "step": 4773 }, { "epoch": 0.3076625636398788, "grad_norm": 0.3414683043347056, "learning_rate": 7.695667740780524e-06, "loss": 0.001, "step": 4774 }, { "epoch": 0.307727009086808, "grad_norm": 0.018721636445177253, "learning_rate": 7.694951664876478e-06, "loss": 0.0, "step": 4775 }, { "epoch": 0.3077914545337372, "grad_norm": 0.004516405023989719, "learning_rate": 7.694235588972433e-06, "loss": 0.0, "step": 4776 }, { "epoch": 0.30785589998066637, "grad_norm": 0.001999490388142513, "learning_rate": 7.693519513068385e-06, "loss": 0.0, "step": 4777 }, { "epoch": 0.30792034542759555, "grad_norm": 0.009126433007855877, "learning_rate": 7.69280343716434e-06, "loss": 0.0, "step": 4778 }, { "epoch": 0.30798479087452474, "grad_norm": 0.1669292889199868, "learning_rate": 7.692087361260294e-06, "loss": 0.0003, "step": 4779 }, { "epoch": 0.30804923632145387, "grad_norm": 0.1951716678231349, "learning_rate": 7.691371285356248e-06, "loss": 0.0021, "step": 4780 }, { "epoch": 0.30811368176838305, "grad_norm": 0.03909516756695523, "learning_rate": 7.690655209452202e-06, "loss": 0.0001, "step": 4781 }, { "epoch": 0.30817812721531224, "grad_norm": 0.03378777956318579, "learning_rate": 7.689939133548157e-06, "loss": 0.0001, "step": 4782 }, { "epoch": 0.3082425726622414, "grad_norm": 0.019446393540969095, "learning_rate": 7.689223057644111e-06, "loss": 0.0001, "step": 4783 }, { "epoch": 0.3083070181091706, "grad_norm": 0.00664535455497235, "learning_rate": 7.688506981740065e-06, "loss": 0.0, "step": 4784 }, { "epoch": 0.30837146355609973, "grad_norm": 0.03097241392532757, "learning_rate": 7.68779090583602e-06, "loss": 0.0, "step": 4785 }, { "epoch": 0.3084359090030289, "grad_norm": 0.00806853471227027, "learning_rate": 7.687074829931972e-06, "loss": 0.0001, "step": 4786 }, { "epoch": 0.3085003544499581, "grad_norm": 0.018839860233995302, "learning_rate": 7.686358754027927e-06, "loss": 0.0001, "step": 4787 }, { "epoch": 0.3085647998968873, "grad_norm": 0.04220925687343892, "learning_rate": 7.685642678123881e-06, "loss": 0.0001, "step": 4788 }, { "epoch": 0.30862924534381647, "grad_norm": 0.0034860404645811554, "learning_rate": 7.684926602219835e-06, "loss": 0.0, "step": 4789 }, { "epoch": 0.30869369079074566, "grad_norm": 0.0022177384041537087, "learning_rate": 7.68421052631579e-06, "loss": 0.0, "step": 4790 }, { "epoch": 0.3087581362376748, "grad_norm": 0.005044939837968819, "learning_rate": 7.683494450411745e-06, "loss": 0.0, "step": 4791 }, { "epoch": 0.30882258168460397, "grad_norm": 0.0005836039764449223, "learning_rate": 7.6827783745077e-06, "loss": 0.0, "step": 4792 }, { "epoch": 0.30888702713153315, "grad_norm": 0.0037036582704997596, "learning_rate": 7.682062298603652e-06, "loss": 0.0, "step": 4793 }, { "epoch": 0.30895147257846234, "grad_norm": 0.006685573662889619, "learning_rate": 7.681346222699607e-06, "loss": 0.0, "step": 4794 }, { "epoch": 0.3090159180253915, "grad_norm": 0.0004702842280755588, "learning_rate": 7.680630146795561e-06, "loss": 0.0, "step": 4795 }, { "epoch": 0.3090803634723207, "grad_norm": 0.041626408650233614, "learning_rate": 7.679914070891515e-06, "loss": 0.0002, "step": 4796 }, { "epoch": 0.30914480891924984, "grad_norm": 0.0007088733267549225, "learning_rate": 7.67919799498747e-06, "loss": 0.0, "step": 4797 }, { "epoch": 0.309209254366179, "grad_norm": 0.01583183010603208, "learning_rate": 7.678481919083424e-06, "loss": 0.0, "step": 4798 }, { "epoch": 0.3092736998131082, "grad_norm": 0.005335127912471382, "learning_rate": 7.677765843179378e-06, "loss": 0.0, "step": 4799 }, { "epoch": 0.3093381452600374, "grad_norm": 0.0010392895308987736, "learning_rate": 7.677049767275332e-06, "loss": 0.0, "step": 4800 }, { "epoch": 0.3094025907069666, "grad_norm": 0.04592088900990831, "learning_rate": 7.676333691371287e-06, "loss": 0.0001, "step": 4801 }, { "epoch": 0.3094670361538957, "grad_norm": 0.05353814637908716, "learning_rate": 7.67561761546724e-06, "loss": 0.0001, "step": 4802 }, { "epoch": 0.3095314816008249, "grad_norm": 0.004138582846488795, "learning_rate": 7.674901539563194e-06, "loss": 0.0, "step": 4803 }, { "epoch": 0.30959592704775407, "grad_norm": 0.02532980585580661, "learning_rate": 7.674185463659148e-06, "loss": 0.0002, "step": 4804 }, { "epoch": 0.30966037249468326, "grad_norm": 0.22175176851799505, "learning_rate": 7.673469387755102e-06, "loss": 0.0006, "step": 4805 }, { "epoch": 0.30972481794161244, "grad_norm": 0.02077356363290854, "learning_rate": 7.672753311851057e-06, "loss": 0.0001, "step": 4806 }, { "epoch": 0.3097892633885416, "grad_norm": 0.0009383453196531269, "learning_rate": 7.67203723594701e-06, "loss": 0.0, "step": 4807 }, { "epoch": 0.30985370883547075, "grad_norm": 0.0021745621033941653, "learning_rate": 7.671321160042965e-06, "loss": 0.0, "step": 4808 }, { "epoch": 0.30991815428239994, "grad_norm": 0.012943322912088268, "learning_rate": 7.67060508413892e-06, "loss": 0.0001, "step": 4809 }, { "epoch": 0.3099825997293291, "grad_norm": 1.1511934908108268, "learning_rate": 7.669889008234874e-06, "loss": 0.0127, "step": 4810 }, { "epoch": 0.3100470451762583, "grad_norm": 0.0009821788142993016, "learning_rate": 7.669172932330828e-06, "loss": 0.0, "step": 4811 }, { "epoch": 0.3101114906231875, "grad_norm": 0.0007450914756910727, "learning_rate": 7.66845685642678e-06, "loss": 0.0, "step": 4812 }, { "epoch": 0.3101759360701166, "grad_norm": 0.0015614974705150063, "learning_rate": 7.667740780522735e-06, "loss": 0.0, "step": 4813 }, { "epoch": 0.3102403815170458, "grad_norm": 0.007181676464111988, "learning_rate": 7.667024704618691e-06, "loss": 0.0, "step": 4814 }, { "epoch": 0.310304826963975, "grad_norm": 0.019878176986525983, "learning_rate": 7.666308628714645e-06, "loss": 0.0, "step": 4815 }, { "epoch": 0.3103692724109042, "grad_norm": 0.0002867674669890647, "learning_rate": 7.6655925528106e-06, "loss": 0.0, "step": 4816 }, { "epoch": 0.31043371785783336, "grad_norm": 1.71531500456341, "learning_rate": 7.664876476906554e-06, "loss": 0.0089, "step": 4817 }, { "epoch": 0.31049816330476254, "grad_norm": 0.020545370898791864, "learning_rate": 7.664160401002506e-06, "loss": 0.0, "step": 4818 }, { "epoch": 0.31056260875169167, "grad_norm": 0.0020818963706780835, "learning_rate": 7.66344432509846e-06, "loss": 0.0, "step": 4819 }, { "epoch": 0.31062705419862086, "grad_norm": 0.0007152623732576892, "learning_rate": 7.662728249194415e-06, "loss": 0.0, "step": 4820 }, { "epoch": 0.31069149964555004, "grad_norm": 0.0009722607342199007, "learning_rate": 7.66201217329037e-06, "loss": 0.0, "step": 4821 }, { "epoch": 0.3107559450924792, "grad_norm": 0.023470547345033253, "learning_rate": 7.661296097386324e-06, "loss": 0.0, "step": 4822 }, { "epoch": 0.3108203905394084, "grad_norm": 0.023717350679689185, "learning_rate": 7.660580021482278e-06, "loss": 0.0001, "step": 4823 }, { "epoch": 0.31088483598633754, "grad_norm": 0.04864605815166363, "learning_rate": 7.659863945578232e-06, "loss": 0.0001, "step": 4824 }, { "epoch": 0.3109492814332667, "grad_norm": 0.34392935235481353, "learning_rate": 7.659147869674187e-06, "loss": 0.0009, "step": 4825 }, { "epoch": 0.3110137268801959, "grad_norm": 0.0010634566579679346, "learning_rate": 7.65843179377014e-06, "loss": 0.0, "step": 4826 }, { "epoch": 0.3110781723271251, "grad_norm": 0.011746606897081312, "learning_rate": 7.657715717866095e-06, "loss": 0.0, "step": 4827 }, { "epoch": 0.3111426177740543, "grad_norm": 0.03706126639581151, "learning_rate": 7.656999641962048e-06, "loss": 0.0015, "step": 4828 }, { "epoch": 0.31120706322098346, "grad_norm": 0.2990590368965242, "learning_rate": 7.656283566058002e-06, "loss": 0.0015, "step": 4829 }, { "epoch": 0.3112715086679126, "grad_norm": 0.0010528169780914909, "learning_rate": 7.655567490153956e-06, "loss": 0.0, "step": 4830 }, { "epoch": 0.3113359541148418, "grad_norm": 0.042425665637751686, "learning_rate": 7.65485141424991e-06, "loss": 0.0, "step": 4831 }, { "epoch": 0.31140039956177096, "grad_norm": 0.001670182439381502, "learning_rate": 7.654135338345865e-06, "loss": 0.0, "step": 4832 }, { "epoch": 0.31146484500870014, "grad_norm": 0.0009654571293139372, "learning_rate": 7.65341926244182e-06, "loss": 0.0, "step": 4833 }, { "epoch": 0.3115292904556293, "grad_norm": 0.011802421818662695, "learning_rate": 7.652703186537774e-06, "loss": 0.0, "step": 4834 }, { "epoch": 0.3115937359025585, "grad_norm": 0.25027796074816694, "learning_rate": 7.651987110633728e-06, "loss": 0.0001, "step": 4835 }, { "epoch": 0.31165818134948764, "grad_norm": 0.009832647673374325, "learning_rate": 7.651271034729682e-06, "loss": 0.0, "step": 4836 }, { "epoch": 0.3117226267964168, "grad_norm": 0.29034152459702184, "learning_rate": 7.650554958825636e-06, "loss": 0.0042, "step": 4837 }, { "epoch": 0.311787072243346, "grad_norm": 0.0031092830812778157, "learning_rate": 7.64983888292159e-06, "loss": 0.0, "step": 4838 }, { "epoch": 0.3118515176902752, "grad_norm": 0.04667664571851142, "learning_rate": 7.649122807017545e-06, "loss": 0.0003, "step": 4839 }, { "epoch": 0.3119159631372044, "grad_norm": 0.007606067281312005, "learning_rate": 7.6484067311135e-06, "loss": 0.0, "step": 4840 }, { "epoch": 0.3119804085841335, "grad_norm": 0.02780274263883628, "learning_rate": 7.647690655209454e-06, "loss": 0.0, "step": 4841 }, { "epoch": 0.3120448540310627, "grad_norm": 0.023759968008487852, "learning_rate": 7.646974579305408e-06, "loss": 0.0001, "step": 4842 }, { "epoch": 0.3121092994779919, "grad_norm": 0.0008431761617935588, "learning_rate": 7.646258503401362e-06, "loss": 0.0, "step": 4843 }, { "epoch": 0.31217374492492106, "grad_norm": 0.015393384023410159, "learning_rate": 7.645542427497315e-06, "loss": 0.0, "step": 4844 }, { "epoch": 0.31223819037185024, "grad_norm": 0.004651234971706581, "learning_rate": 7.644826351593269e-06, "loss": 0.0, "step": 4845 }, { "epoch": 0.31230263581877943, "grad_norm": 0.3541639505543996, "learning_rate": 7.644110275689223e-06, "loss": 0.0008, "step": 4846 }, { "epoch": 0.31236708126570856, "grad_norm": 0.5029302800739834, "learning_rate": 7.643394199785178e-06, "loss": 0.0007, "step": 4847 }, { "epoch": 0.31243152671263774, "grad_norm": 1.1434380840791063, "learning_rate": 7.642678123881132e-06, "loss": 0.0061, "step": 4848 }, { "epoch": 0.3124959721595669, "grad_norm": 0.09346831740869416, "learning_rate": 7.641962047977086e-06, "loss": 0.0004, "step": 4849 }, { "epoch": 0.3125604176064961, "grad_norm": 0.05341842719165527, "learning_rate": 7.64124597207304e-06, "loss": 0.0001, "step": 4850 }, { "epoch": 0.3126248630534253, "grad_norm": 0.00365746558821454, "learning_rate": 7.640529896168995e-06, "loss": 0.0, "step": 4851 }, { "epoch": 0.3126893085003544, "grad_norm": 0.004174417788371213, "learning_rate": 7.63981382026495e-06, "loss": 0.0, "step": 4852 }, { "epoch": 0.3127537539472836, "grad_norm": 0.12209112752832342, "learning_rate": 7.639097744360904e-06, "loss": 0.0006, "step": 4853 }, { "epoch": 0.3128181993942128, "grad_norm": 4.058324927935555, "learning_rate": 7.638381668456856e-06, "loss": 0.0265, "step": 4854 }, { "epoch": 0.312882644841142, "grad_norm": 0.006323243901996307, "learning_rate": 7.63766559255281e-06, "loss": 0.0, "step": 4855 }, { "epoch": 0.31294709028807116, "grad_norm": 0.08450654898943195, "learning_rate": 7.636949516648765e-06, "loss": 0.0003, "step": 4856 }, { "epoch": 0.31301153573500035, "grad_norm": 0.036397043543195534, "learning_rate": 7.636233440744719e-06, "loss": 0.0, "step": 4857 }, { "epoch": 0.3130759811819295, "grad_norm": 0.1896315550434969, "learning_rate": 7.635517364840673e-06, "loss": 0.0002, "step": 4858 }, { "epoch": 0.31314042662885866, "grad_norm": 0.09269444465632841, "learning_rate": 7.634801288936628e-06, "loss": 0.0002, "step": 4859 }, { "epoch": 0.31320487207578784, "grad_norm": 1.4154748765446705, "learning_rate": 7.634085213032582e-06, "loss": 0.0064, "step": 4860 }, { "epoch": 0.31326931752271703, "grad_norm": 0.1601978399193165, "learning_rate": 7.633369137128536e-06, "loss": 0.0002, "step": 4861 }, { "epoch": 0.3133337629696462, "grad_norm": 0.001664992046050716, "learning_rate": 7.63265306122449e-06, "loss": 0.0, "step": 4862 }, { "epoch": 0.31339820841657534, "grad_norm": 0.005741379817492573, "learning_rate": 7.631936985320445e-06, "loss": 0.0, "step": 4863 }, { "epoch": 0.3134626538635045, "grad_norm": 0.3956785422217856, "learning_rate": 7.631220909416399e-06, "loss": 0.0017, "step": 4864 }, { "epoch": 0.3135270993104337, "grad_norm": 0.02680946006956498, "learning_rate": 7.630504833512353e-06, "loss": 0.0, "step": 4865 }, { "epoch": 0.3135915447573629, "grad_norm": 0.4638488605292008, "learning_rate": 7.629788757608308e-06, "loss": 0.0007, "step": 4866 }, { "epoch": 0.3136559902042921, "grad_norm": 0.14234355024120665, "learning_rate": 7.629072681704261e-06, "loss": 0.0013, "step": 4867 }, { "epoch": 0.31372043565122126, "grad_norm": 0.07415628314107933, "learning_rate": 7.6283566058002154e-06, "loss": 0.0001, "step": 4868 }, { "epoch": 0.3137848810981504, "grad_norm": 0.06906294810141103, "learning_rate": 7.62764052989617e-06, "loss": 0.0001, "step": 4869 }, { "epoch": 0.3138493265450796, "grad_norm": 0.13154624617698507, "learning_rate": 7.626924453992124e-06, "loss": 0.0018, "step": 4870 }, { "epoch": 0.31391377199200876, "grad_norm": 1.0724474365407197, "learning_rate": 7.626208378088078e-06, "loss": 0.0054, "step": 4871 }, { "epoch": 0.31397821743893795, "grad_norm": 0.3422724004482794, "learning_rate": 7.625492302184032e-06, "loss": 0.0024, "step": 4872 }, { "epoch": 0.31404266288586713, "grad_norm": 0.0016748577841006922, "learning_rate": 7.624776226279986e-06, "loss": 0.0, "step": 4873 }, { "epoch": 0.3141071083327963, "grad_norm": 0.041638429798124134, "learning_rate": 7.62406015037594e-06, "loss": 0.0001, "step": 4874 }, { "epoch": 0.31417155377972544, "grad_norm": 0.003857485529783318, "learning_rate": 7.623344074471895e-06, "loss": 0.0, "step": 4875 }, { "epoch": 0.3142359992266546, "grad_norm": 0.03168840340461246, "learning_rate": 7.622627998567849e-06, "loss": 0.0, "step": 4876 }, { "epoch": 0.3143004446735838, "grad_norm": 0.006814045902594915, "learning_rate": 7.6219119226638024e-06, "loss": 0.0, "step": 4877 }, { "epoch": 0.314364890120513, "grad_norm": 0.18182873983099754, "learning_rate": 7.621195846759757e-06, "loss": 0.0003, "step": 4878 }, { "epoch": 0.3144293355674422, "grad_norm": 0.00901951630925349, "learning_rate": 7.620479770855711e-06, "loss": 0.0, "step": 4879 }, { "epoch": 0.3144937810143713, "grad_norm": 1.9066806994546444, "learning_rate": 7.619763694951665e-06, "loss": 0.0055, "step": 4880 }, { "epoch": 0.3145582264613005, "grad_norm": 0.04556863316205216, "learning_rate": 7.61904761904762e-06, "loss": 0.0002, "step": 4881 }, { "epoch": 0.3146226719082297, "grad_norm": 0.0012398452923164066, "learning_rate": 7.618331543143573e-06, "loss": 0.0, "step": 4882 }, { "epoch": 0.31468711735515886, "grad_norm": 0.04519157202135707, "learning_rate": 7.617615467239527e-06, "loss": 0.0001, "step": 4883 }, { "epoch": 0.31475156280208805, "grad_norm": 0.02342436591337108, "learning_rate": 7.6168993913354825e-06, "loss": 0.0017, "step": 4884 }, { "epoch": 0.31481600824901723, "grad_norm": 0.018536176514005024, "learning_rate": 7.616183315431437e-06, "loss": 0.0002, "step": 4885 }, { "epoch": 0.31488045369594636, "grad_norm": 0.172805904026286, "learning_rate": 7.615467239527391e-06, "loss": 0.0019, "step": 4886 }, { "epoch": 0.31494489914287555, "grad_norm": 0.05709872304307744, "learning_rate": 7.614751163623345e-06, "loss": 0.0002, "step": 4887 }, { "epoch": 0.31500934458980473, "grad_norm": 0.04901532527782089, "learning_rate": 7.614035087719299e-06, "loss": 0.0, "step": 4888 }, { "epoch": 0.3150737900367339, "grad_norm": 0.045398359407119136, "learning_rate": 7.613319011815253e-06, "loss": 0.0001, "step": 4889 }, { "epoch": 0.3151382354836631, "grad_norm": 0.01797741643383171, "learning_rate": 7.6126029359112075e-06, "loss": 0.0, "step": 4890 }, { "epoch": 0.3152026809305922, "grad_norm": 0.003458750480485351, "learning_rate": 7.611886860007162e-06, "loss": 0.0, "step": 4891 }, { "epoch": 0.3152671263775214, "grad_norm": 0.041268147514423606, "learning_rate": 7.611170784103116e-06, "loss": 0.0003, "step": 4892 }, { "epoch": 0.3153315718244506, "grad_norm": 0.10172780046470717, "learning_rate": 7.6104547081990695e-06, "loss": 0.0001, "step": 4893 }, { "epoch": 0.3153960172713798, "grad_norm": 0.0037263810735243897, "learning_rate": 7.609738632295024e-06, "loss": 0.0, "step": 4894 }, { "epoch": 0.31546046271830896, "grad_norm": 0.0032725613126103575, "learning_rate": 7.609022556390978e-06, "loss": 0.0, "step": 4895 }, { "epoch": 0.31552490816523815, "grad_norm": 0.09012161182014738, "learning_rate": 7.608306480486932e-06, "loss": 0.0008, "step": 4896 }, { "epoch": 0.3155893536121673, "grad_norm": 0.09166959806201995, "learning_rate": 7.607590404582887e-06, "loss": 0.0017, "step": 4897 }, { "epoch": 0.31565379905909646, "grad_norm": 0.011971259717290881, "learning_rate": 7.60687432867884e-06, "loss": 0.0, "step": 4898 }, { "epoch": 0.31571824450602565, "grad_norm": 0.07688257728135989, "learning_rate": 7.6061582527747945e-06, "loss": 0.0001, "step": 4899 }, { "epoch": 0.31578268995295483, "grad_norm": 0.000963422072308008, "learning_rate": 7.605442176870749e-06, "loss": 0.0, "step": 4900 }, { "epoch": 0.315847135399884, "grad_norm": 0.04923102333675415, "learning_rate": 7.604726100966703e-06, "loss": 0.0001, "step": 4901 }, { "epoch": 0.3159115808468132, "grad_norm": 0.017858915009544632, "learning_rate": 7.604010025062657e-06, "loss": 0.0, "step": 4902 }, { "epoch": 0.31597602629374233, "grad_norm": 0.0074076590984602534, "learning_rate": 7.603293949158611e-06, "loss": 0.0, "step": 4903 }, { "epoch": 0.3160404717406715, "grad_norm": 0.004138050993269019, "learning_rate": 7.602577873254565e-06, "loss": 0.0, "step": 4904 }, { "epoch": 0.3161049171876007, "grad_norm": 0.007657744900865728, "learning_rate": 7.601861797350519e-06, "loss": 0.0001, "step": 4905 }, { "epoch": 0.3161693626345299, "grad_norm": 0.015958886199231824, "learning_rate": 7.601145721446474e-06, "loss": 0.0001, "step": 4906 }, { "epoch": 0.31623380808145907, "grad_norm": 0.11232840246170804, "learning_rate": 7.600429645542427e-06, "loss": 0.0003, "step": 4907 }, { "epoch": 0.3162982535283882, "grad_norm": 0.0019847038300378613, "learning_rate": 7.599713569638383e-06, "loss": 0.0, "step": 4908 }, { "epoch": 0.3163626989753174, "grad_norm": 0.015164377914120451, "learning_rate": 7.598997493734337e-06, "loss": 0.0, "step": 4909 }, { "epoch": 0.31642714442224656, "grad_norm": 0.0005348704955189589, "learning_rate": 7.598281417830291e-06, "loss": 0.0, "step": 4910 }, { "epoch": 0.31649158986917575, "grad_norm": 0.4584278384947784, "learning_rate": 7.597565341926245e-06, "loss": 0.0147, "step": 4911 }, { "epoch": 0.31655603531610493, "grad_norm": 0.01919581211447712, "learning_rate": 7.5968492660221995e-06, "loss": 0.0, "step": 4912 }, { "epoch": 0.3166204807630341, "grad_norm": 0.0255280418873319, "learning_rate": 7.596133190118154e-06, "loss": 0.0001, "step": 4913 }, { "epoch": 0.31668492620996325, "grad_norm": 0.022365740209846784, "learning_rate": 7.595417114214107e-06, "loss": 0.0, "step": 4914 }, { "epoch": 0.31674937165689243, "grad_norm": 0.0017737070032865822, "learning_rate": 7.5947010383100616e-06, "loss": 0.0, "step": 4915 }, { "epoch": 0.3168138171038216, "grad_norm": 0.000988195283464662, "learning_rate": 7.593984962406016e-06, "loss": 0.0, "step": 4916 }, { "epoch": 0.3168782625507508, "grad_norm": 0.11666120524551912, "learning_rate": 7.59326888650197e-06, "loss": 0.0002, "step": 4917 }, { "epoch": 0.31694270799768, "grad_norm": 0.02640212358308717, "learning_rate": 7.5925528105979244e-06, "loss": 0.0001, "step": 4918 }, { "epoch": 0.3170071534446091, "grad_norm": 0.0005211830684490821, "learning_rate": 7.591836734693878e-06, "loss": 0.0, "step": 4919 }, { "epoch": 0.3170715988915383, "grad_norm": 0.007234303323305554, "learning_rate": 7.591120658789832e-06, "loss": 0.0, "step": 4920 }, { "epoch": 0.3171360443384675, "grad_norm": 0.07723608651426664, "learning_rate": 7.5904045828857865e-06, "loss": 0.0002, "step": 4921 }, { "epoch": 0.31720048978539667, "grad_norm": 0.0030668760381343594, "learning_rate": 7.589688506981741e-06, "loss": 0.0, "step": 4922 }, { "epoch": 0.31726493523232585, "grad_norm": 0.08688448284729575, "learning_rate": 7.588972431077694e-06, "loss": 0.0002, "step": 4923 }, { "epoch": 0.31732938067925504, "grad_norm": 0.06115100927977217, "learning_rate": 7.5882563551736485e-06, "loss": 0.0005, "step": 4924 }, { "epoch": 0.31739382612618416, "grad_norm": 0.004299995558535509, "learning_rate": 7.587540279269603e-06, "loss": 0.0, "step": 4925 }, { "epoch": 0.31745827157311335, "grad_norm": 0.010539844593121827, "learning_rate": 7.586824203365557e-06, "loss": 0.0, "step": 4926 }, { "epoch": 0.31752271702004253, "grad_norm": 0.0030391662830343918, "learning_rate": 7.5861081274615114e-06, "loss": 0.0, "step": 4927 }, { "epoch": 0.3175871624669717, "grad_norm": 0.03277884812642534, "learning_rate": 7.585392051557465e-06, "loss": 0.0001, "step": 4928 }, { "epoch": 0.3176516079139009, "grad_norm": 0.03166498928316155, "learning_rate": 7.584675975653419e-06, "loss": 0.0001, "step": 4929 }, { "epoch": 0.31771605336083003, "grad_norm": 0.005663952936868069, "learning_rate": 7.5839598997493735e-06, "loss": 0.0, "step": 4930 }, { "epoch": 0.3177804988077592, "grad_norm": 0.016582148083116855, "learning_rate": 7.583243823845329e-06, "loss": 0.0, "step": 4931 }, { "epoch": 0.3178449442546884, "grad_norm": 0.20244802858863004, "learning_rate": 7.582527747941283e-06, "loss": 0.0004, "step": 4932 }, { "epoch": 0.3179093897016176, "grad_norm": 0.001806635185564558, "learning_rate": 7.581811672037237e-06, "loss": 0.0, "step": 4933 }, { "epoch": 0.31797383514854677, "grad_norm": 1.1378611169584365, "learning_rate": 7.5810955961331915e-06, "loss": 0.0016, "step": 4934 }, { "epoch": 0.31803828059547595, "grad_norm": 0.10410084035311608, "learning_rate": 7.580379520229145e-06, "loss": 0.0002, "step": 4935 }, { "epoch": 0.3181027260424051, "grad_norm": 0.0009001930670387458, "learning_rate": 7.579663444325099e-06, "loss": 0.0, "step": 4936 }, { "epoch": 0.31816717148933427, "grad_norm": 0.00022304137717573243, "learning_rate": 7.578947368421054e-06, "loss": 0.0, "step": 4937 }, { "epoch": 0.31823161693626345, "grad_norm": 0.021512349972738248, "learning_rate": 7.578231292517008e-06, "loss": 0.0, "step": 4938 }, { "epoch": 0.31829606238319264, "grad_norm": 0.10591603729833651, "learning_rate": 7.577515216612961e-06, "loss": 0.0002, "step": 4939 }, { "epoch": 0.3183605078301218, "grad_norm": 0.013542071059994441, "learning_rate": 7.576799140708916e-06, "loss": 0.0001, "step": 4940 }, { "epoch": 0.318424953277051, "grad_norm": 0.00536616140223203, "learning_rate": 7.57608306480487e-06, "loss": 0.0, "step": 4941 }, { "epoch": 0.31848939872398013, "grad_norm": 0.01473934950136891, "learning_rate": 7.575366988900824e-06, "loss": 0.0001, "step": 4942 }, { "epoch": 0.3185538441709093, "grad_norm": 0.02923624998055832, "learning_rate": 7.5746509129967785e-06, "loss": 0.0001, "step": 4943 }, { "epoch": 0.3186182896178385, "grad_norm": 0.00951459181256414, "learning_rate": 7.573934837092732e-06, "loss": 0.0001, "step": 4944 }, { "epoch": 0.3186827350647677, "grad_norm": 0.002520293197353339, "learning_rate": 7.573218761188686e-06, "loss": 0.0, "step": 4945 }, { "epoch": 0.31874718051169687, "grad_norm": 0.0006642405582552634, "learning_rate": 7.5725026852846406e-06, "loss": 0.0, "step": 4946 }, { "epoch": 0.318811625958626, "grad_norm": 0.0023286591485846588, "learning_rate": 7.571786609380595e-06, "loss": 0.0, "step": 4947 }, { "epoch": 0.3188760714055552, "grad_norm": 0.010581924490633903, "learning_rate": 7.571070533476549e-06, "loss": 0.0001, "step": 4948 }, { "epoch": 0.31894051685248437, "grad_norm": 0.5570829541365085, "learning_rate": 7.570354457572503e-06, "loss": 0.0034, "step": 4949 }, { "epoch": 0.31900496229941355, "grad_norm": 0.01401707071364209, "learning_rate": 7.569638381668457e-06, "loss": 0.0, "step": 4950 }, { "epoch": 0.31906940774634274, "grad_norm": 0.0011558668939177993, "learning_rate": 7.568922305764411e-06, "loss": 0.0, "step": 4951 }, { "epoch": 0.3191338531932719, "grad_norm": 0.2016895448269546, "learning_rate": 7.5682062298603655e-06, "loss": 0.0011, "step": 4952 }, { "epoch": 0.31919829864020105, "grad_norm": 1.4378902071553488, "learning_rate": 7.56749015395632e-06, "loss": 0.0022, "step": 4953 }, { "epoch": 0.31926274408713023, "grad_norm": 0.002368113513227609, "learning_rate": 7.566774078052275e-06, "loss": 0.0, "step": 4954 }, { "epoch": 0.3193271895340594, "grad_norm": 0.0015807385536410412, "learning_rate": 7.566058002148228e-06, "loss": 0.0, "step": 4955 }, { "epoch": 0.3193916349809886, "grad_norm": 0.023490105392632896, "learning_rate": 7.565341926244183e-06, "loss": 0.0002, "step": 4956 }, { "epoch": 0.3194560804279178, "grad_norm": 0.003028390952271756, "learning_rate": 7.564625850340137e-06, "loss": 0.0, "step": 4957 }, { "epoch": 0.3195205258748469, "grad_norm": 0.02639448995249658, "learning_rate": 7.563909774436091e-06, "loss": 0.0001, "step": 4958 }, { "epoch": 0.3195849713217761, "grad_norm": 0.0002196733853071634, "learning_rate": 7.563193698532046e-06, "loss": 0.0, "step": 4959 }, { "epoch": 0.3196494167687053, "grad_norm": 0.00525486369614464, "learning_rate": 7.562477622627999e-06, "loss": 0.0, "step": 4960 }, { "epoch": 0.31971386221563447, "grad_norm": 6.02780353327736, "learning_rate": 7.561761546723953e-06, "loss": 0.0112, "step": 4961 }, { "epoch": 0.31977830766256365, "grad_norm": 0.027587832602718406, "learning_rate": 7.561045470819908e-06, "loss": 0.0002, "step": 4962 }, { "epoch": 0.31984275310949284, "grad_norm": 0.0010843918612778584, "learning_rate": 7.560329394915862e-06, "loss": 0.0, "step": 4963 }, { "epoch": 0.31990719855642197, "grad_norm": 0.019770665104542517, "learning_rate": 7.559613319011816e-06, "loss": 0.0001, "step": 4964 }, { "epoch": 0.31997164400335115, "grad_norm": 0.0004921501486983964, "learning_rate": 7.55889724310777e-06, "loss": 0.0, "step": 4965 }, { "epoch": 0.32003608945028034, "grad_norm": 0.06938545379726922, "learning_rate": 7.558181167203724e-06, "loss": 0.0001, "step": 4966 }, { "epoch": 0.3201005348972095, "grad_norm": 0.004667437215269022, "learning_rate": 7.557465091299678e-06, "loss": 0.0, "step": 4967 }, { "epoch": 0.3201649803441387, "grad_norm": 0.014288407805833092, "learning_rate": 7.556749015395633e-06, "loss": 0.0001, "step": 4968 }, { "epoch": 0.32022942579106783, "grad_norm": 0.008945516329945936, "learning_rate": 7.556032939491587e-06, "loss": 0.0001, "step": 4969 }, { "epoch": 0.320293871237997, "grad_norm": 0.00023568888504286596, "learning_rate": 7.55531686358754e-06, "loss": 0.0, "step": 4970 }, { "epoch": 0.3203583166849262, "grad_norm": 0.01571903394353528, "learning_rate": 7.554600787683495e-06, "loss": 0.0001, "step": 4971 }, { "epoch": 0.3204227621318554, "grad_norm": 0.00460776685039601, "learning_rate": 7.553884711779449e-06, "loss": 0.0, "step": 4972 }, { "epoch": 0.32048720757878457, "grad_norm": 0.04010418068407689, "learning_rate": 7.553168635875403e-06, "loss": 0.0001, "step": 4973 }, { "epoch": 0.32055165302571376, "grad_norm": 0.00035000815771177833, "learning_rate": 7.5524525599713576e-06, "loss": 0.0, "step": 4974 }, { "epoch": 0.3206160984726429, "grad_norm": 0.0024140885061416342, "learning_rate": 7.551736484067311e-06, "loss": 0.0, "step": 4975 }, { "epoch": 0.32068054391957207, "grad_norm": 0.007612368659569762, "learning_rate": 7.551020408163265e-06, "loss": 0.0, "step": 4976 }, { "epoch": 0.32074498936650125, "grad_norm": 0.001607101033468048, "learning_rate": 7.55030433225922e-06, "loss": 0.0, "step": 4977 }, { "epoch": 0.32080943481343044, "grad_norm": 0.0698845675905335, "learning_rate": 7.549588256355175e-06, "loss": 0.0001, "step": 4978 }, { "epoch": 0.3208738802603596, "grad_norm": 0.00011869231645262834, "learning_rate": 7.548872180451129e-06, "loss": 0.0, "step": 4979 }, { "epoch": 0.3209383257072888, "grad_norm": 0.002316536849549659, "learning_rate": 7.548156104547083e-06, "loss": 0.0, "step": 4980 }, { "epoch": 0.32100277115421794, "grad_norm": 0.2341897152622086, "learning_rate": 7.547440028643037e-06, "loss": 0.0015, "step": 4981 }, { "epoch": 0.3210672166011471, "grad_norm": 0.28748342580289754, "learning_rate": 7.546723952738991e-06, "loss": 0.0002, "step": 4982 }, { "epoch": 0.3211316620480763, "grad_norm": 0.0005328684311772538, "learning_rate": 7.546007876834945e-06, "loss": 0.0, "step": 4983 }, { "epoch": 0.3211961074950055, "grad_norm": 0.02484436391275856, "learning_rate": 7.5452918009309e-06, "loss": 0.0001, "step": 4984 }, { "epoch": 0.3212605529419347, "grad_norm": 0.008556152524764708, "learning_rate": 7.544575725026854e-06, "loss": 0.0001, "step": 4985 }, { "epoch": 0.3213249983888638, "grad_norm": 0.038374263358831415, "learning_rate": 7.5438596491228074e-06, "loss": 0.0003, "step": 4986 }, { "epoch": 0.321389443835793, "grad_norm": 0.0023323979630988576, "learning_rate": 7.543143573218762e-06, "loss": 0.0, "step": 4987 }, { "epoch": 0.32145388928272217, "grad_norm": 0.0018241033879360137, "learning_rate": 7.542427497314716e-06, "loss": 0.0, "step": 4988 }, { "epoch": 0.32151833472965136, "grad_norm": 0.014841046251769881, "learning_rate": 7.54171142141067e-06, "loss": 0.0001, "step": 4989 }, { "epoch": 0.32158278017658054, "grad_norm": 0.00044722802438275366, "learning_rate": 7.540995345506625e-06, "loss": 0.0, "step": 4990 }, { "epoch": 0.3216472256235097, "grad_norm": 0.012483224214803168, "learning_rate": 7.540279269602578e-06, "loss": 0.0, "step": 4991 }, { "epoch": 0.32171167107043885, "grad_norm": 0.0073138178050027175, "learning_rate": 7.539563193698532e-06, "loss": 0.0, "step": 4992 }, { "epoch": 0.32177611651736804, "grad_norm": 0.1553142062233551, "learning_rate": 7.538847117794487e-06, "loss": 0.0002, "step": 4993 }, { "epoch": 0.3218405619642972, "grad_norm": 0.0007977184599504469, "learning_rate": 7.538131041890441e-06, "loss": 0.0, "step": 4994 }, { "epoch": 0.3219050074112264, "grad_norm": 0.0020614767277034192, "learning_rate": 7.537414965986395e-06, "loss": 0.0, "step": 4995 }, { "epoch": 0.3219694528581556, "grad_norm": 0.00481232716002087, "learning_rate": 7.536698890082349e-06, "loss": 0.0, "step": 4996 }, { "epoch": 0.3220338983050847, "grad_norm": 0.004769241063000902, "learning_rate": 7.535982814178303e-06, "loss": 0.0001, "step": 4997 }, { "epoch": 0.3220983437520139, "grad_norm": 0.000924509781990204, "learning_rate": 7.535266738274257e-06, "loss": 0.0, "step": 4998 }, { "epoch": 0.3221627891989431, "grad_norm": 0.002373089335570934, "learning_rate": 7.534550662370212e-06, "loss": 0.0, "step": 4999 }, { "epoch": 0.3222272346458723, "grad_norm": 2.5394377915254367, "learning_rate": 7.533834586466165e-06, "loss": 0.0116, "step": 5000 }, { "epoch": 0.32229168009280146, "grad_norm": 2.836771188312746, "learning_rate": 7.533118510562121e-06, "loss": 0.0571, "step": 5001 }, { "epoch": 0.32235612553973064, "grad_norm": 0.0867760639624333, "learning_rate": 7.5324024346580745e-06, "loss": 0.0004, "step": 5002 }, { "epoch": 0.32242057098665977, "grad_norm": 0.009055324801599299, "learning_rate": 7.531686358754029e-06, "loss": 0.0001, "step": 5003 }, { "epoch": 0.32248501643358896, "grad_norm": 0.0007850045675465759, "learning_rate": 7.530970282849983e-06, "loss": 0.0, "step": 5004 }, { "epoch": 0.32254946188051814, "grad_norm": 0.0054402043559637694, "learning_rate": 7.530254206945937e-06, "loss": 0.0001, "step": 5005 }, { "epoch": 0.3226139073274473, "grad_norm": 0.0003870119441539209, "learning_rate": 7.529538131041892e-06, "loss": 0.0, "step": 5006 }, { "epoch": 0.3226783527743765, "grad_norm": 0.002394564450146463, "learning_rate": 7.528822055137845e-06, "loss": 0.0, "step": 5007 }, { "epoch": 0.32274279822130564, "grad_norm": 0.0009554164590431097, "learning_rate": 7.5281059792337995e-06, "loss": 0.0, "step": 5008 }, { "epoch": 0.3228072436682348, "grad_norm": 0.018775061036906286, "learning_rate": 7.527389903329754e-06, "loss": 0.0001, "step": 5009 }, { "epoch": 0.322871689115164, "grad_norm": 0.0004067308282074071, "learning_rate": 7.526673827425708e-06, "loss": 0.0, "step": 5010 }, { "epoch": 0.3229361345620932, "grad_norm": 0.0022833677798131395, "learning_rate": 7.525957751521662e-06, "loss": 0.0, "step": 5011 }, { "epoch": 0.3230005800090224, "grad_norm": 0.0034419373129303036, "learning_rate": 7.525241675617616e-06, "loss": 0.0, "step": 5012 }, { "epoch": 0.32306502545595156, "grad_norm": 0.0013269891500375182, "learning_rate": 7.52452559971357e-06, "loss": 0.0, "step": 5013 }, { "epoch": 0.3231294709028807, "grad_norm": 0.005102314456417867, "learning_rate": 7.523809523809524e-06, "loss": 0.0, "step": 5014 }, { "epoch": 0.3231939163498099, "grad_norm": 0.0011584206982300578, "learning_rate": 7.523093447905479e-06, "loss": 0.0, "step": 5015 }, { "epoch": 0.32325836179673906, "grad_norm": 0.004096714247036613, "learning_rate": 7.522377372001432e-06, "loss": 0.0, "step": 5016 }, { "epoch": 0.32332280724366824, "grad_norm": 0.02179641642182101, "learning_rate": 7.5216612960973865e-06, "loss": 0.0001, "step": 5017 }, { "epoch": 0.3233872526905974, "grad_norm": 0.0008365472971686693, "learning_rate": 7.520945220193341e-06, "loss": 0.0, "step": 5018 }, { "epoch": 0.3234516981375266, "grad_norm": 0.06371238259548284, "learning_rate": 7.520229144289295e-06, "loss": 0.0001, "step": 5019 }, { "epoch": 0.32351614358445574, "grad_norm": 0.009985623889348058, "learning_rate": 7.519513068385249e-06, "loss": 0.0, "step": 5020 }, { "epoch": 0.3235805890313849, "grad_norm": 0.007412286860036669, "learning_rate": 7.518796992481203e-06, "loss": 0.0001, "step": 5021 }, { "epoch": 0.3236450344783141, "grad_norm": 1.3540490710721447, "learning_rate": 7.518080916577157e-06, "loss": 0.0054, "step": 5022 }, { "epoch": 0.3237094799252433, "grad_norm": 0.000858304434348953, "learning_rate": 7.517364840673111e-06, "loss": 0.0, "step": 5023 }, { "epoch": 0.3237739253721725, "grad_norm": 0.0041181096965707915, "learning_rate": 7.5166487647690666e-06, "loss": 0.0, "step": 5024 }, { "epoch": 0.3238383708191016, "grad_norm": 0.0003157757437743913, "learning_rate": 7.515932688865021e-06, "loss": 0.0, "step": 5025 }, { "epoch": 0.3239028162660308, "grad_norm": 0.06434296094244776, "learning_rate": 7.515216612960975e-06, "loss": 0.0003, "step": 5026 }, { "epoch": 0.32396726171296, "grad_norm": 0.3501482917633669, "learning_rate": 7.5145005370569295e-06, "loss": 0.0012, "step": 5027 }, { "epoch": 0.32403170715988916, "grad_norm": 0.003286186092908012, "learning_rate": 7.513784461152883e-06, "loss": 0.0, "step": 5028 }, { "epoch": 0.32409615260681834, "grad_norm": 0.006841491514237018, "learning_rate": 7.513068385248837e-06, "loss": 0.0, "step": 5029 }, { "epoch": 0.32416059805374753, "grad_norm": 0.0026199554590787223, "learning_rate": 7.5123523093447915e-06, "loss": 0.0, "step": 5030 }, { "epoch": 0.32422504350067666, "grad_norm": 0.002010553894782923, "learning_rate": 7.511636233440746e-06, "loss": 0.0, "step": 5031 }, { "epoch": 0.32428948894760584, "grad_norm": 0.01111582230251598, "learning_rate": 7.510920157536699e-06, "loss": 0.0, "step": 5032 }, { "epoch": 0.324353934394535, "grad_norm": 0.012234168882052655, "learning_rate": 7.5102040816326536e-06, "loss": 0.0, "step": 5033 }, { "epoch": 0.3244183798414642, "grad_norm": 0.24919705042346788, "learning_rate": 7.509488005728608e-06, "loss": 0.0018, "step": 5034 }, { "epoch": 0.3244828252883934, "grad_norm": 0.0015482715286001473, "learning_rate": 7.508771929824562e-06, "loss": 0.0, "step": 5035 }, { "epoch": 0.3245472707353225, "grad_norm": 0.004696018029830818, "learning_rate": 7.5080558539205164e-06, "loss": 0.0, "step": 5036 }, { "epoch": 0.3246117161822517, "grad_norm": 0.014119543213684357, "learning_rate": 7.50733977801647e-06, "loss": 0.0, "step": 5037 }, { "epoch": 0.3246761616291809, "grad_norm": 0.3867381890545684, "learning_rate": 7.506623702112424e-06, "loss": 0.0025, "step": 5038 }, { "epoch": 0.3247406070761101, "grad_norm": 0.00042987711144992877, "learning_rate": 7.5059076262083785e-06, "loss": 0.0, "step": 5039 }, { "epoch": 0.32480505252303926, "grad_norm": 0.2663883013006666, "learning_rate": 7.505191550304333e-06, "loss": 0.0002, "step": 5040 }, { "epoch": 0.32486949796996845, "grad_norm": 0.0004538031013607848, "learning_rate": 7.504475474400287e-06, "loss": 0.0, "step": 5041 }, { "epoch": 0.3249339434168976, "grad_norm": 0.001688240594975126, "learning_rate": 7.5037593984962405e-06, "loss": 0.0, "step": 5042 }, { "epoch": 0.32499838886382676, "grad_norm": 0.0006231698876898269, "learning_rate": 7.503043322592195e-06, "loss": 0.0, "step": 5043 }, { "epoch": 0.32506283431075594, "grad_norm": 0.001326850866979831, "learning_rate": 7.502327246688149e-06, "loss": 0.0, "step": 5044 }, { "epoch": 0.32512727975768513, "grad_norm": 0.09123642767439222, "learning_rate": 7.5016111707841034e-06, "loss": 0.0003, "step": 5045 }, { "epoch": 0.3251917252046143, "grad_norm": 0.00031140489873245536, "learning_rate": 7.500895094880058e-06, "loss": 0.0, "step": 5046 }, { "epoch": 0.32525617065154344, "grad_norm": 0.010423918369235648, "learning_rate": 7.500179018976011e-06, "loss": 0.0001, "step": 5047 }, { "epoch": 0.3253206160984726, "grad_norm": 0.027732214205477675, "learning_rate": 7.499462943071967e-06, "loss": 0.0001, "step": 5048 }, { "epoch": 0.3253850615454018, "grad_norm": 0.0008578557245992876, "learning_rate": 7.498746867167921e-06, "loss": 0.0, "step": 5049 }, { "epoch": 0.325449506992331, "grad_norm": 0.015105272515006055, "learning_rate": 7.498030791263875e-06, "loss": 0.0001, "step": 5050 }, { "epoch": 0.3255139524392602, "grad_norm": 0.3200695232774652, "learning_rate": 7.497314715359829e-06, "loss": 0.0018, "step": 5051 }, { "epoch": 0.32557839788618936, "grad_norm": 0.06262675857097338, "learning_rate": 7.4965986394557835e-06, "loss": 0.0001, "step": 5052 }, { "epoch": 0.3256428433331185, "grad_norm": 0.008279700894868208, "learning_rate": 7.495882563551737e-06, "loss": 0.0, "step": 5053 }, { "epoch": 0.3257072887800477, "grad_norm": 0.01809988351456632, "learning_rate": 7.495166487647691e-06, "loss": 0.0001, "step": 5054 }, { "epoch": 0.32577173422697686, "grad_norm": 0.00032587838609959163, "learning_rate": 7.494450411743646e-06, "loss": 0.0, "step": 5055 }, { "epoch": 0.32583617967390605, "grad_norm": 0.16255369537207687, "learning_rate": 7.4937343358396e-06, "loss": 0.0021, "step": 5056 }, { "epoch": 0.32590062512083523, "grad_norm": 0.008922854562315093, "learning_rate": 7.493018259935554e-06, "loss": 0.0, "step": 5057 }, { "epoch": 0.3259650705677644, "grad_norm": 0.002154537714572031, "learning_rate": 7.492302184031508e-06, "loss": 0.0, "step": 5058 }, { "epoch": 0.32602951601469354, "grad_norm": 0.0004839898917475037, "learning_rate": 7.491586108127462e-06, "loss": 0.0, "step": 5059 }, { "epoch": 0.32609396146162273, "grad_norm": 0.0004548610049905596, "learning_rate": 7.490870032223416e-06, "loss": 0.0, "step": 5060 }, { "epoch": 0.3261584069085519, "grad_norm": 0.44944039557347387, "learning_rate": 7.4901539563193705e-06, "loss": 0.0026, "step": 5061 }, { "epoch": 0.3262228523554811, "grad_norm": 0.00016835646080685742, "learning_rate": 7.489437880415325e-06, "loss": 0.0, "step": 5062 }, { "epoch": 0.3262872978024103, "grad_norm": 0.29209044726483824, "learning_rate": 7.488721804511278e-06, "loss": 0.0009, "step": 5063 }, { "epoch": 0.3263517432493394, "grad_norm": 0.0001249842603296567, "learning_rate": 7.4880057286072326e-06, "loss": 0.0, "step": 5064 }, { "epoch": 0.3264161886962686, "grad_norm": 0.011162206390915428, "learning_rate": 7.487289652703187e-06, "loss": 0.0, "step": 5065 }, { "epoch": 0.3264806341431978, "grad_norm": 0.005577277546888436, "learning_rate": 7.486573576799141e-06, "loss": 0.0001, "step": 5066 }, { "epoch": 0.32654507959012696, "grad_norm": 0.008560459027039776, "learning_rate": 7.4858575008950955e-06, "loss": 0.0, "step": 5067 }, { "epoch": 0.32660952503705615, "grad_norm": 0.22203547121366796, "learning_rate": 7.485141424991049e-06, "loss": 0.0013, "step": 5068 }, { "epoch": 0.32667397048398533, "grad_norm": 0.1355473815182943, "learning_rate": 7.484425349087003e-06, "loss": 0.0019, "step": 5069 }, { "epoch": 0.32673841593091446, "grad_norm": 0.0003878992249302429, "learning_rate": 7.4837092731829575e-06, "loss": 0.0, "step": 5070 }, { "epoch": 0.32680286137784365, "grad_norm": 0.0011205206516367939, "learning_rate": 7.482993197278913e-06, "loss": 0.0, "step": 5071 }, { "epoch": 0.32686730682477283, "grad_norm": 0.00033044966655028444, "learning_rate": 7.482277121374867e-06, "loss": 0.0, "step": 5072 }, { "epoch": 0.326931752271702, "grad_norm": 0.02445801950067175, "learning_rate": 7.481561045470821e-06, "loss": 0.0001, "step": 5073 }, { "epoch": 0.3269961977186312, "grad_norm": 0.012846676525802831, "learning_rate": 7.480844969566775e-06, "loss": 0.0, "step": 5074 }, { "epoch": 0.3270606431655603, "grad_norm": 0.006191962266799229, "learning_rate": 7.480128893662729e-06, "loss": 0.0, "step": 5075 }, { "epoch": 0.3271250886124895, "grad_norm": 0.0037964312015699485, "learning_rate": 7.479412817758683e-06, "loss": 0.0, "step": 5076 }, { "epoch": 0.3271895340594187, "grad_norm": 0.0003184726203052082, "learning_rate": 7.478696741854638e-06, "loss": 0.0, "step": 5077 }, { "epoch": 0.3272539795063479, "grad_norm": 0.47123552026941196, "learning_rate": 7.477980665950592e-06, "loss": 0.004, "step": 5078 }, { "epoch": 0.32731842495327707, "grad_norm": 0.35992211698106236, "learning_rate": 7.477264590046545e-06, "loss": 0.0007, "step": 5079 }, { "epoch": 0.32738287040020625, "grad_norm": 0.18303458304005787, "learning_rate": 7.4765485141425e-06, "loss": 0.0002, "step": 5080 }, { "epoch": 0.3274473158471354, "grad_norm": 0.005515852076379496, "learning_rate": 7.475832438238454e-06, "loss": 0.0, "step": 5081 }, { "epoch": 0.32751176129406456, "grad_norm": 0.2568020526304833, "learning_rate": 7.475116362334408e-06, "loss": 0.0008, "step": 5082 }, { "epoch": 0.32757620674099375, "grad_norm": 0.003086437234108956, "learning_rate": 7.4744002864303626e-06, "loss": 0.0, "step": 5083 }, { "epoch": 0.32764065218792293, "grad_norm": 1.3171834943355232, "learning_rate": 7.473684210526316e-06, "loss": 0.0013, "step": 5084 }, { "epoch": 0.3277050976348521, "grad_norm": 0.033313737024242854, "learning_rate": 7.47296813462227e-06, "loss": 0.0003, "step": 5085 }, { "epoch": 0.32776954308178124, "grad_norm": 0.7567478842426889, "learning_rate": 7.472252058718225e-06, "loss": 0.0061, "step": 5086 }, { "epoch": 0.32783398852871043, "grad_norm": 0.0049614953742675985, "learning_rate": 7.471535982814179e-06, "loss": 0.0, "step": 5087 }, { "epoch": 0.3278984339756396, "grad_norm": 0.025865455721768113, "learning_rate": 7.470819906910133e-06, "loss": 0.0001, "step": 5088 }, { "epoch": 0.3279628794225688, "grad_norm": 0.06708022955203878, "learning_rate": 7.470103831006087e-06, "loss": 0.0005, "step": 5089 }, { "epoch": 0.328027324869498, "grad_norm": 0.11167039041291603, "learning_rate": 7.469387755102041e-06, "loss": 0.001, "step": 5090 }, { "epoch": 0.32809177031642717, "grad_norm": 0.07170793525024977, "learning_rate": 7.468671679197995e-06, "loss": 0.0001, "step": 5091 }, { "epoch": 0.3281562157633563, "grad_norm": 0.7082797245233898, "learning_rate": 7.4679556032939496e-06, "loss": 0.0016, "step": 5092 }, { "epoch": 0.3282206612102855, "grad_norm": 0.006456634595156348, "learning_rate": 7.467239527389904e-06, "loss": 0.0, "step": 5093 }, { "epoch": 0.32828510665721466, "grad_norm": 0.01645892325423116, "learning_rate": 7.466523451485859e-06, "loss": 0.0003, "step": 5094 }, { "epoch": 0.32834955210414385, "grad_norm": 0.7233672896851489, "learning_rate": 7.4658073755818124e-06, "loss": 0.0031, "step": 5095 }, { "epoch": 0.32841399755107303, "grad_norm": 0.1790257063440346, "learning_rate": 7.465091299677767e-06, "loss": 0.0004, "step": 5096 }, { "epoch": 0.3284784429980022, "grad_norm": 0.004959311847834531, "learning_rate": 7.464375223773721e-06, "loss": 0.0, "step": 5097 }, { "epoch": 0.32854288844493135, "grad_norm": 0.33592448098438615, "learning_rate": 7.463659147869675e-06, "loss": 0.0036, "step": 5098 }, { "epoch": 0.32860733389186053, "grad_norm": 0.006044847333106452, "learning_rate": 7.46294307196563e-06, "loss": 0.0, "step": 5099 }, { "epoch": 0.3286717793387897, "grad_norm": 0.015346268232809247, "learning_rate": 7.462226996061583e-06, "loss": 0.0, "step": 5100 }, { "epoch": 0.3287362247857189, "grad_norm": 0.03213329618545794, "learning_rate": 7.461510920157537e-06, "loss": 0.0, "step": 5101 }, { "epoch": 0.3288006702326481, "grad_norm": 0.0006729679357930884, "learning_rate": 7.460794844253492e-06, "loss": 0.0, "step": 5102 }, { "epoch": 0.3288651156795772, "grad_norm": 0.3873201952901252, "learning_rate": 7.460078768349446e-06, "loss": 0.0025, "step": 5103 }, { "epoch": 0.3289295611265064, "grad_norm": 0.0012658050799610712, "learning_rate": 7.4593626924454e-06, "loss": 0.0, "step": 5104 }, { "epoch": 0.3289940065734356, "grad_norm": 0.1685233110152655, "learning_rate": 7.458646616541354e-06, "loss": 0.0004, "step": 5105 }, { "epoch": 0.32905845202036477, "grad_norm": 0.003145091526784752, "learning_rate": 7.457930540637308e-06, "loss": 0.0, "step": 5106 }, { "epoch": 0.32912289746729395, "grad_norm": 0.006677561097412213, "learning_rate": 7.457214464733262e-06, "loss": 0.0, "step": 5107 }, { "epoch": 0.32918734291422314, "grad_norm": 0.008704954859180948, "learning_rate": 7.456498388829217e-06, "loss": 0.0, "step": 5108 }, { "epoch": 0.32925178836115226, "grad_norm": 0.020684504992077225, "learning_rate": 7.455782312925171e-06, "loss": 0.0, "step": 5109 }, { "epoch": 0.32931623380808145, "grad_norm": 0.017915273181646683, "learning_rate": 7.455066237021124e-06, "loss": 0.0001, "step": 5110 }, { "epoch": 0.32938067925501063, "grad_norm": 0.0006413246172499555, "learning_rate": 7.454350161117079e-06, "loss": 0.0, "step": 5111 }, { "epoch": 0.3294451247019398, "grad_norm": 0.016389241157989606, "learning_rate": 7.453634085213033e-06, "loss": 0.0, "step": 5112 }, { "epoch": 0.329509570148869, "grad_norm": 0.04613150357756326, "learning_rate": 7.452918009308987e-06, "loss": 0.0002, "step": 5113 }, { "epoch": 0.32957401559579813, "grad_norm": 0.006047588655278598, "learning_rate": 7.452201933404941e-06, "loss": 0.0, "step": 5114 }, { "epoch": 0.3296384610427273, "grad_norm": 0.11379492971456501, "learning_rate": 7.451485857500895e-06, "loss": 0.0002, "step": 5115 }, { "epoch": 0.3297029064896565, "grad_norm": 0.18467343189665436, "learning_rate": 7.450769781596849e-06, "loss": 0.0017, "step": 5116 }, { "epoch": 0.3297673519365857, "grad_norm": 0.00666060152594704, "learning_rate": 7.450053705692804e-06, "loss": 0.0, "step": 5117 }, { "epoch": 0.32983179738351487, "grad_norm": 0.15716715098903433, "learning_rate": 7.449337629788759e-06, "loss": 0.0002, "step": 5118 }, { "epoch": 0.32989624283044405, "grad_norm": 0.03158307087649183, "learning_rate": 7.448621553884713e-06, "loss": 0.0001, "step": 5119 }, { "epoch": 0.3299606882773732, "grad_norm": 6.076223041511504, "learning_rate": 7.447905477980667e-06, "loss": 0.0233, "step": 5120 }, { "epoch": 0.33002513372430237, "grad_norm": 0.04275050059957507, "learning_rate": 7.447189402076621e-06, "loss": 0.0001, "step": 5121 }, { "epoch": 0.33008957917123155, "grad_norm": 0.017601381917265038, "learning_rate": 7.446473326172575e-06, "loss": 0.0001, "step": 5122 }, { "epoch": 0.33015402461816074, "grad_norm": 0.0008449752226208954, "learning_rate": 7.445757250268529e-06, "loss": 0.0, "step": 5123 }, { "epoch": 0.3302184700650899, "grad_norm": 0.1010258416027035, "learning_rate": 7.445041174364484e-06, "loss": 0.0014, "step": 5124 }, { "epoch": 0.33028291551201905, "grad_norm": 0.0027437871382046137, "learning_rate": 7.444325098460438e-06, "loss": 0.0, "step": 5125 }, { "epoch": 0.33034736095894823, "grad_norm": 0.7007602556671023, "learning_rate": 7.4436090225563915e-06, "loss": 0.0044, "step": 5126 }, { "epoch": 0.3304118064058774, "grad_norm": 0.06670512931405594, "learning_rate": 7.442892946652346e-06, "loss": 0.0002, "step": 5127 }, { "epoch": 0.3304762518528066, "grad_norm": 0.00035463289021841213, "learning_rate": 7.4421768707483e-06, "loss": 0.0, "step": 5128 }, { "epoch": 0.3305406972997358, "grad_norm": 0.011838379957474788, "learning_rate": 7.441460794844254e-06, "loss": 0.0, "step": 5129 }, { "epoch": 0.33060514274666497, "grad_norm": 0.10076242075093453, "learning_rate": 7.440744718940208e-06, "loss": 0.0001, "step": 5130 }, { "epoch": 0.3306695881935941, "grad_norm": 0.03547042955610553, "learning_rate": 7.440028643036162e-06, "loss": 0.0, "step": 5131 }, { "epoch": 0.3307340336405233, "grad_norm": 0.02167577114795024, "learning_rate": 7.439312567132116e-06, "loss": 0.0, "step": 5132 }, { "epoch": 0.33079847908745247, "grad_norm": 0.17646025275384114, "learning_rate": 7.438596491228071e-06, "loss": 0.0005, "step": 5133 }, { "epoch": 0.33086292453438165, "grad_norm": 0.2529165492824762, "learning_rate": 7.437880415324025e-06, "loss": 0.0008, "step": 5134 }, { "epoch": 0.33092736998131084, "grad_norm": 0.020512357560091968, "learning_rate": 7.4371643394199785e-06, "loss": 0.0001, "step": 5135 }, { "epoch": 0.33099181542824, "grad_norm": 0.48623961290478873, "learning_rate": 7.436448263515933e-06, "loss": 0.0024, "step": 5136 }, { "epoch": 0.33105626087516915, "grad_norm": 0.011717598828756112, "learning_rate": 7.435732187611887e-06, "loss": 0.0, "step": 5137 }, { "epoch": 0.33112070632209833, "grad_norm": 0.10853024365035156, "learning_rate": 7.435016111707841e-06, "loss": 0.0016, "step": 5138 }, { "epoch": 0.3311851517690275, "grad_norm": 0.011491480938313548, "learning_rate": 7.434300035803796e-06, "loss": 0.0001, "step": 5139 }, { "epoch": 0.3312495972159567, "grad_norm": 0.018680412427338224, "learning_rate": 7.433583959899749e-06, "loss": 0.0001, "step": 5140 }, { "epoch": 0.3313140426628859, "grad_norm": 0.0033161842348454814, "learning_rate": 7.432867883995705e-06, "loss": 0.0, "step": 5141 }, { "epoch": 0.331378488109815, "grad_norm": 0.00034673562412390456, "learning_rate": 7.4321518080916586e-06, "loss": 0.0, "step": 5142 }, { "epoch": 0.3314429335567442, "grad_norm": 0.0029976854056438, "learning_rate": 7.431435732187613e-06, "loss": 0.0, "step": 5143 }, { "epoch": 0.3315073790036734, "grad_norm": 0.0018374341236920763, "learning_rate": 7.430719656283567e-06, "loss": 0.0, "step": 5144 }, { "epoch": 0.33157182445060257, "grad_norm": 0.003731978135310744, "learning_rate": 7.4300035803795215e-06, "loss": 0.0, "step": 5145 }, { "epoch": 0.33163626989753175, "grad_norm": 0.01021389306752017, "learning_rate": 7.429287504475475e-06, "loss": 0.0001, "step": 5146 }, { "epoch": 0.33170071534446094, "grad_norm": 0.002822017216866906, "learning_rate": 7.428571428571429e-06, "loss": 0.0, "step": 5147 }, { "epoch": 0.33176516079139007, "grad_norm": 0.0014409824839609782, "learning_rate": 7.4278553526673835e-06, "loss": 0.0, "step": 5148 }, { "epoch": 0.33182960623831925, "grad_norm": 0.03521087826211135, "learning_rate": 7.427139276763338e-06, "loss": 0.0, "step": 5149 }, { "epoch": 0.33189405168524844, "grad_norm": 0.0005318137644412104, "learning_rate": 7.426423200859292e-06, "loss": 0.0, "step": 5150 }, { "epoch": 0.3319584971321776, "grad_norm": 0.001292276645488789, "learning_rate": 7.4257071249552456e-06, "loss": 0.0, "step": 5151 }, { "epoch": 0.3320229425791068, "grad_norm": 0.1877081827917423, "learning_rate": 7.4249910490512e-06, "loss": 0.0018, "step": 5152 }, { "epoch": 0.33208738802603593, "grad_norm": 0.0013139168940337887, "learning_rate": 7.424274973147154e-06, "loss": 0.0, "step": 5153 }, { "epoch": 0.3321518334729651, "grad_norm": 0.0013585570143822931, "learning_rate": 7.4235588972431084e-06, "loss": 0.0, "step": 5154 }, { "epoch": 0.3322162789198943, "grad_norm": 0.01602709453277668, "learning_rate": 7.422842821339063e-06, "loss": 0.0, "step": 5155 }, { "epoch": 0.3322807243668235, "grad_norm": 0.17357639773672925, "learning_rate": 7.422126745435016e-06, "loss": 0.0008, "step": 5156 }, { "epoch": 0.33234516981375267, "grad_norm": 0.527027119841736, "learning_rate": 7.4214106695309705e-06, "loss": 0.0079, "step": 5157 }, { "epoch": 0.33240961526068186, "grad_norm": 0.1942045831769039, "learning_rate": 7.420694593626925e-06, "loss": 0.0004, "step": 5158 }, { "epoch": 0.332474060707611, "grad_norm": 0.001252549471896302, "learning_rate": 7.419978517722879e-06, "loss": 0.0, "step": 5159 }, { "epoch": 0.33253850615454017, "grad_norm": 0.004385940819667968, "learning_rate": 7.419262441818833e-06, "loss": 0.0, "step": 5160 }, { "epoch": 0.33260295160146935, "grad_norm": 0.0018455546544822695, "learning_rate": 7.418546365914787e-06, "loss": 0.0, "step": 5161 }, { "epoch": 0.33266739704839854, "grad_norm": 0.0013265551464794725, "learning_rate": 7.417830290010741e-06, "loss": 0.0, "step": 5162 }, { "epoch": 0.3327318424953277, "grad_norm": 0.0005334555103447912, "learning_rate": 7.4171142141066954e-06, "loss": 0.0, "step": 5163 }, { "epoch": 0.33279628794225685, "grad_norm": 0.0003283532295627354, "learning_rate": 7.41639813820265e-06, "loss": 0.0, "step": 5164 }, { "epoch": 0.33286073338918604, "grad_norm": 0.05352444132349498, "learning_rate": 7.415682062298605e-06, "loss": 0.0001, "step": 5165 }, { "epoch": 0.3329251788361152, "grad_norm": 0.008953477244246817, "learning_rate": 7.414965986394559e-06, "loss": 0.0, "step": 5166 }, { "epoch": 0.3329896242830444, "grad_norm": 0.14708287994672403, "learning_rate": 7.414249910490513e-06, "loss": 0.0003, "step": 5167 }, { "epoch": 0.3330540697299736, "grad_norm": 0.0017898316957946575, "learning_rate": 7.413533834586467e-06, "loss": 0.0, "step": 5168 }, { "epoch": 0.3331185151769028, "grad_norm": 0.019036091577501586, "learning_rate": 7.412817758682421e-06, "loss": 0.0001, "step": 5169 }, { "epoch": 0.3331829606238319, "grad_norm": 0.0005750557774482397, "learning_rate": 7.4121016827783755e-06, "loss": 0.0, "step": 5170 }, { "epoch": 0.3332474060707611, "grad_norm": 0.00792877003601551, "learning_rate": 7.41138560687433e-06, "loss": 0.0, "step": 5171 }, { "epoch": 0.33331185151769027, "grad_norm": 0.46352720952438115, "learning_rate": 7.410669530970283e-06, "loss": 0.0031, "step": 5172 }, { "epoch": 0.33337629696461946, "grad_norm": 0.04526218280174097, "learning_rate": 7.409953455066238e-06, "loss": 0.0001, "step": 5173 }, { "epoch": 0.33344074241154864, "grad_norm": 0.00010384908638211477, "learning_rate": 7.409237379162192e-06, "loss": 0.0, "step": 5174 }, { "epoch": 0.3335051878584778, "grad_norm": 0.0001965139918647116, "learning_rate": 7.408521303258146e-06, "loss": 0.0, "step": 5175 }, { "epoch": 0.33356963330540695, "grad_norm": 0.002679323084956987, "learning_rate": 7.4078052273541005e-06, "loss": 0.0, "step": 5176 }, { "epoch": 0.33363407875233614, "grad_norm": 0.0040892885241793665, "learning_rate": 7.407089151450054e-06, "loss": 0.0, "step": 5177 }, { "epoch": 0.3336985241992653, "grad_norm": 0.004283341959280961, "learning_rate": 7.406373075546008e-06, "loss": 0.0, "step": 5178 }, { "epoch": 0.3337629696461945, "grad_norm": 0.010563679890308148, "learning_rate": 7.4056569996419625e-06, "loss": 0.0001, "step": 5179 }, { "epoch": 0.3338274150931237, "grad_norm": 0.014411587506700527, "learning_rate": 7.404940923737917e-06, "loss": 0.0, "step": 5180 }, { "epoch": 0.3338918605400528, "grad_norm": 0.002117332996749874, "learning_rate": 7.404224847833871e-06, "loss": 0.0, "step": 5181 }, { "epoch": 0.333956305986982, "grad_norm": 0.0051531902656213316, "learning_rate": 7.4035087719298246e-06, "loss": 0.0, "step": 5182 }, { "epoch": 0.3340207514339112, "grad_norm": 0.008657921238361855, "learning_rate": 7.402792696025779e-06, "loss": 0.0, "step": 5183 }, { "epoch": 0.3340851968808404, "grad_norm": 0.08266300786287209, "learning_rate": 7.402076620121733e-06, "loss": 0.0001, "step": 5184 }, { "epoch": 0.33414964232776956, "grad_norm": 0.053964652856025946, "learning_rate": 7.4013605442176875e-06, "loss": 0.0001, "step": 5185 }, { "epoch": 0.33421408777469874, "grad_norm": 0.00027337132137712447, "learning_rate": 7.400644468313642e-06, "loss": 0.0, "step": 5186 }, { "epoch": 0.33427853322162787, "grad_norm": 0.017739007588697363, "learning_rate": 7.399928392409595e-06, "loss": 0.0001, "step": 5187 }, { "epoch": 0.33434297866855706, "grad_norm": 0.10516118308706995, "learning_rate": 7.39921231650555e-06, "loss": 0.0001, "step": 5188 }, { "epoch": 0.33440742411548624, "grad_norm": 0.0012310758511546937, "learning_rate": 7.398496240601505e-06, "loss": 0.0, "step": 5189 }, { "epoch": 0.3344718695624154, "grad_norm": 0.0024450255738462916, "learning_rate": 7.397780164697459e-06, "loss": 0.0, "step": 5190 }, { "epoch": 0.3345363150093446, "grad_norm": 0.008509604235843085, "learning_rate": 7.397064088793413e-06, "loss": 0.0001, "step": 5191 }, { "epoch": 0.33460076045627374, "grad_norm": 0.016462490353666547, "learning_rate": 7.3963480128893676e-06, "loss": 0.0002, "step": 5192 }, { "epoch": 0.3346652059032029, "grad_norm": 0.001184509139402617, "learning_rate": 7.395631936985321e-06, "loss": 0.0, "step": 5193 }, { "epoch": 0.3347296513501321, "grad_norm": 0.0011018125580738482, "learning_rate": 7.394915861081275e-06, "loss": 0.0, "step": 5194 }, { "epoch": 0.3347940967970613, "grad_norm": 0.001489303290882407, "learning_rate": 7.39419978517723e-06, "loss": 0.0, "step": 5195 }, { "epoch": 0.3348585422439905, "grad_norm": 0.0018044500319175656, "learning_rate": 7.393483709273184e-06, "loss": 0.0, "step": 5196 }, { "epoch": 0.33492298769091966, "grad_norm": 0.17798532857531452, "learning_rate": 7.392767633369138e-06, "loss": 0.0001, "step": 5197 }, { "epoch": 0.3349874331378488, "grad_norm": 0.023186446715734367, "learning_rate": 7.392051557465092e-06, "loss": 0.0, "step": 5198 }, { "epoch": 0.335051878584778, "grad_norm": 0.14059826808740464, "learning_rate": 7.391335481561046e-06, "loss": 0.0005, "step": 5199 }, { "epoch": 0.33511632403170716, "grad_norm": 0.003308997020900199, "learning_rate": 7.390619405657e-06, "loss": 0.0, "step": 5200 }, { "epoch": 0.33518076947863634, "grad_norm": 0.031233637035030917, "learning_rate": 7.3899033297529546e-06, "loss": 0.0001, "step": 5201 }, { "epoch": 0.3352452149255655, "grad_norm": 0.0012639814431313896, "learning_rate": 7.389187253848909e-06, "loss": 0.0, "step": 5202 }, { "epoch": 0.33530966037249466, "grad_norm": 0.005084546256411885, "learning_rate": 7.388471177944862e-06, "loss": 0.0, "step": 5203 }, { "epoch": 0.33537410581942384, "grad_norm": 0.39486029094773817, "learning_rate": 7.387755102040817e-06, "loss": 0.0007, "step": 5204 }, { "epoch": 0.335438551266353, "grad_norm": 0.035950594309817285, "learning_rate": 7.387039026136771e-06, "loss": 0.0004, "step": 5205 }, { "epoch": 0.3355029967132822, "grad_norm": 0.022437522697819116, "learning_rate": 7.386322950232725e-06, "loss": 0.0002, "step": 5206 }, { "epoch": 0.3355674421602114, "grad_norm": 0.002273460860477751, "learning_rate": 7.385606874328679e-06, "loss": 0.0, "step": 5207 }, { "epoch": 0.3356318876071406, "grad_norm": 0.019155860026953915, "learning_rate": 7.384890798424633e-06, "loss": 0.0, "step": 5208 }, { "epoch": 0.3356963330540697, "grad_norm": 0.008933259082885026, "learning_rate": 7.384174722520587e-06, "loss": 0.0, "step": 5209 }, { "epoch": 0.3357607785009989, "grad_norm": 0.0018883702706931679, "learning_rate": 7.3834586466165416e-06, "loss": 0.0, "step": 5210 }, { "epoch": 0.3358252239479281, "grad_norm": 0.041224874042787886, "learning_rate": 7.382742570712497e-06, "loss": 0.0002, "step": 5211 }, { "epoch": 0.33588966939485726, "grad_norm": 0.005298047347779996, "learning_rate": 7.382026494808451e-06, "loss": 0.0, "step": 5212 }, { "epoch": 0.33595411484178644, "grad_norm": 0.012037538191049219, "learning_rate": 7.381310418904405e-06, "loss": 0.0, "step": 5213 }, { "epoch": 0.33601856028871563, "grad_norm": 0.018119530926582156, "learning_rate": 7.380594343000359e-06, "loss": 0.0, "step": 5214 }, { "epoch": 0.33608300573564476, "grad_norm": 0.0065361923963192506, "learning_rate": 7.379878267096313e-06, "loss": 0.0001, "step": 5215 }, { "epoch": 0.33614745118257394, "grad_norm": 0.03795719088023908, "learning_rate": 7.379162191192267e-06, "loss": 0.0002, "step": 5216 }, { "epoch": 0.3362118966295031, "grad_norm": 0.0003327354220714893, "learning_rate": 7.378446115288222e-06, "loss": 0.0, "step": 5217 }, { "epoch": 0.3362763420764323, "grad_norm": 0.002622280967886348, "learning_rate": 7.377730039384176e-06, "loss": 0.0, "step": 5218 }, { "epoch": 0.3363407875233615, "grad_norm": 0.003362164567001042, "learning_rate": 7.377013963480129e-06, "loss": 0.0, "step": 5219 }, { "epoch": 0.3364052329702906, "grad_norm": 0.002488965637931309, "learning_rate": 7.376297887576084e-06, "loss": 0.0, "step": 5220 }, { "epoch": 0.3364696784172198, "grad_norm": 0.021178266136436676, "learning_rate": 7.375581811672038e-06, "loss": 0.0017, "step": 5221 }, { "epoch": 0.336534123864149, "grad_norm": 5.556497552128451e-05, "learning_rate": 7.374865735767992e-06, "loss": 0.0, "step": 5222 }, { "epoch": 0.3365985693110782, "grad_norm": 1.27603100623005, "learning_rate": 7.374149659863946e-06, "loss": 0.0302, "step": 5223 }, { "epoch": 0.33666301475800736, "grad_norm": 0.10044723039261633, "learning_rate": 7.3734335839599e-06, "loss": 0.0005, "step": 5224 }, { "epoch": 0.33672746020493655, "grad_norm": 0.002654275139626637, "learning_rate": 7.372717508055854e-06, "loss": 0.0, "step": 5225 }, { "epoch": 0.3367919056518657, "grad_norm": 0.15670975755353522, "learning_rate": 7.372001432151809e-06, "loss": 0.0001, "step": 5226 }, { "epoch": 0.33685635109879486, "grad_norm": 0.006504992868248114, "learning_rate": 7.371285356247763e-06, "loss": 0.0, "step": 5227 }, { "epoch": 0.33692079654572404, "grad_norm": 0.0029773298244788306, "learning_rate": 7.370569280343716e-06, "loss": 0.0, "step": 5228 }, { "epoch": 0.33698524199265323, "grad_norm": 0.001564768362004572, "learning_rate": 7.369853204439671e-06, "loss": 0.0, "step": 5229 }, { "epoch": 0.3370496874395824, "grad_norm": 0.46053742972565676, "learning_rate": 7.369137128535625e-06, "loss": 0.0004, "step": 5230 }, { "epoch": 0.33711413288651154, "grad_norm": 0.03325454502955247, "learning_rate": 7.368421052631579e-06, "loss": 0.0001, "step": 5231 }, { "epoch": 0.3371785783334407, "grad_norm": 0.017572879836545905, "learning_rate": 7.367704976727534e-06, "loss": 0.0, "step": 5232 }, { "epoch": 0.3372430237803699, "grad_norm": 0.008048314701695543, "learning_rate": 7.366988900823487e-06, "loss": 0.0, "step": 5233 }, { "epoch": 0.3373074692272991, "grad_norm": 0.0466043386887468, "learning_rate": 7.366272824919441e-06, "loss": 0.0001, "step": 5234 }, { "epoch": 0.3373719146742283, "grad_norm": 0.17321825195418455, "learning_rate": 7.3655567490153965e-06, "loss": 0.0002, "step": 5235 }, { "epoch": 0.33743636012115746, "grad_norm": 0.014061451726326977, "learning_rate": 7.364840673111351e-06, "loss": 0.0001, "step": 5236 }, { "epoch": 0.3375008055680866, "grad_norm": 0.0063745520315426, "learning_rate": 7.364124597207305e-06, "loss": 0.0, "step": 5237 }, { "epoch": 0.3375652510150158, "grad_norm": 0.09786029955600638, "learning_rate": 7.363408521303259e-06, "loss": 0.0007, "step": 5238 }, { "epoch": 0.33762969646194496, "grad_norm": 0.0045003900085370694, "learning_rate": 7.362692445399213e-06, "loss": 0.0, "step": 5239 }, { "epoch": 0.33769414190887415, "grad_norm": 0.07432003388342096, "learning_rate": 7.361976369495167e-06, "loss": 0.0001, "step": 5240 }, { "epoch": 0.33775858735580333, "grad_norm": 0.013922346916827281, "learning_rate": 7.361260293591121e-06, "loss": 0.0, "step": 5241 }, { "epoch": 0.3378230328027325, "grad_norm": 0.0038764664985424606, "learning_rate": 7.360544217687076e-06, "loss": 0.0, "step": 5242 }, { "epoch": 0.33788747824966164, "grad_norm": 0.004857801852726184, "learning_rate": 7.35982814178303e-06, "loss": 0.0, "step": 5243 }, { "epoch": 0.33795192369659083, "grad_norm": 0.2933163297644183, "learning_rate": 7.3591120658789835e-06, "loss": 0.0012, "step": 5244 }, { "epoch": 0.33801636914352, "grad_norm": 0.013560541083518562, "learning_rate": 7.358395989974938e-06, "loss": 0.0, "step": 5245 }, { "epoch": 0.3380808145904492, "grad_norm": 0.0003962089272216273, "learning_rate": 7.357679914070892e-06, "loss": 0.0, "step": 5246 }, { "epoch": 0.3381452600373784, "grad_norm": 0.009339175938270656, "learning_rate": 7.356963838166846e-06, "loss": 0.0, "step": 5247 }, { "epoch": 0.3382097054843075, "grad_norm": 0.0007190247019661537, "learning_rate": 7.356247762262801e-06, "loss": 0.0, "step": 5248 }, { "epoch": 0.3382741509312367, "grad_norm": 0.0059187646550370775, "learning_rate": 7.355531686358754e-06, "loss": 0.0, "step": 5249 }, { "epoch": 0.3383385963781659, "grad_norm": 0.4797958686513202, "learning_rate": 7.354815610454708e-06, "loss": 0.0016, "step": 5250 }, { "epoch": 0.33840304182509506, "grad_norm": 0.20533320593971888, "learning_rate": 7.354099534550663e-06, "loss": 0.0026, "step": 5251 }, { "epoch": 0.33846748727202425, "grad_norm": 0.0036076111581535755, "learning_rate": 7.353383458646617e-06, "loss": 0.0, "step": 5252 }, { "epoch": 0.33853193271895343, "grad_norm": 0.004331472841271792, "learning_rate": 7.352667382742571e-06, "loss": 0.0, "step": 5253 }, { "epoch": 0.33859637816588256, "grad_norm": 0.08988593500177013, "learning_rate": 7.351951306838525e-06, "loss": 0.0002, "step": 5254 }, { "epoch": 0.33866082361281175, "grad_norm": 0.004191599223747066, "learning_rate": 7.351235230934479e-06, "loss": 0.0, "step": 5255 }, { "epoch": 0.33872526905974093, "grad_norm": 0.36925171890969277, "learning_rate": 7.350519155030433e-06, "loss": 0.0003, "step": 5256 }, { "epoch": 0.3387897145066701, "grad_norm": 0.04986970061517144, "learning_rate": 7.349803079126388e-06, "loss": 0.0001, "step": 5257 }, { "epoch": 0.3388541599535993, "grad_norm": 0.010657370180991166, "learning_rate": 7.349087003222343e-06, "loss": 0.0, "step": 5258 }, { "epoch": 0.3389186054005284, "grad_norm": 0.5126343899815423, "learning_rate": 7.348370927318297e-06, "loss": 0.0013, "step": 5259 }, { "epoch": 0.3389830508474576, "grad_norm": 0.05698766120418015, "learning_rate": 7.3476548514142506e-06, "loss": 0.0001, "step": 5260 }, { "epoch": 0.3390474962943868, "grad_norm": 0.004494436469781267, "learning_rate": 7.346938775510205e-06, "loss": 0.0, "step": 5261 }, { "epoch": 0.339111941741316, "grad_norm": 0.0020461885320504514, "learning_rate": 7.346222699606159e-06, "loss": 0.0, "step": 5262 }, { "epoch": 0.33917638718824517, "grad_norm": 0.02113158221440442, "learning_rate": 7.3455066237021135e-06, "loss": 0.0002, "step": 5263 }, { "epoch": 0.33924083263517435, "grad_norm": 0.11814162932010543, "learning_rate": 7.344790547798068e-06, "loss": 0.0005, "step": 5264 }, { "epoch": 0.3393052780821035, "grad_norm": 0.011550950562999067, "learning_rate": 7.344074471894021e-06, "loss": 0.0, "step": 5265 }, { "epoch": 0.33936972352903266, "grad_norm": 0.02042146368376844, "learning_rate": 7.3433583959899755e-06, "loss": 0.0002, "step": 5266 }, { "epoch": 0.33943416897596185, "grad_norm": 0.005358844574434535, "learning_rate": 7.34264232008593e-06, "loss": 0.0, "step": 5267 }, { "epoch": 0.33949861442289103, "grad_norm": 0.003798950955890751, "learning_rate": 7.341926244181884e-06, "loss": 0.0, "step": 5268 }, { "epoch": 0.3395630598698202, "grad_norm": 0.008024759503129957, "learning_rate": 7.341210168277838e-06, "loss": 0.0, "step": 5269 }, { "epoch": 0.33962750531674935, "grad_norm": 0.008326903955132609, "learning_rate": 7.340494092373792e-06, "loss": 0.0, "step": 5270 }, { "epoch": 0.33969195076367853, "grad_norm": 0.11832471047203198, "learning_rate": 7.339778016469746e-06, "loss": 0.0019, "step": 5271 }, { "epoch": 0.3397563962106077, "grad_norm": 0.0015766559886541335, "learning_rate": 7.3390619405657004e-06, "loss": 0.0, "step": 5272 }, { "epoch": 0.3398208416575369, "grad_norm": 0.07010359991070295, "learning_rate": 7.338345864661655e-06, "loss": 0.0001, "step": 5273 }, { "epoch": 0.3398852871044661, "grad_norm": 0.0008208800151459135, "learning_rate": 7.337629788757609e-06, "loss": 0.0, "step": 5274 }, { "epoch": 0.33994973255139527, "grad_norm": 0.0007744022262015843, "learning_rate": 7.3369137128535625e-06, "loss": 0.0, "step": 5275 }, { "epoch": 0.3400141779983244, "grad_norm": 0.19669594039359198, "learning_rate": 7.336197636949517e-06, "loss": 0.0024, "step": 5276 }, { "epoch": 0.3400786234452536, "grad_norm": 0.001586631846665345, "learning_rate": 7.335481561045471e-06, "loss": 0.0, "step": 5277 }, { "epoch": 0.34014306889218276, "grad_norm": 0.0016974822848838252, "learning_rate": 7.334765485141425e-06, "loss": 0.0, "step": 5278 }, { "epoch": 0.34020751433911195, "grad_norm": 0.010299373551429326, "learning_rate": 7.33404940923738e-06, "loss": 0.0, "step": 5279 }, { "epoch": 0.34027195978604113, "grad_norm": 0.021553520264792618, "learning_rate": 7.333333333333333e-06, "loss": 0.0, "step": 5280 }, { "epoch": 0.3403364052329703, "grad_norm": 0.0010162259051668598, "learning_rate": 7.332617257429288e-06, "loss": 0.0, "step": 5281 }, { "epoch": 0.34040085067989945, "grad_norm": 0.19239838048229532, "learning_rate": 7.331901181525243e-06, "loss": 0.0016, "step": 5282 }, { "epoch": 0.34046529612682863, "grad_norm": 0.0019388955843136493, "learning_rate": 7.331185105621197e-06, "loss": 0.0, "step": 5283 }, { "epoch": 0.3405297415737578, "grad_norm": 0.10166173451835614, "learning_rate": 7.330469029717151e-06, "loss": 0.0002, "step": 5284 }, { "epoch": 0.340594187020687, "grad_norm": 0.2888227447924931, "learning_rate": 7.3297529538131055e-06, "loss": 0.0002, "step": 5285 }, { "epoch": 0.3406586324676162, "grad_norm": 0.10953043213619218, "learning_rate": 7.329036877909059e-06, "loss": 0.0018, "step": 5286 }, { "epoch": 0.3407230779145453, "grad_norm": 0.08343678035229286, "learning_rate": 7.328320802005013e-06, "loss": 0.0003, "step": 5287 }, { "epoch": 0.3407875233614745, "grad_norm": 0.008743033563584184, "learning_rate": 7.3276047261009675e-06, "loss": 0.0, "step": 5288 }, { "epoch": 0.3408519688084037, "grad_norm": 0.015663007613879978, "learning_rate": 7.326888650196922e-06, "loss": 0.0016, "step": 5289 }, { "epoch": 0.34091641425533287, "grad_norm": 0.16035313832000075, "learning_rate": 7.326172574292876e-06, "loss": 0.0004, "step": 5290 }, { "epoch": 0.34098085970226205, "grad_norm": 0.02837167493002224, "learning_rate": 7.32545649838883e-06, "loss": 0.0, "step": 5291 }, { "epoch": 0.34104530514919124, "grad_norm": 0.05193844351158227, "learning_rate": 7.324740422484784e-06, "loss": 0.0, "step": 5292 }, { "epoch": 0.34110975059612036, "grad_norm": 0.002242715562046738, "learning_rate": 7.324024346580738e-06, "loss": 0.0, "step": 5293 }, { "epoch": 0.34117419604304955, "grad_norm": 0.11049409118395545, "learning_rate": 7.3233082706766925e-06, "loss": 0.0005, "step": 5294 }, { "epoch": 0.34123864148997873, "grad_norm": 0.0005390615075129139, "learning_rate": 7.322592194772647e-06, "loss": 0.0, "step": 5295 }, { "epoch": 0.3413030869369079, "grad_norm": 0.0013760686804202237, "learning_rate": 7.3218761188686e-06, "loss": 0.0, "step": 5296 }, { "epoch": 0.3413675323838371, "grad_norm": 0.09472930550290676, "learning_rate": 7.3211600429645545e-06, "loss": 0.0011, "step": 5297 }, { "epoch": 0.34143197783076623, "grad_norm": 0.2669654151871651, "learning_rate": 7.320443967060509e-06, "loss": 0.0006, "step": 5298 }, { "epoch": 0.3414964232776954, "grad_norm": 0.005662463238322093, "learning_rate": 7.319727891156463e-06, "loss": 0.0, "step": 5299 }, { "epoch": 0.3415608687246246, "grad_norm": 1.0199606272634265, "learning_rate": 7.3190118152524166e-06, "loss": 0.001, "step": 5300 }, { "epoch": 0.3416253141715538, "grad_norm": 0.0006392407831279938, "learning_rate": 7.318295739348371e-06, "loss": 0.0, "step": 5301 }, { "epoch": 0.34168975961848297, "grad_norm": 0.0009464675577917425, "learning_rate": 7.317579663444325e-06, "loss": 0.0, "step": 5302 }, { "epoch": 0.34175420506541215, "grad_norm": 0.33982247253106124, "learning_rate": 7.3168635875402795e-06, "loss": 0.0008, "step": 5303 }, { "epoch": 0.3418186505123413, "grad_norm": 0.0005554538138186463, "learning_rate": 7.316147511636234e-06, "loss": 0.0, "step": 5304 }, { "epoch": 0.34188309595927047, "grad_norm": 0.011539964270771048, "learning_rate": 7.315431435732189e-06, "loss": 0.0, "step": 5305 }, { "epoch": 0.34194754140619965, "grad_norm": 0.004195442787445282, "learning_rate": 7.314715359828143e-06, "loss": 0.0, "step": 5306 }, { "epoch": 0.34201198685312884, "grad_norm": 0.007462997480980597, "learning_rate": 7.313999283924097e-06, "loss": 0.0, "step": 5307 }, { "epoch": 0.342076432300058, "grad_norm": 0.00024238609706196129, "learning_rate": 7.313283208020051e-06, "loss": 0.0, "step": 5308 }, { "epoch": 0.34214087774698715, "grad_norm": 0.0022106063684528892, "learning_rate": 7.312567132116005e-06, "loss": 0.0, "step": 5309 }, { "epoch": 0.34220532319391633, "grad_norm": 0.01913270911313999, "learning_rate": 7.3118510562119596e-06, "loss": 0.0001, "step": 5310 }, { "epoch": 0.3422697686408455, "grad_norm": 0.002142273071297971, "learning_rate": 7.311134980307914e-06, "loss": 0.0, "step": 5311 }, { "epoch": 0.3423342140877747, "grad_norm": 0.0065981141478347735, "learning_rate": 7.310418904403867e-06, "loss": 0.0, "step": 5312 }, { "epoch": 0.3423986595347039, "grad_norm": 0.004802954010512389, "learning_rate": 7.309702828499822e-06, "loss": 0.0, "step": 5313 }, { "epoch": 0.34246310498163307, "grad_norm": 0.00110266347810728, "learning_rate": 7.308986752595776e-06, "loss": 0.0, "step": 5314 }, { "epoch": 0.3425275504285622, "grad_norm": 0.0005134383818776354, "learning_rate": 7.30827067669173e-06, "loss": 0.0, "step": 5315 }, { "epoch": 0.3425919958754914, "grad_norm": 0.538310230953227, "learning_rate": 7.307554600787684e-06, "loss": 0.001, "step": 5316 }, { "epoch": 0.34265644132242057, "grad_norm": 0.00018143450225810087, "learning_rate": 7.306838524883638e-06, "loss": 0.0, "step": 5317 }, { "epoch": 0.34272088676934975, "grad_norm": 0.40119536868709277, "learning_rate": 7.306122448979592e-06, "loss": 0.0025, "step": 5318 }, { "epoch": 0.34278533221627894, "grad_norm": 0.024010840365756462, "learning_rate": 7.3054063730755466e-06, "loss": 0.0, "step": 5319 }, { "epoch": 0.3428497776632081, "grad_norm": 0.0008069614846977794, "learning_rate": 7.304690297171501e-06, "loss": 0.0, "step": 5320 }, { "epoch": 0.34291422311013725, "grad_norm": 0.028020230829516272, "learning_rate": 7.303974221267454e-06, "loss": 0.0, "step": 5321 }, { "epoch": 0.34297866855706644, "grad_norm": 0.00010121037613932627, "learning_rate": 7.303258145363409e-06, "loss": 0.0, "step": 5322 }, { "epoch": 0.3430431140039956, "grad_norm": 0.10079432169841467, "learning_rate": 7.302542069459363e-06, "loss": 0.0003, "step": 5323 }, { "epoch": 0.3431075594509248, "grad_norm": 0.03269509886827942, "learning_rate": 7.301825993555317e-06, "loss": 0.0002, "step": 5324 }, { "epoch": 0.343172004897854, "grad_norm": 0.0022781756443171625, "learning_rate": 7.3011099176512715e-06, "loss": 0.0, "step": 5325 }, { "epoch": 0.3432364503447831, "grad_norm": 0.0014035525781737842, "learning_rate": 7.300393841747225e-06, "loss": 0.0, "step": 5326 }, { "epoch": 0.3433008957917123, "grad_norm": 0.0006713917775058356, "learning_rate": 7.299677765843179e-06, "loss": 0.0, "step": 5327 }, { "epoch": 0.3433653412386415, "grad_norm": 0.01338775709200437, "learning_rate": 7.298961689939134e-06, "loss": 0.0, "step": 5328 }, { "epoch": 0.34342978668557067, "grad_norm": 0.0006338464533540836, "learning_rate": 7.298245614035089e-06, "loss": 0.0, "step": 5329 }, { "epoch": 0.34349423213249985, "grad_norm": 0.0021448701466866347, "learning_rate": 7.297529538131043e-06, "loss": 0.0, "step": 5330 }, { "epoch": 0.34355867757942904, "grad_norm": 0.012620152073128372, "learning_rate": 7.296813462226997e-06, "loss": 0.0, "step": 5331 }, { "epoch": 0.34362312302635817, "grad_norm": 0.000746423385854066, "learning_rate": 7.296097386322952e-06, "loss": 0.0, "step": 5332 }, { "epoch": 0.34368756847328735, "grad_norm": 9.873649287161303e-05, "learning_rate": 7.295381310418905e-06, "loss": 0.0, "step": 5333 }, { "epoch": 0.34375201392021654, "grad_norm": 0.00031365937247385795, "learning_rate": 7.294665234514859e-06, "loss": 0.0, "step": 5334 }, { "epoch": 0.3438164593671457, "grad_norm": 0.00528864045369544, "learning_rate": 7.293949158610814e-06, "loss": 0.0, "step": 5335 }, { "epoch": 0.3438809048140749, "grad_norm": 0.0005738690377691539, "learning_rate": 7.293233082706768e-06, "loss": 0.0, "step": 5336 }, { "epoch": 0.34394535026100403, "grad_norm": 0.0009102301472844842, "learning_rate": 7.292517006802721e-06, "loss": 0.0, "step": 5337 }, { "epoch": 0.3440097957079332, "grad_norm": 0.00034346797846901265, "learning_rate": 7.291800930898676e-06, "loss": 0.0, "step": 5338 }, { "epoch": 0.3440742411548624, "grad_norm": 0.049793358991671866, "learning_rate": 7.29108485499463e-06, "loss": 0.0004, "step": 5339 }, { "epoch": 0.3441386866017916, "grad_norm": 0.014751395360373839, "learning_rate": 7.290368779090584e-06, "loss": 0.0, "step": 5340 }, { "epoch": 0.3442031320487208, "grad_norm": 0.005169372080883912, "learning_rate": 7.289652703186539e-06, "loss": 0.0, "step": 5341 }, { "epoch": 0.34426757749564996, "grad_norm": 2.6960228307514163e-05, "learning_rate": 7.288936627282492e-06, "loss": 0.0, "step": 5342 }, { "epoch": 0.3443320229425791, "grad_norm": 0.050153460105654975, "learning_rate": 7.288220551378446e-06, "loss": 0.0001, "step": 5343 }, { "epoch": 0.34439646838950827, "grad_norm": 0.00024310397212061517, "learning_rate": 7.287504475474401e-06, "loss": 0.0, "step": 5344 }, { "epoch": 0.34446091383643745, "grad_norm": 0.05808561250266076, "learning_rate": 7.286788399570355e-06, "loss": 0.0006, "step": 5345 }, { "epoch": 0.34452535928336664, "grad_norm": 0.00013547144495678156, "learning_rate": 7.286072323666309e-06, "loss": 0.0, "step": 5346 }, { "epoch": 0.3445898047302958, "grad_norm": 0.3373302818885634, "learning_rate": 7.285356247762263e-06, "loss": 0.0025, "step": 5347 }, { "epoch": 0.34465425017722495, "grad_norm": 0.016717329194680847, "learning_rate": 7.284640171858217e-06, "loss": 0.0001, "step": 5348 }, { "epoch": 0.34471869562415414, "grad_norm": 0.00030821020629320307, "learning_rate": 7.283924095954171e-06, "loss": 0.0, "step": 5349 }, { "epoch": 0.3447831410710833, "grad_norm": 0.0033671255527705796, "learning_rate": 7.283208020050126e-06, "loss": 0.0, "step": 5350 }, { "epoch": 0.3448475865180125, "grad_norm": 0.005871330298462043, "learning_rate": 7.282491944146081e-06, "loss": 0.0, "step": 5351 }, { "epoch": 0.3449120319649417, "grad_norm": 0.0003919803199134911, "learning_rate": 7.281775868242035e-06, "loss": 0.0, "step": 5352 }, { "epoch": 0.3449764774118709, "grad_norm": 0.0004434323271049176, "learning_rate": 7.2810597923379885e-06, "loss": 0.0, "step": 5353 }, { "epoch": 0.3450409228588, "grad_norm": 0.17988686282623115, "learning_rate": 7.280343716433943e-06, "loss": 0.0011, "step": 5354 }, { "epoch": 0.3451053683057292, "grad_norm": 5.375923874340952e-05, "learning_rate": 7.279627640529897e-06, "loss": 0.0, "step": 5355 }, { "epoch": 0.34516981375265837, "grad_norm": 0.0009465090573032732, "learning_rate": 7.278911564625851e-06, "loss": 0.0, "step": 5356 }, { "epoch": 0.34523425919958756, "grad_norm": 0.0020489958458895278, "learning_rate": 7.278195488721806e-06, "loss": 0.0, "step": 5357 }, { "epoch": 0.34529870464651674, "grad_norm": 0.00010094518209825967, "learning_rate": 7.277479412817759e-06, "loss": 0.0, "step": 5358 }, { "epoch": 0.3453631500934459, "grad_norm": 0.0006345648995169979, "learning_rate": 7.276763336913713e-06, "loss": 0.0, "step": 5359 }, { "epoch": 0.34542759554037505, "grad_norm": 0.41101601724437337, "learning_rate": 7.276047261009668e-06, "loss": 0.0011, "step": 5360 }, { "epoch": 0.34549204098730424, "grad_norm": 0.01636080831375019, "learning_rate": 7.275331185105622e-06, "loss": 0.0001, "step": 5361 }, { "epoch": 0.3455564864342334, "grad_norm": 0.030371541605649418, "learning_rate": 7.274615109201576e-06, "loss": 0.0001, "step": 5362 }, { "epoch": 0.3456209318811626, "grad_norm": 0.0009595921015281959, "learning_rate": 7.27389903329753e-06, "loss": 0.0, "step": 5363 }, { "epoch": 0.3456853773280918, "grad_norm": 0.0028069042707871083, "learning_rate": 7.273182957393484e-06, "loss": 0.0, "step": 5364 }, { "epoch": 0.3457498227750209, "grad_norm": 0.006725710750406669, "learning_rate": 7.272466881489438e-06, "loss": 0.0, "step": 5365 }, { "epoch": 0.3458142682219501, "grad_norm": 0.02202792303496806, "learning_rate": 7.271750805585393e-06, "loss": 0.0002, "step": 5366 }, { "epoch": 0.3458787136688793, "grad_norm": 0.0033364088445729323, "learning_rate": 7.271034729681347e-06, "loss": 0.0, "step": 5367 }, { "epoch": 0.3459431591158085, "grad_norm": 0.4054470012322448, "learning_rate": 7.2703186537773e-06, "loss": 0.0021, "step": 5368 }, { "epoch": 0.34600760456273766, "grad_norm": 0.006543067397167515, "learning_rate": 7.269602577873255e-06, "loss": 0.0, "step": 5369 }, { "epoch": 0.34607205000966684, "grad_norm": 4.882931587784024e-05, "learning_rate": 7.268886501969209e-06, "loss": 0.0, "step": 5370 }, { "epoch": 0.34613649545659597, "grad_norm": 0.2890103138391657, "learning_rate": 7.268170426065163e-06, "loss": 0.0013, "step": 5371 }, { "epoch": 0.34620094090352516, "grad_norm": 0.02928876168441755, "learning_rate": 7.267454350161118e-06, "loss": 0.0002, "step": 5372 }, { "epoch": 0.34626538635045434, "grad_norm": 0.03327625299827368, "learning_rate": 7.266738274257071e-06, "loss": 0.0, "step": 5373 }, { "epoch": 0.3463298317973835, "grad_norm": 0.004407488781814231, "learning_rate": 7.266022198353025e-06, "loss": 0.0, "step": 5374 }, { "epoch": 0.3463942772443127, "grad_norm": 0.00046352221804896843, "learning_rate": 7.2653061224489805e-06, "loss": 0.0, "step": 5375 }, { "epoch": 0.34645872269124184, "grad_norm": 1.0442598045825846, "learning_rate": 7.264590046544935e-06, "loss": 0.0033, "step": 5376 }, { "epoch": 0.346523168138171, "grad_norm": 0.0087028528833674, "learning_rate": 7.263873970640889e-06, "loss": 0.0001, "step": 5377 }, { "epoch": 0.3465876135851002, "grad_norm": 0.04466082821105845, "learning_rate": 7.263157894736843e-06, "loss": 0.0, "step": 5378 }, { "epoch": 0.3466520590320294, "grad_norm": 0.00022414270993919194, "learning_rate": 7.262441818832797e-06, "loss": 0.0, "step": 5379 }, { "epoch": 0.3467165044789586, "grad_norm": 0.13606521537237062, "learning_rate": 7.261725742928751e-06, "loss": 0.0017, "step": 5380 }, { "epoch": 0.34678094992588776, "grad_norm": 0.0032358758394306617, "learning_rate": 7.2610096670247055e-06, "loss": 0.0, "step": 5381 }, { "epoch": 0.3468453953728169, "grad_norm": 0.10357858884203731, "learning_rate": 7.26029359112066e-06, "loss": 0.0012, "step": 5382 }, { "epoch": 0.3469098408197461, "grad_norm": 9.994017630786523e-05, "learning_rate": 7.259577515216614e-06, "loss": 0.0, "step": 5383 }, { "epoch": 0.34697428626667526, "grad_norm": 9.690410155464549e-05, "learning_rate": 7.2588614393125675e-06, "loss": 0.0, "step": 5384 }, { "epoch": 0.34703873171360444, "grad_norm": 0.005215186637607239, "learning_rate": 7.258145363408522e-06, "loss": 0.0, "step": 5385 }, { "epoch": 0.3471031771605336, "grad_norm": 0.0011850453431943883, "learning_rate": 7.257429287504476e-06, "loss": 0.0, "step": 5386 }, { "epoch": 0.34716762260746276, "grad_norm": 0.0797241166546595, "learning_rate": 7.25671321160043e-06, "loss": 0.0005, "step": 5387 }, { "epoch": 0.34723206805439194, "grad_norm": 0.14444908177693655, "learning_rate": 7.255997135696385e-06, "loss": 0.0022, "step": 5388 }, { "epoch": 0.3472965135013211, "grad_norm": 0.0015494501170997498, "learning_rate": 7.255281059792338e-06, "loss": 0.0, "step": 5389 }, { "epoch": 0.3473609589482503, "grad_norm": 0.0723502269609664, "learning_rate": 7.2545649838882924e-06, "loss": 0.0002, "step": 5390 }, { "epoch": 0.3474254043951795, "grad_norm": 0.13437147135869884, "learning_rate": 7.253848907984247e-06, "loss": 0.002, "step": 5391 }, { "epoch": 0.3474898498421087, "grad_norm": 0.07793027930166455, "learning_rate": 7.253132832080201e-06, "loss": 0.0005, "step": 5392 }, { "epoch": 0.3475542952890378, "grad_norm": 0.0005459188861817741, "learning_rate": 7.252416756176155e-06, "loss": 0.0, "step": 5393 }, { "epoch": 0.347618740735967, "grad_norm": 0.0005229998610681955, "learning_rate": 7.251700680272109e-06, "loss": 0.0, "step": 5394 }, { "epoch": 0.3476831861828962, "grad_norm": 0.014832489254280722, "learning_rate": 7.250984604368063e-06, "loss": 0.0, "step": 5395 }, { "epoch": 0.34774763162982536, "grad_norm": 0.001830075605712059, "learning_rate": 7.250268528464017e-06, "loss": 0.0, "step": 5396 }, { "epoch": 0.34781207707675454, "grad_norm": 0.00035845492018812364, "learning_rate": 7.249552452559972e-06, "loss": 0.0015, "step": 5397 }, { "epoch": 0.34787652252368373, "grad_norm": 0.22111401246031417, "learning_rate": 7.248836376655927e-06, "loss": 0.0018, "step": 5398 }, { "epoch": 0.34794096797061286, "grad_norm": 0.34508336003421725, "learning_rate": 7.248120300751881e-06, "loss": 0.0013, "step": 5399 }, { "epoch": 0.34800541341754204, "grad_norm": 0.11998791833124678, "learning_rate": 7.247404224847835e-06, "loss": 0.0003, "step": 5400 }, { "epoch": 0.3480698588644712, "grad_norm": 0.0010438379812748842, "learning_rate": 7.246688148943789e-06, "loss": 0.0, "step": 5401 }, { "epoch": 0.3481343043114004, "grad_norm": 0.0009734335987623431, "learning_rate": 7.245972073039743e-06, "loss": 0.0, "step": 5402 }, { "epoch": 0.3481987497583296, "grad_norm": 0.0018563996895338923, "learning_rate": 7.2452559971356975e-06, "loss": 0.0, "step": 5403 }, { "epoch": 0.3482631952052587, "grad_norm": 0.20984205384467244, "learning_rate": 7.244539921231652e-06, "loss": 0.0004, "step": 5404 }, { "epoch": 0.3483276406521879, "grad_norm": 0.015448803826021713, "learning_rate": 7.243823845327605e-06, "loss": 0.0002, "step": 5405 }, { "epoch": 0.3483920860991171, "grad_norm": 0.03937960007927294, "learning_rate": 7.2431077694235595e-06, "loss": 0.0, "step": 5406 }, { "epoch": 0.3484565315460463, "grad_norm": 0.022501578310819947, "learning_rate": 7.242391693519514e-06, "loss": 0.0, "step": 5407 }, { "epoch": 0.34852097699297546, "grad_norm": 0.03170367334764925, "learning_rate": 7.241675617615468e-06, "loss": 0.0016, "step": 5408 }, { "epoch": 0.34858542243990465, "grad_norm": 0.0038783959984967516, "learning_rate": 7.2409595417114224e-06, "loss": 0.0, "step": 5409 }, { "epoch": 0.3486498678868338, "grad_norm": 0.24674016584155095, "learning_rate": 7.240243465807376e-06, "loss": 0.0024, "step": 5410 }, { "epoch": 0.34871431333376296, "grad_norm": 0.000528044999434226, "learning_rate": 7.23952738990333e-06, "loss": 0.0, "step": 5411 }, { "epoch": 0.34877875878069214, "grad_norm": 0.04335431754627749, "learning_rate": 7.2388113139992845e-06, "loss": 0.0001, "step": 5412 }, { "epoch": 0.34884320422762133, "grad_norm": 9.593325914361795e-05, "learning_rate": 7.238095238095239e-06, "loss": 0.0, "step": 5413 }, { "epoch": 0.3489076496745505, "grad_norm": 0.0014304134812169755, "learning_rate": 7.237379162191192e-06, "loss": 0.0, "step": 5414 }, { "epoch": 0.34897209512147964, "grad_norm": 0.004716281375558154, "learning_rate": 7.2366630862871465e-06, "loss": 0.0, "step": 5415 }, { "epoch": 0.3490365405684088, "grad_norm": 0.0077143613239694445, "learning_rate": 7.235947010383101e-06, "loss": 0.0, "step": 5416 }, { "epoch": 0.349100986015338, "grad_norm": 0.019198324219276017, "learning_rate": 7.235230934479055e-06, "loss": 0.0002, "step": 5417 }, { "epoch": 0.3491654314622672, "grad_norm": 0.001355964329896379, "learning_rate": 7.234514858575009e-06, "loss": 0.0, "step": 5418 }, { "epoch": 0.3492298769091964, "grad_norm": 0.022225915169465784, "learning_rate": 7.233798782670963e-06, "loss": 0.0, "step": 5419 }, { "epoch": 0.34929432235612556, "grad_norm": 0.017610877868679407, "learning_rate": 7.233082706766917e-06, "loss": 0.0, "step": 5420 }, { "epoch": 0.3493587678030547, "grad_norm": 0.007438692190637162, "learning_rate": 7.232366630862872e-06, "loss": 0.0, "step": 5421 }, { "epoch": 0.3494232132499839, "grad_norm": 0.007550065798805919, "learning_rate": 7.231650554958827e-06, "loss": 0.0001, "step": 5422 }, { "epoch": 0.34948765869691306, "grad_norm": 1.1858373848494488, "learning_rate": 7.230934479054781e-06, "loss": 0.0052, "step": 5423 }, { "epoch": 0.34955210414384225, "grad_norm": 0.13834342205455954, "learning_rate": 7.230218403150735e-06, "loss": 0.0013, "step": 5424 }, { "epoch": 0.34961654959077143, "grad_norm": 0.0019500085784708264, "learning_rate": 7.2295023272466895e-06, "loss": 0.0, "step": 5425 }, { "epoch": 0.34968099503770056, "grad_norm": 0.274820237577197, "learning_rate": 7.228786251342643e-06, "loss": 0.0014, "step": 5426 }, { "epoch": 0.34974544048462974, "grad_norm": 0.16902668597399362, "learning_rate": 7.228070175438597e-06, "loss": 0.0004, "step": 5427 }, { "epoch": 0.34980988593155893, "grad_norm": 0.0031173492434176304, "learning_rate": 7.2273540995345516e-06, "loss": 0.0, "step": 5428 }, { "epoch": 0.3498743313784881, "grad_norm": 0.0015306366566559873, "learning_rate": 7.226638023630506e-06, "loss": 0.0, "step": 5429 }, { "epoch": 0.3499387768254173, "grad_norm": 0.0010650450143334291, "learning_rate": 7.225921947726459e-06, "loss": 0.0, "step": 5430 }, { "epoch": 0.3500032222723465, "grad_norm": 0.00024179158076437042, "learning_rate": 7.225205871822414e-06, "loss": 0.0, "step": 5431 }, { "epoch": 0.3500676677192756, "grad_norm": 0.007258163297915242, "learning_rate": 7.224489795918368e-06, "loss": 0.0, "step": 5432 }, { "epoch": 0.3501321131662048, "grad_norm": 2.513392435115923, "learning_rate": 7.223773720014322e-06, "loss": 0.0187, "step": 5433 }, { "epoch": 0.350196558613134, "grad_norm": 0.0008973330386759609, "learning_rate": 7.2230576441102765e-06, "loss": 0.0, "step": 5434 }, { "epoch": 0.35026100406006316, "grad_norm": 0.011253913607002416, "learning_rate": 7.22234156820623e-06, "loss": 0.0, "step": 5435 }, { "epoch": 0.35032544950699235, "grad_norm": 0.6856501140185183, "learning_rate": 7.221625492302184e-06, "loss": 0.0033, "step": 5436 }, { "epoch": 0.35038989495392153, "grad_norm": 0.002744451414998837, "learning_rate": 7.2209094163981386e-06, "loss": 0.0, "step": 5437 }, { "epoch": 0.35045434040085066, "grad_norm": 0.1043653877480623, "learning_rate": 7.220193340494093e-06, "loss": 0.0001, "step": 5438 }, { "epoch": 0.35051878584777985, "grad_norm": 0.04470527560901967, "learning_rate": 7.219477264590047e-06, "loss": 0.0001, "step": 5439 }, { "epoch": 0.35058323129470903, "grad_norm": 0.0028691901618445375, "learning_rate": 7.218761188686001e-06, "loss": 0.0, "step": 5440 }, { "epoch": 0.3506476767416382, "grad_norm": 0.014048890434080821, "learning_rate": 7.218045112781955e-06, "loss": 0.0001, "step": 5441 }, { "epoch": 0.3507121221885674, "grad_norm": 0.0001558104690521515, "learning_rate": 7.217329036877909e-06, "loss": 0.0, "step": 5442 }, { "epoch": 0.35077656763549653, "grad_norm": 0.0006746233065462414, "learning_rate": 7.2166129609738635e-06, "loss": 0.0, "step": 5443 }, { "epoch": 0.3508410130824257, "grad_norm": 0.0024750607397397654, "learning_rate": 7.215896885069818e-06, "loss": 0.0, "step": 5444 }, { "epoch": 0.3509054585293549, "grad_norm": 0.006352597500439202, "learning_rate": 7.215180809165773e-06, "loss": 0.0, "step": 5445 }, { "epoch": 0.3509699039762841, "grad_norm": 0.1022585984155468, "learning_rate": 7.214464733261726e-06, "loss": 0.0001, "step": 5446 }, { "epoch": 0.35103434942321327, "grad_norm": 0.003986765644523545, "learning_rate": 7.213748657357681e-06, "loss": 0.0, "step": 5447 }, { "epoch": 0.35109879487014245, "grad_norm": 0.35015860097590396, "learning_rate": 7.213032581453635e-06, "loss": 0.0015, "step": 5448 }, { "epoch": 0.3511632403170716, "grad_norm": 0.004855895917974423, "learning_rate": 7.212316505549589e-06, "loss": 0.0, "step": 5449 }, { "epoch": 0.35122768576400076, "grad_norm": 0.00476196278000269, "learning_rate": 7.211600429645544e-06, "loss": 0.0, "step": 5450 }, { "epoch": 0.35129213121092995, "grad_norm": 4.2020598196443586e-05, "learning_rate": 7.210884353741497e-06, "loss": 0.0, "step": 5451 }, { "epoch": 0.35135657665785913, "grad_norm": 0.000898003348724344, "learning_rate": 7.210168277837451e-06, "loss": 0.0, "step": 5452 }, { "epoch": 0.3514210221047883, "grad_norm": 0.00018579366299213354, "learning_rate": 7.209452201933406e-06, "loss": 0.0, "step": 5453 }, { "epoch": 0.35148546755171745, "grad_norm": 0.158084677871953, "learning_rate": 7.20873612602936e-06, "loss": 0.001, "step": 5454 }, { "epoch": 0.35154991299864663, "grad_norm": 0.028924409174075277, "learning_rate": 7.208020050125314e-06, "loss": 0.0001, "step": 5455 }, { "epoch": 0.3516143584455758, "grad_norm": 0.10685927277426384, "learning_rate": 7.207303974221268e-06, "loss": 0.0016, "step": 5456 }, { "epoch": 0.351678803892505, "grad_norm": 0.003219382873925816, "learning_rate": 7.206587898317222e-06, "loss": 0.0, "step": 5457 }, { "epoch": 0.3517432493394342, "grad_norm": 0.023248814491553707, "learning_rate": 7.205871822413176e-06, "loss": 0.0002, "step": 5458 }, { "epoch": 0.35180769478636337, "grad_norm": 0.008260090754341405, "learning_rate": 7.205155746509131e-06, "loss": 0.0, "step": 5459 }, { "epoch": 0.3518721402332925, "grad_norm": 0.02113740518555449, "learning_rate": 7.204439670605085e-06, "loss": 0.0002, "step": 5460 }, { "epoch": 0.3519365856802217, "grad_norm": 0.00722376485948921, "learning_rate": 7.203723594701038e-06, "loss": 0.0001, "step": 5461 }, { "epoch": 0.35200103112715087, "grad_norm": 0.0833738754293488, "learning_rate": 7.203007518796993e-06, "loss": 0.0001, "step": 5462 }, { "epoch": 0.35206547657408005, "grad_norm": 0.00026359184926858333, "learning_rate": 7.202291442892947e-06, "loss": 0.0, "step": 5463 }, { "epoch": 0.35212992202100923, "grad_norm": 0.14296674674876744, "learning_rate": 7.201575366988901e-06, "loss": 0.0005, "step": 5464 }, { "epoch": 0.35219436746793836, "grad_norm": 0.0016080032851724442, "learning_rate": 7.2008592910848555e-06, "loss": 0.0, "step": 5465 }, { "epoch": 0.35225881291486755, "grad_norm": 0.0009047653986284783, "learning_rate": 7.200143215180809e-06, "loss": 0.0, "step": 5466 }, { "epoch": 0.35232325836179673, "grad_norm": 0.0308639411701821, "learning_rate": 7.199427139276763e-06, "loss": 0.0001, "step": 5467 }, { "epoch": 0.3523877038087259, "grad_norm": 0.4904011446517464, "learning_rate": 7.1987110633727184e-06, "loss": 0.0024, "step": 5468 }, { "epoch": 0.3524521492556551, "grad_norm": 0.04524271869956901, "learning_rate": 7.197994987468673e-06, "loss": 0.0001, "step": 5469 }, { "epoch": 0.3525165947025843, "grad_norm": 0.04035843557537505, "learning_rate": 7.197278911564627e-06, "loss": 0.0002, "step": 5470 }, { "epoch": 0.3525810401495134, "grad_norm": 0.006825177577924673, "learning_rate": 7.196562835660581e-06, "loss": 0.0, "step": 5471 }, { "epoch": 0.3526454855964426, "grad_norm": 0.13986985453114945, "learning_rate": 7.195846759756535e-06, "loss": 0.0006, "step": 5472 }, { "epoch": 0.3527099310433718, "grad_norm": 0.0017970094026375993, "learning_rate": 7.195130683852489e-06, "loss": 0.0, "step": 5473 }, { "epoch": 0.35277437649030097, "grad_norm": 0.006094273782439035, "learning_rate": 7.194414607948443e-06, "loss": 0.0, "step": 5474 }, { "epoch": 0.35283882193723015, "grad_norm": 0.008172755866807201, "learning_rate": 7.193698532044398e-06, "loss": 0.0001, "step": 5475 }, { "epoch": 0.35290326738415934, "grad_norm": 5.158375250050825, "learning_rate": 7.192982456140352e-06, "loss": 0.0934, "step": 5476 }, { "epoch": 0.35296771283108846, "grad_norm": 0.22788083518064708, "learning_rate": 7.192266380236305e-06, "loss": 0.0008, "step": 5477 }, { "epoch": 0.35303215827801765, "grad_norm": 1.3529695452401322, "learning_rate": 7.19155030433226e-06, "loss": 0.0108, "step": 5478 }, { "epoch": 0.35309660372494683, "grad_norm": 0.001636092079940293, "learning_rate": 7.190834228428214e-06, "loss": 0.0, "step": 5479 }, { "epoch": 0.353161049171876, "grad_norm": 1.787735879849132e-05, "learning_rate": 7.190118152524168e-06, "loss": 0.0, "step": 5480 }, { "epoch": 0.3532254946188052, "grad_norm": 0.004427899683739982, "learning_rate": 7.189402076620123e-06, "loss": 0.0, "step": 5481 }, { "epoch": 0.35328994006573433, "grad_norm": 0.0019082844941634223, "learning_rate": 7.188686000716076e-06, "loss": 0.0, "step": 5482 }, { "epoch": 0.3533543855126635, "grad_norm": 0.0046977578647996515, "learning_rate": 7.18796992481203e-06, "loss": 0.0, "step": 5483 }, { "epoch": 0.3534188309595927, "grad_norm": 0.040097359922366724, "learning_rate": 7.187253848907985e-06, "loss": 0.0002, "step": 5484 }, { "epoch": 0.3534832764065219, "grad_norm": 0.001663645825594052, "learning_rate": 7.186537773003939e-06, "loss": 0.0, "step": 5485 }, { "epoch": 0.35354772185345107, "grad_norm": 0.07505223049971804, "learning_rate": 7.185821697099893e-06, "loss": 0.0007, "step": 5486 }, { "epoch": 0.35361216730038025, "grad_norm": 3.3424625948722473, "learning_rate": 7.185105621195847e-06, "loss": 0.0171, "step": 5487 }, { "epoch": 0.3536766127473094, "grad_norm": 0.002825290381998295, "learning_rate": 7.184389545291801e-06, "loss": 0.0, "step": 5488 }, { "epoch": 0.35374105819423857, "grad_norm": 0.009157336320531529, "learning_rate": 7.183673469387755e-06, "loss": 0.0, "step": 5489 }, { "epoch": 0.35380550364116775, "grad_norm": 0.002773670880667257, "learning_rate": 7.18295739348371e-06, "loss": 0.0, "step": 5490 }, { "epoch": 0.35386994908809694, "grad_norm": 0.08855313046890825, "learning_rate": 7.182241317579665e-06, "loss": 0.0003, "step": 5491 }, { "epoch": 0.3539343945350261, "grad_norm": 0.004166906161995427, "learning_rate": 7.181525241675619e-06, "loss": 0.0, "step": 5492 }, { "epoch": 0.35399883998195525, "grad_norm": 0.005168623229808937, "learning_rate": 7.1808091657715725e-06, "loss": 0.0, "step": 5493 }, { "epoch": 0.35406328542888443, "grad_norm": 0.003997300840861494, "learning_rate": 7.180093089867527e-06, "loss": 0.0, "step": 5494 }, { "epoch": 0.3541277308758136, "grad_norm": 0.009798578823934631, "learning_rate": 7.179377013963481e-06, "loss": 0.0, "step": 5495 }, { "epoch": 0.3541921763227428, "grad_norm": 0.003605997833364615, "learning_rate": 7.178660938059435e-06, "loss": 0.0, "step": 5496 }, { "epoch": 0.354256621769672, "grad_norm": 0.39026054546592165, "learning_rate": 7.17794486215539e-06, "loss": 0.0028, "step": 5497 }, { "epoch": 0.35432106721660117, "grad_norm": 0.22522868316952996, "learning_rate": 7.177228786251343e-06, "loss": 0.0006, "step": 5498 }, { "epoch": 0.3543855126635303, "grad_norm": 0.0007982156110889913, "learning_rate": 7.1765127103472975e-06, "loss": 0.0, "step": 5499 }, { "epoch": 0.3544499581104595, "grad_norm": 0.26714035756006893, "learning_rate": 7.175796634443252e-06, "loss": 0.002, "step": 5500 }, { "epoch": 0.35451440355738867, "grad_norm": 0.0017835782053076952, "learning_rate": 7.175080558539206e-06, "loss": 0.0, "step": 5501 }, { "epoch": 0.35457884900431785, "grad_norm": 0.001526153733418559, "learning_rate": 7.17436448263516e-06, "loss": 0.0, "step": 5502 }, { "epoch": 0.35464329445124704, "grad_norm": 0.006026490988079468, "learning_rate": 7.173648406731114e-06, "loss": 0.0, "step": 5503 }, { "epoch": 0.35470773989817617, "grad_norm": 0.017582708992891122, "learning_rate": 7.172932330827068e-06, "loss": 0.0015, "step": 5504 }, { "epoch": 0.35477218534510535, "grad_norm": 0.30581867098200244, "learning_rate": 7.172216254923022e-06, "loss": 0.0019, "step": 5505 }, { "epoch": 0.35483663079203454, "grad_norm": 0.0036598724000132385, "learning_rate": 7.171500179018977e-06, "loss": 0.0, "step": 5506 }, { "epoch": 0.3549010762389637, "grad_norm": 0.0026242730294129815, "learning_rate": 7.17078410311493e-06, "loss": 0.0, "step": 5507 }, { "epoch": 0.3549655216858929, "grad_norm": 0.0011075331112727902, "learning_rate": 7.1700680272108844e-06, "loss": 0.0, "step": 5508 }, { "epoch": 0.3550299671328221, "grad_norm": 0.19952681974901088, "learning_rate": 7.169351951306839e-06, "loss": 0.0004, "step": 5509 }, { "epoch": 0.3550944125797512, "grad_norm": 0.000878946801695103, "learning_rate": 7.168635875402793e-06, "loss": 0.0, "step": 5510 }, { "epoch": 0.3551588580266804, "grad_norm": 0.24027537949617142, "learning_rate": 7.167919799498747e-06, "loss": 0.0018, "step": 5511 }, { "epoch": 0.3552233034736096, "grad_norm": 0.015573056221064105, "learning_rate": 7.167203723594701e-06, "loss": 0.0, "step": 5512 }, { "epoch": 0.35528774892053877, "grad_norm": 0.012980932203767375, "learning_rate": 7.166487647690655e-06, "loss": 0.0, "step": 5513 }, { "epoch": 0.35535219436746796, "grad_norm": 0.02937732815710757, "learning_rate": 7.165771571786609e-06, "loss": 0.0001, "step": 5514 }, { "epoch": 0.35541663981439714, "grad_norm": 0.003133471086012606, "learning_rate": 7.1650554958825645e-06, "loss": 0.0, "step": 5515 }, { "epoch": 0.35548108526132627, "grad_norm": 0.0001353438010940419, "learning_rate": 7.164339419978519e-06, "loss": 0.0, "step": 5516 }, { "epoch": 0.35554553070825545, "grad_norm": 0.0015784288790910163, "learning_rate": 7.163623344074473e-06, "loss": 0.0, "step": 5517 }, { "epoch": 0.35560997615518464, "grad_norm": 0.25518015621623313, "learning_rate": 7.1629072681704274e-06, "loss": 0.0006, "step": 5518 }, { "epoch": 0.3556744216021138, "grad_norm": 0.05870437907137682, "learning_rate": 7.162191192266381e-06, "loss": 0.0014, "step": 5519 }, { "epoch": 0.355738867049043, "grad_norm": 0.008105418444959963, "learning_rate": 7.161475116362335e-06, "loss": 0.0, "step": 5520 }, { "epoch": 0.35580331249597213, "grad_norm": 0.07082251511150572, "learning_rate": 7.1607590404582895e-06, "loss": 0.002, "step": 5521 }, { "epoch": 0.3558677579429013, "grad_norm": 0.025880416565134647, "learning_rate": 7.160042964554244e-06, "loss": 0.0, "step": 5522 }, { "epoch": 0.3559322033898305, "grad_norm": 0.0074415682950392205, "learning_rate": 7.159326888650197e-06, "loss": 0.0, "step": 5523 }, { "epoch": 0.3559966488367597, "grad_norm": 0.010568843378593547, "learning_rate": 7.1586108127461515e-06, "loss": 0.0, "step": 5524 }, { "epoch": 0.3560610942836889, "grad_norm": 0.02883981875404712, "learning_rate": 7.157894736842106e-06, "loss": 0.0001, "step": 5525 }, { "epoch": 0.35612553973061806, "grad_norm": 0.1443228572447026, "learning_rate": 7.15717866093806e-06, "loss": 0.0004, "step": 5526 }, { "epoch": 0.3561899851775472, "grad_norm": 0.1543692648358986, "learning_rate": 7.1564625850340144e-06, "loss": 0.0007, "step": 5527 }, { "epoch": 0.35625443062447637, "grad_norm": 0.006400996936471855, "learning_rate": 7.155746509129968e-06, "loss": 0.0, "step": 5528 }, { "epoch": 0.35631887607140555, "grad_norm": 0.023857215512726463, "learning_rate": 7.155030433225922e-06, "loss": 0.0, "step": 5529 }, { "epoch": 0.35638332151833474, "grad_norm": 0.0010024296612209454, "learning_rate": 7.1543143573218765e-06, "loss": 0.0, "step": 5530 }, { "epoch": 0.3564477669652639, "grad_norm": 0.46602568507627373, "learning_rate": 7.153598281417831e-06, "loss": 0.0007, "step": 5531 }, { "epoch": 0.35651221241219305, "grad_norm": 0.007157312719751308, "learning_rate": 7.152882205513785e-06, "loss": 0.0, "step": 5532 }, { "epoch": 0.35657665785912224, "grad_norm": 0.0022695185644662445, "learning_rate": 7.1521661296097385e-06, "loss": 0.0, "step": 5533 }, { "epoch": 0.3566411033060514, "grad_norm": 0.0007219926736973189, "learning_rate": 7.151450053705693e-06, "loss": 0.0, "step": 5534 }, { "epoch": 0.3567055487529806, "grad_norm": 0.007866555084452822, "learning_rate": 7.150733977801647e-06, "loss": 0.0, "step": 5535 }, { "epoch": 0.3567699941999098, "grad_norm": 0.001497095775340077, "learning_rate": 7.150017901897601e-06, "loss": 0.0, "step": 5536 }, { "epoch": 0.356834439646839, "grad_norm": 0.0021093867092792707, "learning_rate": 7.149301825993556e-06, "loss": 0.0, "step": 5537 }, { "epoch": 0.3568988850937681, "grad_norm": 0.0030429109849377583, "learning_rate": 7.148585750089511e-06, "loss": 0.0, "step": 5538 }, { "epoch": 0.3569633305406973, "grad_norm": 0.005652474576479686, "learning_rate": 7.147869674185464e-06, "loss": 0.0, "step": 5539 }, { "epoch": 0.3570277759876265, "grad_norm": 0.0002939338741031065, "learning_rate": 7.147153598281419e-06, "loss": 0.0, "step": 5540 }, { "epoch": 0.35709222143455566, "grad_norm": 0.0005272829418229353, "learning_rate": 7.146437522377373e-06, "loss": 0.0, "step": 5541 }, { "epoch": 0.35715666688148484, "grad_norm": 0.0014691027056069703, "learning_rate": 7.145721446473327e-06, "loss": 0.0, "step": 5542 }, { "epoch": 0.35722111232841397, "grad_norm": 0.0019714445798182743, "learning_rate": 7.1450053705692815e-06, "loss": 0.0, "step": 5543 }, { "epoch": 0.35728555777534315, "grad_norm": 0.0017453958343376377, "learning_rate": 7.144289294665235e-06, "loss": 0.0, "step": 5544 }, { "epoch": 0.35735000322227234, "grad_norm": 0.009047168410867279, "learning_rate": 7.143573218761189e-06, "loss": 0.0, "step": 5545 }, { "epoch": 0.3574144486692015, "grad_norm": 0.2842775943064396, "learning_rate": 7.1428571428571436e-06, "loss": 0.0018, "step": 5546 }, { "epoch": 0.3574788941161307, "grad_norm": 0.001421204330789419, "learning_rate": 7.142141066953098e-06, "loss": 0.0, "step": 5547 }, { "epoch": 0.3575433395630599, "grad_norm": 0.003525207027181184, "learning_rate": 7.141424991049052e-06, "loss": 0.0, "step": 5548 }, { "epoch": 0.357607785009989, "grad_norm": 0.05103926720907329, "learning_rate": 7.140708915145006e-06, "loss": 0.0001, "step": 5549 }, { "epoch": 0.3576722304569182, "grad_norm": 0.5437392661251964, "learning_rate": 7.13999283924096e-06, "loss": 0.001, "step": 5550 }, { "epoch": 0.3577366759038474, "grad_norm": 0.0900597256928726, "learning_rate": 7.139276763336914e-06, "loss": 0.0006, "step": 5551 }, { "epoch": 0.3578011213507766, "grad_norm": 0.0013100415151973432, "learning_rate": 7.1385606874328685e-06, "loss": 0.0, "step": 5552 }, { "epoch": 0.35786556679770576, "grad_norm": 0.287950904363612, "learning_rate": 7.137844611528823e-06, "loss": 0.0014, "step": 5553 }, { "epoch": 0.35793001224463494, "grad_norm": 0.0008622037652708815, "learning_rate": 7.137128535624776e-06, "loss": 0.0, "step": 5554 }, { "epoch": 0.35799445769156407, "grad_norm": 0.0015019054203790178, "learning_rate": 7.1364124597207306e-06, "loss": 0.0, "step": 5555 }, { "epoch": 0.35805890313849326, "grad_norm": 0.020955137988755305, "learning_rate": 7.135696383816685e-06, "loss": 0.0001, "step": 5556 }, { "epoch": 0.35812334858542244, "grad_norm": 0.3577950619108931, "learning_rate": 7.134980307912639e-06, "loss": 0.0015, "step": 5557 }, { "epoch": 0.3581877940323516, "grad_norm": 0.0007089497421827163, "learning_rate": 7.1342642320085935e-06, "loss": 0.0, "step": 5558 }, { "epoch": 0.3582522394792808, "grad_norm": 0.0246501357034617, "learning_rate": 7.133548156104547e-06, "loss": 0.0001, "step": 5559 }, { "epoch": 0.35831668492620994, "grad_norm": 0.5051342395314098, "learning_rate": 7.132832080200501e-06, "loss": 0.0037, "step": 5560 }, { "epoch": 0.3583811303731391, "grad_norm": 0.02137042368144486, "learning_rate": 7.1321160042964555e-06, "loss": 0.0002, "step": 5561 }, { "epoch": 0.3584455758200683, "grad_norm": 0.0010559639345811118, "learning_rate": 7.131399928392411e-06, "loss": 0.0, "step": 5562 }, { "epoch": 0.3585100212669975, "grad_norm": 0.559211362846977, "learning_rate": 7.130683852488365e-06, "loss": 0.0012, "step": 5563 }, { "epoch": 0.3585744667139267, "grad_norm": 0.017202826393729526, "learning_rate": 7.129967776584319e-06, "loss": 0.0, "step": 5564 }, { "epoch": 0.35863891216085586, "grad_norm": 0.005808692085628072, "learning_rate": 7.129251700680273e-06, "loss": 0.0, "step": 5565 }, { "epoch": 0.358703357607785, "grad_norm": 0.06375497502894896, "learning_rate": 7.128535624776227e-06, "loss": 0.0002, "step": 5566 }, { "epoch": 0.3587678030547142, "grad_norm": 0.0058093430870949725, "learning_rate": 7.127819548872181e-06, "loss": 0.0, "step": 5567 }, { "epoch": 0.35883224850164336, "grad_norm": 0.032179049666154896, "learning_rate": 7.127103472968136e-06, "loss": 0.0, "step": 5568 }, { "epoch": 0.35889669394857254, "grad_norm": 0.03255481688625018, "learning_rate": 7.12638739706409e-06, "loss": 0.0, "step": 5569 }, { "epoch": 0.3589611393955017, "grad_norm": 0.03756726107666502, "learning_rate": 7.125671321160043e-06, "loss": 0.0002, "step": 5570 }, { "epoch": 0.35902558484243086, "grad_norm": 0.0024971589116067573, "learning_rate": 7.124955245255998e-06, "loss": 0.0, "step": 5571 }, { "epoch": 0.35909003028936004, "grad_norm": 0.00042623072682908026, "learning_rate": 7.124239169351952e-06, "loss": 0.0, "step": 5572 }, { "epoch": 0.3591544757362892, "grad_norm": 0.0017245227030109347, "learning_rate": 7.123523093447906e-06, "loss": 0.0, "step": 5573 }, { "epoch": 0.3592189211832184, "grad_norm": 3.358811867999132, "learning_rate": 7.1228070175438605e-06, "loss": 0.007, "step": 5574 }, { "epoch": 0.3592833666301476, "grad_norm": 0.005075338771603977, "learning_rate": 7.122090941639814e-06, "loss": 0.0, "step": 5575 }, { "epoch": 0.3593478120770768, "grad_norm": 0.0017143627481109437, "learning_rate": 7.121374865735768e-06, "loss": 0.0, "step": 5576 }, { "epoch": 0.3594122575240059, "grad_norm": 0.0020707422092871275, "learning_rate": 7.120658789831723e-06, "loss": 0.0, "step": 5577 }, { "epoch": 0.3594767029709351, "grad_norm": 0.0007588354010330688, "learning_rate": 7.119942713927677e-06, "loss": 0.0, "step": 5578 }, { "epoch": 0.3595411484178643, "grad_norm": 0.08036000371090321, "learning_rate": 7.119226638023631e-06, "loss": 0.0004, "step": 5579 }, { "epoch": 0.35960559386479346, "grad_norm": 0.00012982236447759736, "learning_rate": 7.118510562119585e-06, "loss": 0.0, "step": 5580 }, { "epoch": 0.35967003931172264, "grad_norm": 0.029504824766621456, "learning_rate": 7.117794486215539e-06, "loss": 0.0002, "step": 5581 }, { "epoch": 0.3597344847586518, "grad_norm": 0.0033183237279156897, "learning_rate": 7.117078410311493e-06, "loss": 0.0, "step": 5582 }, { "epoch": 0.35979893020558096, "grad_norm": 0.004723393791790135, "learning_rate": 7.1163623344074475e-06, "loss": 0.0, "step": 5583 }, { "epoch": 0.35986337565251014, "grad_norm": 0.08625253997566465, "learning_rate": 7.115646258503401e-06, "loss": 0.0003, "step": 5584 }, { "epoch": 0.3599278210994393, "grad_norm": 0.009677660664644353, "learning_rate": 7.114930182599357e-06, "loss": 0.0, "step": 5585 }, { "epoch": 0.3599922665463685, "grad_norm": 0.00018866588452496163, "learning_rate": 7.1142141066953104e-06, "loss": 0.0, "step": 5586 }, { "epoch": 0.3600567119932977, "grad_norm": 0.0016751659241547368, "learning_rate": 7.113498030791265e-06, "loss": 0.0, "step": 5587 }, { "epoch": 0.3601211574402268, "grad_norm": 0.0017755997623683422, "learning_rate": 7.112781954887219e-06, "loss": 0.0, "step": 5588 }, { "epoch": 0.360185602887156, "grad_norm": 0.013514959361658361, "learning_rate": 7.112065878983173e-06, "loss": 0.0, "step": 5589 }, { "epoch": 0.3602500483340852, "grad_norm": 0.00037803318271111, "learning_rate": 7.111349803079128e-06, "loss": 0.0, "step": 5590 }, { "epoch": 0.3603144937810144, "grad_norm": 0.050001389420844165, "learning_rate": 7.110633727175081e-06, "loss": 0.0002, "step": 5591 }, { "epoch": 0.36037893922794356, "grad_norm": 0.0014661494256198607, "learning_rate": 7.109917651271035e-06, "loss": 0.0, "step": 5592 }, { "epoch": 0.36044338467487275, "grad_norm": 0.0172112343024897, "learning_rate": 7.10920157536699e-06, "loss": 0.0, "step": 5593 }, { "epoch": 0.3605078301218019, "grad_norm": 0.003739827721603054, "learning_rate": 7.108485499462944e-06, "loss": 0.0, "step": 5594 }, { "epoch": 0.36057227556873106, "grad_norm": 0.0035318203953639943, "learning_rate": 7.107769423558898e-06, "loss": 0.0, "step": 5595 }, { "epoch": 0.36063672101566024, "grad_norm": 0.024730250178133278, "learning_rate": 7.107053347654852e-06, "loss": 0.0, "step": 5596 }, { "epoch": 0.36070116646258943, "grad_norm": 0.00018136025456127962, "learning_rate": 7.106337271750806e-06, "loss": 0.0, "step": 5597 }, { "epoch": 0.3607656119095186, "grad_norm": 0.006373948769455688, "learning_rate": 7.10562119584676e-06, "loss": 0.0015, "step": 5598 }, { "epoch": 0.36083005735644774, "grad_norm": 0.0007691793894941653, "learning_rate": 7.104905119942715e-06, "loss": 0.0, "step": 5599 }, { "epoch": 0.3608945028033769, "grad_norm": 0.009427548311023269, "learning_rate": 7.104189044038668e-06, "loss": 0.0, "step": 5600 }, { "epoch": 0.3609589482503061, "grad_norm": 0.14387279738820716, "learning_rate": 7.103472968134622e-06, "loss": 0.0001, "step": 5601 }, { "epoch": 0.3610233936972353, "grad_norm": 0.060341764475685185, "learning_rate": 7.102756892230577e-06, "loss": 0.0016, "step": 5602 }, { "epoch": 0.3610878391441645, "grad_norm": 0.00019170525434062738, "learning_rate": 7.102040816326531e-06, "loss": 0.0, "step": 5603 }, { "epoch": 0.36115228459109366, "grad_norm": 0.018135760065123022, "learning_rate": 7.101324740422485e-06, "loss": 0.0, "step": 5604 }, { "epoch": 0.3612167300380228, "grad_norm": 0.07888118928994316, "learning_rate": 7.100608664518439e-06, "loss": 0.0001, "step": 5605 }, { "epoch": 0.361281175484952, "grad_norm": 0.0009429972317331661, "learning_rate": 7.099892588614393e-06, "loss": 0.0, "step": 5606 }, { "epoch": 0.36134562093188116, "grad_norm": 0.0005612899489036782, "learning_rate": 7.099176512710347e-06, "loss": 0.0, "step": 5607 }, { "epoch": 0.36141006637881035, "grad_norm": 0.0023027160039246065, "learning_rate": 7.0984604368063025e-06, "loss": 0.0, "step": 5608 }, { "epoch": 0.36147451182573953, "grad_norm": 0.0004911979683064441, "learning_rate": 7.097744360902257e-06, "loss": 0.0, "step": 5609 }, { "epoch": 0.36153895727266866, "grad_norm": 0.15138024304600262, "learning_rate": 7.097028284998211e-06, "loss": 0.0017, "step": 5610 }, { "epoch": 0.36160340271959784, "grad_norm": 0.007569795518407454, "learning_rate": 7.096312209094165e-06, "loss": 0.0, "step": 5611 }, { "epoch": 0.36166784816652703, "grad_norm": 0.009712033820648556, "learning_rate": 7.095596133190119e-06, "loss": 0.0, "step": 5612 }, { "epoch": 0.3617322936134562, "grad_norm": 0.12356386491916947, "learning_rate": 7.094880057286073e-06, "loss": 0.0003, "step": 5613 }, { "epoch": 0.3617967390603854, "grad_norm": 0.0013265063818277874, "learning_rate": 7.094163981382027e-06, "loss": 0.0, "step": 5614 }, { "epoch": 0.3618611845073146, "grad_norm": 0.0016747608507945689, "learning_rate": 7.093447905477982e-06, "loss": 0.0, "step": 5615 }, { "epoch": 0.3619256299542437, "grad_norm": 0.0008821640464383144, "learning_rate": 7.092731829573936e-06, "loss": 0.0, "step": 5616 }, { "epoch": 0.3619900754011729, "grad_norm": 0.00030392545722019647, "learning_rate": 7.0920157536698895e-06, "loss": 0.0, "step": 5617 }, { "epoch": 0.3620545208481021, "grad_norm": 0.0054529868195787165, "learning_rate": 7.091299677765844e-06, "loss": 0.0, "step": 5618 }, { "epoch": 0.36211896629503126, "grad_norm": 0.13914422730415102, "learning_rate": 7.090583601861798e-06, "loss": 0.0001, "step": 5619 }, { "epoch": 0.36218341174196045, "grad_norm": 0.02705458926689199, "learning_rate": 7.089867525957752e-06, "loss": 0.0001, "step": 5620 }, { "epoch": 0.36224785718888963, "grad_norm": 0.13120434966247765, "learning_rate": 7.089151450053706e-06, "loss": 0.0008, "step": 5621 }, { "epoch": 0.36231230263581876, "grad_norm": 0.12314011810163554, "learning_rate": 7.08843537414966e-06, "loss": 0.0015, "step": 5622 }, { "epoch": 0.36237674808274795, "grad_norm": 0.01434271131636332, "learning_rate": 7.087719298245614e-06, "loss": 0.0001, "step": 5623 }, { "epoch": 0.36244119352967713, "grad_norm": 0.021528547431360342, "learning_rate": 7.087003222341569e-06, "loss": 0.0, "step": 5624 }, { "epoch": 0.3625056389766063, "grad_norm": 0.01073329697163935, "learning_rate": 7.086287146437523e-06, "loss": 0.0, "step": 5625 }, { "epoch": 0.3625700844235355, "grad_norm": 1.960513490832337, "learning_rate": 7.0855710705334764e-06, "loss": 0.0038, "step": 5626 }, { "epoch": 0.36263452987046463, "grad_norm": 0.025460006296816034, "learning_rate": 7.084854994629431e-06, "loss": 0.0, "step": 5627 }, { "epoch": 0.3626989753173938, "grad_norm": 0.20646837479210997, "learning_rate": 7.084138918725385e-06, "loss": 0.0004, "step": 5628 }, { "epoch": 0.362763420764323, "grad_norm": 0.25142192289102916, "learning_rate": 7.083422842821339e-06, "loss": 0.001, "step": 5629 }, { "epoch": 0.3628278662112522, "grad_norm": 0.006383531148001112, "learning_rate": 7.082706766917294e-06, "loss": 0.0, "step": 5630 }, { "epoch": 0.36289231165818137, "grad_norm": 0.0029308486291541765, "learning_rate": 7.081990691013247e-06, "loss": 0.0, "step": 5631 }, { "epoch": 0.36295675710511055, "grad_norm": 0.0007120470554510387, "learning_rate": 7.081274615109203e-06, "loss": 0.0, "step": 5632 }, { "epoch": 0.3630212025520397, "grad_norm": 0.0012225325945689409, "learning_rate": 7.0805585392051565e-06, "loss": 0.0, "step": 5633 }, { "epoch": 0.36308564799896886, "grad_norm": 0.019594588990991643, "learning_rate": 7.079842463301111e-06, "loss": 0.0001, "step": 5634 }, { "epoch": 0.36315009344589805, "grad_norm": 0.003277557610881603, "learning_rate": 7.079126387397065e-06, "loss": 0.0, "step": 5635 }, { "epoch": 0.36321453889282723, "grad_norm": 0.04047106517839488, "learning_rate": 7.0784103114930194e-06, "loss": 0.0001, "step": 5636 }, { "epoch": 0.3632789843397564, "grad_norm": 0.006228676430910245, "learning_rate": 7.077694235588973e-06, "loss": 0.0, "step": 5637 }, { "epoch": 0.36334342978668555, "grad_norm": 0.004968203124256135, "learning_rate": 7.076978159684927e-06, "loss": 0.0, "step": 5638 }, { "epoch": 0.36340787523361473, "grad_norm": 0.004763822483671288, "learning_rate": 7.0762620837808815e-06, "loss": 0.0, "step": 5639 }, { "epoch": 0.3634723206805439, "grad_norm": 0.47286776074125114, "learning_rate": 7.075546007876836e-06, "loss": 0.0032, "step": 5640 }, { "epoch": 0.3635367661274731, "grad_norm": 0.21851157069067817, "learning_rate": 7.07482993197279e-06, "loss": 0.0024, "step": 5641 }, { "epoch": 0.3636012115744023, "grad_norm": 0.000831134689962334, "learning_rate": 7.0741138560687435e-06, "loss": 0.0, "step": 5642 }, { "epoch": 0.36366565702133147, "grad_norm": 0.0012023844883557063, "learning_rate": 7.073397780164698e-06, "loss": 0.0, "step": 5643 }, { "epoch": 0.3637301024682606, "grad_norm": 0.00047426837176558134, "learning_rate": 7.072681704260652e-06, "loss": 0.0, "step": 5644 }, { "epoch": 0.3637945479151898, "grad_norm": 0.0028780370890813933, "learning_rate": 7.0719656283566064e-06, "loss": 0.0, "step": 5645 }, { "epoch": 0.36385899336211897, "grad_norm": 0.16501946869057837, "learning_rate": 7.071249552452561e-06, "loss": 0.0012, "step": 5646 }, { "epoch": 0.36392343880904815, "grad_norm": 0.26383039160046534, "learning_rate": 7.070533476548514e-06, "loss": 0.0006, "step": 5647 }, { "epoch": 0.36398788425597733, "grad_norm": 0.7559218269984767, "learning_rate": 7.0698174006444685e-06, "loss": 0.0082, "step": 5648 }, { "epoch": 0.36405232970290646, "grad_norm": 0.11576493211318237, "learning_rate": 7.069101324740423e-06, "loss": 0.001, "step": 5649 }, { "epoch": 0.36411677514983565, "grad_norm": 0.3949228552741045, "learning_rate": 7.068385248836377e-06, "loss": 0.0015, "step": 5650 }, { "epoch": 0.36418122059676483, "grad_norm": 0.0030827061663709814, "learning_rate": 7.067669172932331e-06, "loss": 0.0, "step": 5651 }, { "epoch": 0.364245666043694, "grad_norm": 0.05707504512960632, "learning_rate": 7.066953097028285e-06, "loss": 0.0044, "step": 5652 }, { "epoch": 0.3643101114906232, "grad_norm": 0.0029535415213127816, "learning_rate": 7.066237021124239e-06, "loss": 0.0, "step": 5653 }, { "epoch": 0.3643745569375524, "grad_norm": 0.6487911363102642, "learning_rate": 7.065520945220193e-06, "loss": 0.0012, "step": 5654 }, { "epoch": 0.3644390023844815, "grad_norm": 8.800743879255332e-05, "learning_rate": 7.0648048693161486e-06, "loss": 0.0, "step": 5655 }, { "epoch": 0.3645034478314107, "grad_norm": 0.003355957561265595, "learning_rate": 7.064088793412103e-06, "loss": 0.0, "step": 5656 }, { "epoch": 0.3645678932783399, "grad_norm": 0.007201152011268038, "learning_rate": 7.063372717508057e-06, "loss": 0.0, "step": 5657 }, { "epoch": 0.36463233872526907, "grad_norm": 0.021779140689082172, "learning_rate": 7.062656641604011e-06, "loss": 0.0001, "step": 5658 }, { "epoch": 0.36469678417219825, "grad_norm": 0.1626429279984157, "learning_rate": 7.061940565699965e-06, "loss": 0.0009, "step": 5659 }, { "epoch": 0.36476122961912744, "grad_norm": 0.8840851851432028, "learning_rate": 7.061224489795919e-06, "loss": 0.0044, "step": 5660 }, { "epoch": 0.36482567506605657, "grad_norm": 0.3878680980398993, "learning_rate": 7.0605084138918735e-06, "loss": 0.0022, "step": 5661 }, { "epoch": 0.36489012051298575, "grad_norm": 0.010117391982138556, "learning_rate": 7.059792337987828e-06, "loss": 0.0, "step": 5662 }, { "epoch": 0.36495456595991493, "grad_norm": 0.05438971549517224, "learning_rate": 7.059076262083781e-06, "loss": 0.0002, "step": 5663 }, { "epoch": 0.3650190114068441, "grad_norm": 0.0023272584511669445, "learning_rate": 7.0583601861797356e-06, "loss": 0.0, "step": 5664 }, { "epoch": 0.3650834568537733, "grad_norm": 0.008117755104337068, "learning_rate": 7.05764411027569e-06, "loss": 0.0, "step": 5665 }, { "epoch": 0.36514790230070243, "grad_norm": 0.1603883944921648, "learning_rate": 7.056928034371644e-06, "loss": 0.0011, "step": 5666 }, { "epoch": 0.3652123477476316, "grad_norm": 0.1780197137734566, "learning_rate": 7.0562119584675985e-06, "loss": 0.0018, "step": 5667 }, { "epoch": 0.3652767931945608, "grad_norm": 0.05630731138250431, "learning_rate": 7.055495882563552e-06, "loss": 0.0001, "step": 5668 }, { "epoch": 0.36534123864149, "grad_norm": 0.004059539192963445, "learning_rate": 7.054779806659506e-06, "loss": 0.0, "step": 5669 }, { "epoch": 0.36540568408841917, "grad_norm": 0.2651977187882061, "learning_rate": 7.0540637307554605e-06, "loss": 0.0011, "step": 5670 }, { "epoch": 0.36547012953534835, "grad_norm": 0.18956324729432547, "learning_rate": 7.053347654851415e-06, "loss": 0.0001, "step": 5671 }, { "epoch": 0.3655345749822775, "grad_norm": 0.06572701027378827, "learning_rate": 7.052631578947369e-06, "loss": 0.0001, "step": 5672 }, { "epoch": 0.36559902042920667, "grad_norm": 0.0016957948449696752, "learning_rate": 7.0519155030433226e-06, "loss": 0.0, "step": 5673 }, { "epoch": 0.36566346587613585, "grad_norm": 0.017716966280276046, "learning_rate": 7.051199427139277e-06, "loss": 0.0001, "step": 5674 }, { "epoch": 0.36572791132306504, "grad_norm": 0.001855080590519422, "learning_rate": 7.050483351235231e-06, "loss": 0.0, "step": 5675 }, { "epoch": 0.3657923567699942, "grad_norm": 0.0014049639301356737, "learning_rate": 7.0497672753311855e-06, "loss": 0.0, "step": 5676 }, { "epoch": 0.36585680221692335, "grad_norm": 0.006962279213334821, "learning_rate": 7.04905119942714e-06, "loss": 0.0, "step": 5677 }, { "epoch": 0.36592124766385253, "grad_norm": 0.020090188529929526, "learning_rate": 7.048335123523095e-06, "loss": 0.0, "step": 5678 }, { "epoch": 0.3659856931107817, "grad_norm": 0.10503809921713848, "learning_rate": 7.047619047619048e-06, "loss": 0.0002, "step": 5679 }, { "epoch": 0.3660501385577109, "grad_norm": 0.004166492745336915, "learning_rate": 7.046902971715003e-06, "loss": 0.0, "step": 5680 }, { "epoch": 0.3661145840046401, "grad_norm": 0.0009582915293647691, "learning_rate": 7.046186895810957e-06, "loss": 0.0, "step": 5681 }, { "epoch": 0.36617902945156927, "grad_norm": 0.037545327208160215, "learning_rate": 7.045470819906911e-06, "loss": 0.0001, "step": 5682 }, { "epoch": 0.3662434748984984, "grad_norm": 0.10824613353346786, "learning_rate": 7.0447547440028655e-06, "loss": 0.0003, "step": 5683 }, { "epoch": 0.3663079203454276, "grad_norm": 0.0014164057504567702, "learning_rate": 7.044038668098819e-06, "loss": 0.0, "step": 5684 }, { "epoch": 0.36637236579235677, "grad_norm": 0.729636427012959, "learning_rate": 7.043322592194773e-06, "loss": 0.0045, "step": 5685 }, { "epoch": 0.36643681123928595, "grad_norm": 0.05235399263431841, "learning_rate": 7.042606516290728e-06, "loss": 0.0001, "step": 5686 }, { "epoch": 0.36650125668621514, "grad_norm": 0.0010319380225699353, "learning_rate": 7.041890440386682e-06, "loss": 0.0, "step": 5687 }, { "epoch": 0.36656570213314427, "grad_norm": 0.0012176516510191088, "learning_rate": 7.041174364482636e-06, "loss": 0.0, "step": 5688 }, { "epoch": 0.36663014758007345, "grad_norm": 0.0004752984702248449, "learning_rate": 7.04045828857859e-06, "loss": 0.0, "step": 5689 }, { "epoch": 0.36669459302700264, "grad_norm": 0.019731819460332183, "learning_rate": 7.039742212674544e-06, "loss": 0.0001, "step": 5690 }, { "epoch": 0.3667590384739318, "grad_norm": 0.0057392792271502915, "learning_rate": 7.039026136770498e-06, "loss": 0.0, "step": 5691 }, { "epoch": 0.366823483920861, "grad_norm": 0.46144410757981064, "learning_rate": 7.0383100608664525e-06, "loss": 0.0028, "step": 5692 }, { "epoch": 0.3668879293677902, "grad_norm": 0.0238733995461195, "learning_rate": 7.037593984962407e-06, "loss": 0.0001, "step": 5693 }, { "epoch": 0.3669523748147193, "grad_norm": 0.30712166491100057, "learning_rate": 7.03687790905836e-06, "loss": 0.0024, "step": 5694 }, { "epoch": 0.3670168202616485, "grad_norm": 0.00197018473504978, "learning_rate": 7.036161833154315e-06, "loss": 0.0, "step": 5695 }, { "epoch": 0.3670812657085777, "grad_norm": 0.007235413846213685, "learning_rate": 7.035445757250269e-06, "loss": 0.0, "step": 5696 }, { "epoch": 0.36714571115550687, "grad_norm": 0.0008705440038263109, "learning_rate": 7.034729681346223e-06, "loss": 0.0, "step": 5697 }, { "epoch": 0.36721015660243606, "grad_norm": 0.016319481775685544, "learning_rate": 7.034013605442177e-06, "loss": 0.0, "step": 5698 }, { "epoch": 0.36727460204936524, "grad_norm": 0.060190379923427566, "learning_rate": 7.033297529538131e-06, "loss": 0.0001, "step": 5699 }, { "epoch": 0.36733904749629437, "grad_norm": 0.07271395616914396, "learning_rate": 7.032581453634085e-06, "loss": 0.0001, "step": 5700 }, { "epoch": 0.36740349294322355, "grad_norm": 0.004665248624292797, "learning_rate": 7.0318653777300395e-06, "loss": 0.0, "step": 5701 }, { "epoch": 0.36746793839015274, "grad_norm": 0.08727091492317983, "learning_rate": 7.031149301825995e-06, "loss": 0.0001, "step": 5702 }, { "epoch": 0.3675323838370819, "grad_norm": 0.00788484775339447, "learning_rate": 7.030433225921949e-06, "loss": 0.0, "step": 5703 }, { "epoch": 0.3675968292840111, "grad_norm": 0.035432436020510494, "learning_rate": 7.029717150017903e-06, "loss": 0.0001, "step": 5704 }, { "epoch": 0.36766127473094024, "grad_norm": 0.031635243714684846, "learning_rate": 7.029001074113857e-06, "loss": 0.0001, "step": 5705 }, { "epoch": 0.3677257201778694, "grad_norm": 0.4187037698648444, "learning_rate": 7.028284998209811e-06, "loss": 0.0015, "step": 5706 }, { "epoch": 0.3677901656247986, "grad_norm": 0.001485610405980241, "learning_rate": 7.027568922305765e-06, "loss": 0.0, "step": 5707 }, { "epoch": 0.3678546110717278, "grad_norm": 0.0032538214693292274, "learning_rate": 7.02685284640172e-06, "loss": 0.0, "step": 5708 }, { "epoch": 0.367919056518657, "grad_norm": 0.4946226701409237, "learning_rate": 7.026136770497674e-06, "loss": 0.0016, "step": 5709 }, { "epoch": 0.36798350196558616, "grad_norm": 0.02520561557442119, "learning_rate": 7.025420694593627e-06, "loss": 0.0001, "step": 5710 }, { "epoch": 0.3680479474125153, "grad_norm": 0.010406505327414488, "learning_rate": 7.024704618689582e-06, "loss": 0.0, "step": 5711 }, { "epoch": 0.36811239285944447, "grad_norm": 0.009454967464173939, "learning_rate": 7.023988542785536e-06, "loss": 0.0, "step": 5712 }, { "epoch": 0.36817683830637365, "grad_norm": 0.5461984263936956, "learning_rate": 7.02327246688149e-06, "loss": 0.0025, "step": 5713 }, { "epoch": 0.36824128375330284, "grad_norm": 0.011822903242047569, "learning_rate": 7.022556390977444e-06, "loss": 0.0001, "step": 5714 }, { "epoch": 0.368305729200232, "grad_norm": 0.002773322625933541, "learning_rate": 7.021840315073398e-06, "loss": 0.0, "step": 5715 }, { "epoch": 0.36837017464716115, "grad_norm": 0.14855857353534296, "learning_rate": 7.021124239169352e-06, "loss": 0.0002, "step": 5716 }, { "epoch": 0.36843462009409034, "grad_norm": 0.027646845724586845, "learning_rate": 7.020408163265307e-06, "loss": 0.0, "step": 5717 }, { "epoch": 0.3684990655410195, "grad_norm": 0.0027564969182885253, "learning_rate": 7.019692087361261e-06, "loss": 0.0, "step": 5718 }, { "epoch": 0.3685635109879487, "grad_norm": 0.0005528286557136186, "learning_rate": 7.018976011457214e-06, "loss": 0.0, "step": 5719 }, { "epoch": 0.3686279564348779, "grad_norm": 1.5152654319623102, "learning_rate": 7.018259935553169e-06, "loss": 0.0032, "step": 5720 }, { "epoch": 0.3686924018818071, "grad_norm": 0.0014066507855464159, "learning_rate": 7.017543859649123e-06, "loss": 0.0, "step": 5721 }, { "epoch": 0.3687568473287362, "grad_norm": 0.0002940719089644006, "learning_rate": 7.016827783745077e-06, "loss": 0.0, "step": 5722 }, { "epoch": 0.3688212927756654, "grad_norm": 0.7755664462750681, "learning_rate": 7.0161117078410316e-06, "loss": 0.0042, "step": 5723 }, { "epoch": 0.3688857382225946, "grad_norm": 0.011148354046125965, "learning_rate": 7.015395631936985e-06, "loss": 0.0, "step": 5724 }, { "epoch": 0.36895018366952376, "grad_norm": 0.037286561287033865, "learning_rate": 7.014679556032941e-06, "loss": 0.0001, "step": 5725 }, { "epoch": 0.36901462911645294, "grad_norm": 0.28917756239200254, "learning_rate": 7.0139634801288945e-06, "loss": 0.001, "step": 5726 }, { "epoch": 0.36907907456338207, "grad_norm": 0.02074656065833119, "learning_rate": 7.013247404224849e-06, "loss": 0.0001, "step": 5727 }, { "epoch": 0.36914352001031125, "grad_norm": 0.009874683740494761, "learning_rate": 7.012531328320803e-06, "loss": 0.0, "step": 5728 }, { "epoch": 0.36920796545724044, "grad_norm": 0.0010247783570523034, "learning_rate": 7.011815252416757e-06, "loss": 0.0, "step": 5729 }, { "epoch": 0.3692724109041696, "grad_norm": 0.12852172866055342, "learning_rate": 7.011099176512711e-06, "loss": 0.0017, "step": 5730 }, { "epoch": 0.3693368563510988, "grad_norm": 0.022266211156909587, "learning_rate": 7.010383100608665e-06, "loss": 0.0, "step": 5731 }, { "epoch": 0.369401301798028, "grad_norm": 0.03223963132931994, "learning_rate": 7.009667024704619e-06, "loss": 0.0, "step": 5732 }, { "epoch": 0.3694657472449571, "grad_norm": 0.01174380559418009, "learning_rate": 7.008950948800574e-06, "loss": 0.0, "step": 5733 }, { "epoch": 0.3695301926918863, "grad_norm": 0.0016373883989292677, "learning_rate": 7.008234872896528e-06, "loss": 0.0, "step": 5734 }, { "epoch": 0.3695946381388155, "grad_norm": 0.015438489387722118, "learning_rate": 7.0075187969924815e-06, "loss": 0.0, "step": 5735 }, { "epoch": 0.3696590835857447, "grad_norm": 0.004045239143780003, "learning_rate": 7.006802721088436e-06, "loss": 0.0, "step": 5736 }, { "epoch": 0.36972352903267386, "grad_norm": 0.014195178455213005, "learning_rate": 7.00608664518439e-06, "loss": 0.0, "step": 5737 }, { "epoch": 0.36978797447960304, "grad_norm": 0.024569513616097965, "learning_rate": 7.005370569280344e-06, "loss": 0.0, "step": 5738 }, { "epoch": 0.36985241992653217, "grad_norm": 0.020241459484012674, "learning_rate": 7.004654493376299e-06, "loss": 0.0, "step": 5739 }, { "epoch": 0.36991686537346136, "grad_norm": 0.004866278303404176, "learning_rate": 7.003938417472252e-06, "loss": 0.0, "step": 5740 }, { "epoch": 0.36998131082039054, "grad_norm": 0.010689225892144915, "learning_rate": 7.003222341568206e-06, "loss": 0.0001, "step": 5741 }, { "epoch": 0.3700457562673197, "grad_norm": 0.03240860651724199, "learning_rate": 7.002506265664161e-06, "loss": 0.0, "step": 5742 }, { "epoch": 0.3701102017142489, "grad_norm": 0.6012695391805059, "learning_rate": 7.001790189760115e-06, "loss": 0.0037, "step": 5743 }, { "epoch": 0.37017464716117804, "grad_norm": 0.04119383182365942, "learning_rate": 7.001074113856069e-06, "loss": 0.0001, "step": 5744 }, { "epoch": 0.3702390926081072, "grad_norm": 0.008040074272905727, "learning_rate": 7.000358037952023e-06, "loss": 0.0, "step": 5745 }, { "epoch": 0.3703035380550364, "grad_norm": 0.14133781336635506, "learning_rate": 6.999641962047977e-06, "loss": 0.0022, "step": 5746 }, { "epoch": 0.3703679835019656, "grad_norm": 0.024683611120630507, "learning_rate": 6.998925886143931e-06, "loss": 0.0, "step": 5747 }, { "epoch": 0.3704324289488948, "grad_norm": 0.08903047211636257, "learning_rate": 6.9982098102398865e-06, "loss": 0.0003, "step": 5748 }, { "epoch": 0.37049687439582396, "grad_norm": 0.45973190145044607, "learning_rate": 6.997493734335841e-06, "loss": 0.0009, "step": 5749 }, { "epoch": 0.3705613198427531, "grad_norm": 0.06435428612509528, "learning_rate": 6.996777658431795e-06, "loss": 0.0001, "step": 5750 }, { "epoch": 0.3706257652896823, "grad_norm": 0.018558351255393086, "learning_rate": 6.9960615825277485e-06, "loss": 0.0, "step": 5751 }, { "epoch": 0.37069021073661146, "grad_norm": 0.008864585347216576, "learning_rate": 6.995345506623703e-06, "loss": 0.0, "step": 5752 }, { "epoch": 0.37075465618354064, "grad_norm": 0.009975094178598903, "learning_rate": 6.994629430719657e-06, "loss": 0.0, "step": 5753 }, { "epoch": 0.3708191016304698, "grad_norm": 0.016556182540155888, "learning_rate": 6.9939133548156114e-06, "loss": 0.0001, "step": 5754 }, { "epoch": 0.37088354707739896, "grad_norm": 0.13527741899174603, "learning_rate": 6.993197278911566e-06, "loss": 0.0019, "step": 5755 }, { "epoch": 0.37094799252432814, "grad_norm": 0.06295406905782694, "learning_rate": 6.992481203007519e-06, "loss": 0.0001, "step": 5756 }, { "epoch": 0.3710124379712573, "grad_norm": 0.0010064822738222764, "learning_rate": 6.9917651271034735e-06, "loss": 0.0, "step": 5757 }, { "epoch": 0.3710768834181865, "grad_norm": 0.024290977389738094, "learning_rate": 6.991049051199428e-06, "loss": 0.0015, "step": 5758 }, { "epoch": 0.3711413288651157, "grad_norm": 0.010974935412825312, "learning_rate": 6.990332975295382e-06, "loss": 0.0, "step": 5759 }, { "epoch": 0.3712057743120449, "grad_norm": 0.10455954243655033, "learning_rate": 6.989616899391336e-06, "loss": 0.0003, "step": 5760 }, { "epoch": 0.371270219758974, "grad_norm": 0.12336619134229843, "learning_rate": 6.98890082348729e-06, "loss": 0.0002, "step": 5761 }, { "epoch": 0.3713346652059032, "grad_norm": 0.21171697777713713, "learning_rate": 6.988184747583244e-06, "loss": 0.0009, "step": 5762 }, { "epoch": 0.3713991106528324, "grad_norm": 0.19279942206943126, "learning_rate": 6.9874686716791984e-06, "loss": 0.0026, "step": 5763 }, { "epoch": 0.37146355609976156, "grad_norm": 0.01216910669966571, "learning_rate": 6.986752595775153e-06, "loss": 0.0001, "step": 5764 }, { "epoch": 0.37152800154669074, "grad_norm": 0.00037731451971985587, "learning_rate": 6.986036519871107e-06, "loss": 0.0, "step": 5765 }, { "epoch": 0.3715924469936199, "grad_norm": 0.00018807497398390133, "learning_rate": 6.9853204439670605e-06, "loss": 0.0, "step": 5766 }, { "epoch": 0.37165689244054906, "grad_norm": 0.0026170447983087115, "learning_rate": 6.984604368063015e-06, "loss": 0.0, "step": 5767 }, { "epoch": 0.37172133788747824, "grad_norm": 0.017747659184786897, "learning_rate": 6.983888292158969e-06, "loss": 0.0002, "step": 5768 }, { "epoch": 0.3717857833344074, "grad_norm": 0.3520613627754859, "learning_rate": 6.983172216254923e-06, "loss": 0.0017, "step": 5769 }, { "epoch": 0.3718502287813366, "grad_norm": 0.00016192167565096303, "learning_rate": 6.982456140350878e-06, "loss": 0.0, "step": 5770 }, { "epoch": 0.3719146742282658, "grad_norm": 0.0008348356854251009, "learning_rate": 6.981740064446831e-06, "loss": 0.0, "step": 5771 }, { "epoch": 0.3719791196751949, "grad_norm": 0.005497509674599073, "learning_rate": 6.981023988542786e-06, "loss": 0.0001, "step": 5772 }, { "epoch": 0.3720435651221241, "grad_norm": 0.0037413784761350334, "learning_rate": 6.9803079126387406e-06, "loss": 0.0, "step": 5773 }, { "epoch": 0.3721080105690533, "grad_norm": 0.0026230430236184187, "learning_rate": 6.979591836734695e-06, "loss": 0.0, "step": 5774 }, { "epoch": 0.3721724560159825, "grad_norm": 0.41783501811893103, "learning_rate": 6.978875760830649e-06, "loss": 0.0034, "step": 5775 }, { "epoch": 0.37223690146291166, "grad_norm": 0.024433689572735673, "learning_rate": 6.9781596849266035e-06, "loss": 0.0, "step": 5776 }, { "epoch": 0.37230134690984085, "grad_norm": 0.20971572218454174, "learning_rate": 6.977443609022557e-06, "loss": 0.0025, "step": 5777 }, { "epoch": 0.37236579235677, "grad_norm": 0.006778586627347576, "learning_rate": 6.976727533118511e-06, "loss": 0.0, "step": 5778 }, { "epoch": 0.37243023780369916, "grad_norm": 0.004765972105939649, "learning_rate": 6.9760114572144655e-06, "loss": 0.0, "step": 5779 }, { "epoch": 0.37249468325062834, "grad_norm": 0.00936975226703011, "learning_rate": 6.97529538131042e-06, "loss": 0.0, "step": 5780 }, { "epoch": 0.37255912869755753, "grad_norm": 0.03314593066305474, "learning_rate": 6.974579305406374e-06, "loss": 0.0002, "step": 5781 }, { "epoch": 0.3726235741444867, "grad_norm": 0.10569911541069012, "learning_rate": 6.9738632295023276e-06, "loss": 0.0001, "step": 5782 }, { "epoch": 0.37268801959141584, "grad_norm": 0.005564481327632969, "learning_rate": 6.973147153598282e-06, "loss": 0.0, "step": 5783 }, { "epoch": 0.372752465038345, "grad_norm": 0.0011321496959383888, "learning_rate": 6.972431077694236e-06, "loss": 0.0, "step": 5784 }, { "epoch": 0.3728169104852742, "grad_norm": 0.004791011659612873, "learning_rate": 6.9717150017901905e-06, "loss": 0.0, "step": 5785 }, { "epoch": 0.3728813559322034, "grad_norm": 0.005532310001008372, "learning_rate": 6.970998925886145e-06, "loss": 0.0001, "step": 5786 }, { "epoch": 0.3729458013791326, "grad_norm": 0.7089383028892337, "learning_rate": 6.970282849982098e-06, "loss": 0.0035, "step": 5787 }, { "epoch": 0.37301024682606176, "grad_norm": 0.10018688504803142, "learning_rate": 6.9695667740780525e-06, "loss": 0.0001, "step": 5788 }, { "epoch": 0.3730746922729909, "grad_norm": 0.0001188095476045239, "learning_rate": 6.968850698174007e-06, "loss": 0.0, "step": 5789 }, { "epoch": 0.3731391377199201, "grad_norm": 0.31996598923516045, "learning_rate": 6.968134622269961e-06, "loss": 0.0045, "step": 5790 }, { "epoch": 0.37320358316684926, "grad_norm": 0.00032090989078132145, "learning_rate": 6.9674185463659146e-06, "loss": 0.0, "step": 5791 }, { "epoch": 0.37326802861377845, "grad_norm": 0.0027813093535380045, "learning_rate": 6.966702470461869e-06, "loss": 0.0, "step": 5792 }, { "epoch": 0.37333247406070763, "grad_norm": 0.0006011110428928335, "learning_rate": 6.965986394557823e-06, "loss": 0.0, "step": 5793 }, { "epoch": 0.37339691950763676, "grad_norm": 0.006022162366228975, "learning_rate": 6.9652703186537775e-06, "loss": 0.0, "step": 5794 }, { "epoch": 0.37346136495456594, "grad_norm": 0.0009564569230516909, "learning_rate": 6.964554242749733e-06, "loss": 0.0, "step": 5795 }, { "epoch": 0.37352581040149513, "grad_norm": 0.03388242371408433, "learning_rate": 6.963838166845687e-06, "loss": 0.0, "step": 5796 }, { "epoch": 0.3735902558484243, "grad_norm": 0.00852415166096573, "learning_rate": 6.963122090941641e-06, "loss": 0.0, "step": 5797 }, { "epoch": 0.3736547012953535, "grad_norm": 0.014353758440833062, "learning_rate": 6.962406015037595e-06, "loss": 0.0001, "step": 5798 }, { "epoch": 0.3737191467422827, "grad_norm": 0.001276369092390287, "learning_rate": 6.961689939133549e-06, "loss": 0.0, "step": 5799 }, { "epoch": 0.3737835921892118, "grad_norm": 0.0008902047543594542, "learning_rate": 6.960973863229503e-06, "loss": 0.0, "step": 5800 }, { "epoch": 0.373848037636141, "grad_norm": 0.004717272669452164, "learning_rate": 6.9602577873254575e-06, "loss": 0.0, "step": 5801 }, { "epoch": 0.3739124830830702, "grad_norm": 0.014702761769368668, "learning_rate": 6.959541711421412e-06, "loss": 0.0001, "step": 5802 }, { "epoch": 0.37397692852999936, "grad_norm": 0.03831489377639327, "learning_rate": 6.958825635517365e-06, "loss": 0.0001, "step": 5803 }, { "epoch": 0.37404137397692855, "grad_norm": 0.5141810225466743, "learning_rate": 6.95810955961332e-06, "loss": 0.0018, "step": 5804 }, { "epoch": 0.3741058194238577, "grad_norm": 0.0005838929170589029, "learning_rate": 6.957393483709274e-06, "loss": 0.0, "step": 5805 }, { "epoch": 0.37417026487078686, "grad_norm": 0.0002700920403723447, "learning_rate": 6.956677407805228e-06, "loss": 0.0, "step": 5806 }, { "epoch": 0.37423471031771605, "grad_norm": 0.06860687546291591, "learning_rate": 6.955961331901182e-06, "loss": 0.0017, "step": 5807 }, { "epoch": 0.37429915576464523, "grad_norm": 0.004176513204807584, "learning_rate": 6.955245255997136e-06, "loss": 0.0, "step": 5808 }, { "epoch": 0.3743636012115744, "grad_norm": 0.005652395412120658, "learning_rate": 6.95452918009309e-06, "loss": 0.0, "step": 5809 }, { "epoch": 0.3744280466585036, "grad_norm": 0.019276447759645723, "learning_rate": 6.9538131041890445e-06, "loss": 0.0, "step": 5810 }, { "epoch": 0.37449249210543273, "grad_norm": 0.004779306271954092, "learning_rate": 6.953097028284999e-06, "loss": 0.0, "step": 5811 }, { "epoch": 0.3745569375523619, "grad_norm": 0.05639422399771823, "learning_rate": 6.952380952380952e-06, "loss": 0.0003, "step": 5812 }, { "epoch": 0.3746213829992911, "grad_norm": 0.013047953267711436, "learning_rate": 6.951664876476907e-06, "loss": 0.0, "step": 5813 }, { "epoch": 0.3746858284462203, "grad_norm": 0.0800606691129166, "learning_rate": 6.950948800572861e-06, "loss": 0.0, "step": 5814 }, { "epoch": 0.37475027389314947, "grad_norm": 0.12582311229794294, "learning_rate": 6.950232724668815e-06, "loss": 0.0017, "step": 5815 }, { "epoch": 0.37481471934007865, "grad_norm": 0.019311332882545055, "learning_rate": 6.9495166487647695e-06, "loss": 0.0001, "step": 5816 }, { "epoch": 0.3748791647870078, "grad_norm": 0.2597594869935363, "learning_rate": 6.948800572860723e-06, "loss": 0.0006, "step": 5817 }, { "epoch": 0.37494361023393696, "grad_norm": 0.013510250554619812, "learning_rate": 6.948084496956679e-06, "loss": 0.0, "step": 5818 }, { "epoch": 0.37500805568086615, "grad_norm": 0.004594365982372799, "learning_rate": 6.947368421052632e-06, "loss": 0.0, "step": 5819 }, { "epoch": 0.37507250112779533, "grad_norm": 0.001138969696631606, "learning_rate": 6.946652345148587e-06, "loss": 0.0, "step": 5820 }, { "epoch": 0.3751369465747245, "grad_norm": 0.0006642511054819653, "learning_rate": 6.945936269244541e-06, "loss": 0.0, "step": 5821 }, { "epoch": 0.37520139202165365, "grad_norm": 0.044176775604888044, "learning_rate": 6.945220193340495e-06, "loss": 0.0001, "step": 5822 }, { "epoch": 0.37526583746858283, "grad_norm": 0.0004816241632529937, "learning_rate": 6.944504117436449e-06, "loss": 0.0, "step": 5823 }, { "epoch": 0.375330282915512, "grad_norm": 0.0020537732340232817, "learning_rate": 6.943788041532403e-06, "loss": 0.0, "step": 5824 }, { "epoch": 0.3753947283624412, "grad_norm": 0.3551568350092731, "learning_rate": 6.943071965628357e-06, "loss": 0.0015, "step": 5825 }, { "epoch": 0.3754591738093704, "grad_norm": 0.0009804151741977017, "learning_rate": 6.942355889724312e-06, "loss": 0.0, "step": 5826 }, { "epoch": 0.37552361925629957, "grad_norm": 1.3854608552621122, "learning_rate": 6.941639813820266e-06, "loss": 0.0075, "step": 5827 }, { "epoch": 0.3755880647032287, "grad_norm": 0.000444632576506098, "learning_rate": 6.940923737916219e-06, "loss": 0.0, "step": 5828 }, { "epoch": 0.3756525101501579, "grad_norm": 0.24999937415044649, "learning_rate": 6.940207662012174e-06, "loss": 0.001, "step": 5829 }, { "epoch": 0.37571695559708707, "grad_norm": 0.001790503591942046, "learning_rate": 6.939491586108128e-06, "loss": 0.0, "step": 5830 }, { "epoch": 0.37578140104401625, "grad_norm": 0.00021567154767956597, "learning_rate": 6.938775510204082e-06, "loss": 0.0, "step": 5831 }, { "epoch": 0.37584584649094543, "grad_norm": 0.001552857672059101, "learning_rate": 6.9380594343000366e-06, "loss": 0.0, "step": 5832 }, { "epoch": 0.37591029193787456, "grad_norm": 0.0009687577369576802, "learning_rate": 6.93734335839599e-06, "loss": 0.0, "step": 5833 }, { "epoch": 0.37597473738480375, "grad_norm": 0.22006186439099512, "learning_rate": 6.936627282491944e-06, "loss": 0.0018, "step": 5834 }, { "epoch": 0.37603918283173293, "grad_norm": 0.0022716589278178995, "learning_rate": 6.935911206587899e-06, "loss": 0.0, "step": 5835 }, { "epoch": 0.3761036282786621, "grad_norm": 0.07283411149368349, "learning_rate": 6.935195130683853e-06, "loss": 0.0003, "step": 5836 }, { "epoch": 0.3761680737255913, "grad_norm": 0.0513859471633266, "learning_rate": 6.934479054779807e-06, "loss": 0.0002, "step": 5837 }, { "epoch": 0.3762325191725205, "grad_norm": 0.216646623524965, "learning_rate": 6.933762978875761e-06, "loss": 0.0014, "step": 5838 }, { "epoch": 0.3762969646194496, "grad_norm": 0.003388630037906614, "learning_rate": 6.933046902971715e-06, "loss": 0.0, "step": 5839 }, { "epoch": 0.3763614100663788, "grad_norm": 0.028671905161233677, "learning_rate": 6.932330827067669e-06, "loss": 0.0, "step": 5840 }, { "epoch": 0.376425855513308, "grad_norm": 0.024152302581481176, "learning_rate": 6.9316147511636236e-06, "loss": 0.0, "step": 5841 }, { "epoch": 0.37649030096023717, "grad_norm": 0.0009817171541373576, "learning_rate": 6.930898675259579e-06, "loss": 0.0, "step": 5842 }, { "epoch": 0.37655474640716635, "grad_norm": 0.0013014101235195464, "learning_rate": 6.930182599355533e-06, "loss": 0.0, "step": 5843 }, { "epoch": 0.3766191918540955, "grad_norm": 0.0028327639840403755, "learning_rate": 6.9294665234514865e-06, "loss": 0.0, "step": 5844 }, { "epoch": 0.37668363730102467, "grad_norm": 0.0032929799671172245, "learning_rate": 6.928750447547441e-06, "loss": 0.0, "step": 5845 }, { "epoch": 0.37674808274795385, "grad_norm": 0.030509106413843804, "learning_rate": 6.928034371643395e-06, "loss": 0.0002, "step": 5846 }, { "epoch": 0.37681252819488303, "grad_norm": 0.004954742198539419, "learning_rate": 6.927318295739349e-06, "loss": 0.0, "step": 5847 }, { "epoch": 0.3768769736418122, "grad_norm": 0.0002230131182336926, "learning_rate": 6.926602219835304e-06, "loss": 0.0, "step": 5848 }, { "epoch": 0.3769414190887414, "grad_norm": 0.023321539143309727, "learning_rate": 6.925886143931257e-06, "loss": 0.0001, "step": 5849 }, { "epoch": 0.37700586453567053, "grad_norm": 0.2192969636607764, "learning_rate": 6.925170068027211e-06, "loss": 0.0005, "step": 5850 }, { "epoch": 0.3770703099825997, "grad_norm": 0.0029399686461097315, "learning_rate": 6.924453992123166e-06, "loss": 0.0, "step": 5851 }, { "epoch": 0.3771347554295289, "grad_norm": 0.006376825308723674, "learning_rate": 6.92373791621912e-06, "loss": 0.0, "step": 5852 }, { "epoch": 0.3771992008764581, "grad_norm": 0.15974989372908052, "learning_rate": 6.923021840315074e-06, "loss": 0.0006, "step": 5853 }, { "epoch": 0.37726364632338727, "grad_norm": 0.003535994812387964, "learning_rate": 6.922305764411028e-06, "loss": 0.0, "step": 5854 }, { "epoch": 0.37732809177031645, "grad_norm": 0.001929273478040189, "learning_rate": 6.921589688506982e-06, "loss": 0.0, "step": 5855 }, { "epoch": 0.3773925372172456, "grad_norm": 0.001803497544205884, "learning_rate": 6.920873612602936e-06, "loss": 0.0, "step": 5856 }, { "epoch": 0.37745698266417477, "grad_norm": 1.0116954786061811, "learning_rate": 6.920157536698891e-06, "loss": 0.0011, "step": 5857 }, { "epoch": 0.37752142811110395, "grad_norm": 0.0044865772161063605, "learning_rate": 6.919441460794845e-06, "loss": 0.0, "step": 5858 }, { "epoch": 0.37758587355803314, "grad_norm": 0.002553314405904031, "learning_rate": 6.918725384890798e-06, "loss": 0.0, "step": 5859 }, { "epoch": 0.3776503190049623, "grad_norm": 0.0049119126899543186, "learning_rate": 6.918009308986753e-06, "loss": 0.0, "step": 5860 }, { "epoch": 0.37771476445189145, "grad_norm": 0.0008667354202076583, "learning_rate": 6.917293233082707e-06, "loss": 0.0, "step": 5861 }, { "epoch": 0.37777920989882063, "grad_norm": 0.0016562458722985242, "learning_rate": 6.916577157178661e-06, "loss": 0.0, "step": 5862 }, { "epoch": 0.3778436553457498, "grad_norm": 0.0004253058844486389, "learning_rate": 6.915861081274616e-06, "loss": 0.0, "step": 5863 }, { "epoch": 0.377908100792679, "grad_norm": 0.010075668762335689, "learning_rate": 6.915145005370569e-06, "loss": 0.0001, "step": 5864 }, { "epoch": 0.3779725462396082, "grad_norm": 0.004239878134591785, "learning_rate": 6.914428929466524e-06, "loss": 0.0, "step": 5865 }, { "epoch": 0.37803699168653737, "grad_norm": 0.30177530528108326, "learning_rate": 6.9137128535624785e-06, "loss": 0.0011, "step": 5866 }, { "epoch": 0.3781014371334665, "grad_norm": 0.00040066692664154734, "learning_rate": 6.912996777658433e-06, "loss": 0.0, "step": 5867 }, { "epoch": 0.3781658825803957, "grad_norm": 0.10780252724976731, "learning_rate": 6.912280701754387e-06, "loss": 0.0004, "step": 5868 }, { "epoch": 0.37823032802732487, "grad_norm": 0.002191261152124998, "learning_rate": 6.911564625850341e-06, "loss": 0.0, "step": 5869 }, { "epoch": 0.37829477347425405, "grad_norm": 0.2787256869160595, "learning_rate": 6.910848549946295e-06, "loss": 0.0014, "step": 5870 }, { "epoch": 0.37835921892118324, "grad_norm": 0.0006742879125156188, "learning_rate": 6.910132474042249e-06, "loss": 0.0, "step": 5871 }, { "epoch": 0.37842366436811237, "grad_norm": 0.012795925913342132, "learning_rate": 6.9094163981382034e-06, "loss": 0.0001, "step": 5872 }, { "epoch": 0.37848810981504155, "grad_norm": 0.0007479872914136376, "learning_rate": 6.908700322234158e-06, "loss": 0.0, "step": 5873 }, { "epoch": 0.37855255526197074, "grad_norm": 0.00018037472474745242, "learning_rate": 6.907984246330112e-06, "loss": 0.0, "step": 5874 }, { "epoch": 0.3786170007088999, "grad_norm": 0.0015442612656284755, "learning_rate": 6.9072681704260655e-06, "loss": 0.0, "step": 5875 }, { "epoch": 0.3786814461558291, "grad_norm": 0.0024565796104232176, "learning_rate": 6.90655209452202e-06, "loss": 0.0, "step": 5876 }, { "epoch": 0.3787458916027583, "grad_norm": 0.500224241993818, "learning_rate": 6.905836018617974e-06, "loss": 0.0036, "step": 5877 }, { "epoch": 0.3788103370496874, "grad_norm": 0.00017855047645974685, "learning_rate": 6.905119942713928e-06, "loss": 0.0, "step": 5878 }, { "epoch": 0.3788747824966166, "grad_norm": 0.012190953634215032, "learning_rate": 6.904403866809883e-06, "loss": 0.0001, "step": 5879 }, { "epoch": 0.3789392279435458, "grad_norm": 0.004909180990656321, "learning_rate": 6.903687790905836e-06, "loss": 0.0, "step": 5880 }, { "epoch": 0.37900367339047497, "grad_norm": 0.5589098802746703, "learning_rate": 6.9029717150017904e-06, "loss": 0.0005, "step": 5881 }, { "epoch": 0.37906811883740416, "grad_norm": 0.003212166081683288, "learning_rate": 6.902255639097745e-06, "loss": 0.0, "step": 5882 }, { "epoch": 0.3791325642843333, "grad_norm": 0.08818240476463624, "learning_rate": 6.901539563193699e-06, "loss": 0.0016, "step": 5883 }, { "epoch": 0.37919700973126247, "grad_norm": 0.003632769423772264, "learning_rate": 6.9008234872896525e-06, "loss": 0.0, "step": 5884 }, { "epoch": 0.37926145517819165, "grad_norm": 0.009019636505328735, "learning_rate": 6.900107411385607e-06, "loss": 0.0001, "step": 5885 }, { "epoch": 0.37932590062512084, "grad_norm": 0.0004343660014682446, "learning_rate": 6.899391335481561e-06, "loss": 0.0, "step": 5886 }, { "epoch": 0.37939034607205, "grad_norm": 0.00027411815184107786, "learning_rate": 6.898675259577515e-06, "loss": 0.0, "step": 5887 }, { "epoch": 0.3794547915189792, "grad_norm": 0.018313065196746546, "learning_rate": 6.8979591836734705e-06, "loss": 0.0, "step": 5888 }, { "epoch": 0.37951923696590834, "grad_norm": 0.05844586306158265, "learning_rate": 6.897243107769425e-06, "loss": 0.0001, "step": 5889 }, { "epoch": 0.3795836824128375, "grad_norm": 0.008817164212300388, "learning_rate": 6.896527031865379e-06, "loss": 0.0, "step": 5890 }, { "epoch": 0.3796481278597667, "grad_norm": 0.440502219983676, "learning_rate": 6.8958109559613326e-06, "loss": 0.0005, "step": 5891 }, { "epoch": 0.3797125733066959, "grad_norm": 0.03877985442103465, "learning_rate": 6.895094880057287e-06, "loss": 0.0001, "step": 5892 }, { "epoch": 0.3797770187536251, "grad_norm": 0.0006761241313574944, "learning_rate": 6.894378804153241e-06, "loss": 0.0, "step": 5893 }, { "epoch": 0.37984146420055426, "grad_norm": 0.0003246361163867012, "learning_rate": 6.8936627282491955e-06, "loss": 0.0, "step": 5894 }, { "epoch": 0.3799059096474834, "grad_norm": 0.001269483495536825, "learning_rate": 6.89294665234515e-06, "loss": 0.0, "step": 5895 }, { "epoch": 0.37997035509441257, "grad_norm": 0.6018471230283085, "learning_rate": 6.892230576441103e-06, "loss": 0.003, "step": 5896 }, { "epoch": 0.38003480054134176, "grad_norm": 0.00046990029288776074, "learning_rate": 6.8915145005370575e-06, "loss": 0.0, "step": 5897 }, { "epoch": 0.38009924598827094, "grad_norm": 0.2449174500328802, "learning_rate": 6.890798424633012e-06, "loss": 0.0001, "step": 5898 }, { "epoch": 0.3801636914352001, "grad_norm": 0.00953486729292568, "learning_rate": 6.890082348728966e-06, "loss": 0.0, "step": 5899 }, { "epoch": 0.38022813688212925, "grad_norm": 0.0004494877435115907, "learning_rate": 6.88936627282492e-06, "loss": 0.0, "step": 5900 }, { "epoch": 0.38029258232905844, "grad_norm": 0.007622277277941377, "learning_rate": 6.888650196920874e-06, "loss": 0.0, "step": 5901 }, { "epoch": 0.3803570277759876, "grad_norm": 0.00021103624434805576, "learning_rate": 6.887934121016828e-06, "loss": 0.0, "step": 5902 }, { "epoch": 0.3804214732229168, "grad_norm": 0.00270139356453862, "learning_rate": 6.8872180451127825e-06, "loss": 0.0, "step": 5903 }, { "epoch": 0.380485918669846, "grad_norm": 0.01119123283242716, "learning_rate": 6.886501969208737e-06, "loss": 0.0001, "step": 5904 }, { "epoch": 0.3805503641167752, "grad_norm": 0.0005282343466651458, "learning_rate": 6.88578589330469e-06, "loss": 0.0, "step": 5905 }, { "epoch": 0.3806148095637043, "grad_norm": 2.001432859226168, "learning_rate": 6.8850698174006445e-06, "loss": 0.0158, "step": 5906 }, { "epoch": 0.3806792550106335, "grad_norm": 0.001497510037133082, "learning_rate": 6.884353741496599e-06, "loss": 0.0, "step": 5907 }, { "epoch": 0.3807437004575627, "grad_norm": 0.0058557848933884354, "learning_rate": 6.883637665592553e-06, "loss": 0.0, "step": 5908 }, { "epoch": 0.38080814590449186, "grad_norm": 0.15156723462407826, "learning_rate": 6.882921589688507e-06, "loss": 0.0003, "step": 5909 }, { "epoch": 0.38087259135142104, "grad_norm": 0.0022827647541258317, "learning_rate": 6.882205513784461e-06, "loss": 0.0, "step": 5910 }, { "epoch": 0.38093703679835017, "grad_norm": 0.001730530080398193, "learning_rate": 6.881489437880415e-06, "loss": 0.0, "step": 5911 }, { "epoch": 0.38100148224527935, "grad_norm": 0.0014464979963454568, "learning_rate": 6.88077336197637e-06, "loss": 0.0, "step": 5912 }, { "epoch": 0.38106592769220854, "grad_norm": 0.0013056269425528884, "learning_rate": 6.880057286072325e-06, "loss": 0.0, "step": 5913 }, { "epoch": 0.3811303731391377, "grad_norm": 0.000246217387139311, "learning_rate": 6.879341210168279e-06, "loss": 0.0, "step": 5914 }, { "epoch": 0.3811948185860669, "grad_norm": 0.0029653896752515046, "learning_rate": 6.878625134264233e-06, "loss": 0.0, "step": 5915 }, { "epoch": 0.3812592640329961, "grad_norm": 0.004897105078602228, "learning_rate": 6.8779090583601875e-06, "loss": 0.0001, "step": 5916 }, { "epoch": 0.3813237094799252, "grad_norm": 0.0006053366262376849, "learning_rate": 6.877192982456141e-06, "loss": 0.0, "step": 5917 }, { "epoch": 0.3813881549268544, "grad_norm": 0.0014126298196204864, "learning_rate": 6.876476906552095e-06, "loss": 0.0, "step": 5918 }, { "epoch": 0.3814526003737836, "grad_norm": 0.020675918183340834, "learning_rate": 6.8757608306480495e-06, "loss": 0.0002, "step": 5919 }, { "epoch": 0.3815170458207128, "grad_norm": 0.0027330285181974686, "learning_rate": 6.875044754744004e-06, "loss": 0.0, "step": 5920 }, { "epoch": 0.38158149126764196, "grad_norm": 0.020531958035086353, "learning_rate": 6.874328678839957e-06, "loss": 0.0, "step": 5921 }, { "epoch": 0.3816459367145711, "grad_norm": 0.49557236434042445, "learning_rate": 6.873612602935912e-06, "loss": 0.002, "step": 5922 }, { "epoch": 0.3817103821615003, "grad_norm": 0.0013880445470229727, "learning_rate": 6.872896527031866e-06, "loss": 0.0, "step": 5923 }, { "epoch": 0.38177482760842946, "grad_norm": 0.001162689409539696, "learning_rate": 6.87218045112782e-06, "loss": 0.0, "step": 5924 }, { "epoch": 0.38183927305535864, "grad_norm": 0.11035111520052028, "learning_rate": 6.8714643752237745e-06, "loss": 0.0017, "step": 5925 }, { "epoch": 0.3819037185022878, "grad_norm": 0.0007650463249493047, "learning_rate": 6.870748299319728e-06, "loss": 0.0, "step": 5926 }, { "epoch": 0.381968163949217, "grad_norm": 0.019925055939437754, "learning_rate": 6.870032223415682e-06, "loss": 0.0001, "step": 5927 }, { "epoch": 0.38203260939614614, "grad_norm": 0.014557531030572302, "learning_rate": 6.8693161475116365e-06, "loss": 0.0, "step": 5928 }, { "epoch": 0.3820970548430753, "grad_norm": 0.0002573063824412536, "learning_rate": 6.868600071607591e-06, "loss": 0.0, "step": 5929 }, { "epoch": 0.3821615002900045, "grad_norm": 0.0003335346718924549, "learning_rate": 6.867883995703545e-06, "loss": 0.0, "step": 5930 }, { "epoch": 0.3822259457369337, "grad_norm": 0.0008118441796438887, "learning_rate": 6.867167919799499e-06, "loss": 0.0, "step": 5931 }, { "epoch": 0.3822903911838629, "grad_norm": 0.005326369326512274, "learning_rate": 6.866451843895453e-06, "loss": 0.0, "step": 5932 }, { "epoch": 0.38235483663079206, "grad_norm": 0.005278252984737066, "learning_rate": 6.865735767991407e-06, "loss": 0.0, "step": 5933 }, { "epoch": 0.3824192820777212, "grad_norm": 0.03136676899392069, "learning_rate": 6.8650196920873615e-06, "loss": 0.0001, "step": 5934 }, { "epoch": 0.3824837275246504, "grad_norm": 0.0009959009747822732, "learning_rate": 6.864303616183317e-06, "loss": 0.0, "step": 5935 }, { "epoch": 0.38254817297157956, "grad_norm": 0.00042236544398543236, "learning_rate": 6.863587540279271e-06, "loss": 0.0, "step": 5936 }, { "epoch": 0.38261261841850874, "grad_norm": 0.0006273076220670741, "learning_rate": 6.862871464375224e-06, "loss": 0.0, "step": 5937 }, { "epoch": 0.3826770638654379, "grad_norm": 0.013919804187017291, "learning_rate": 6.862155388471179e-06, "loss": 0.0, "step": 5938 }, { "epoch": 0.38274150931236706, "grad_norm": 0.006399515087818912, "learning_rate": 6.861439312567133e-06, "loss": 0.0, "step": 5939 }, { "epoch": 0.38280595475929624, "grad_norm": 0.0009560729498972583, "learning_rate": 6.860723236663087e-06, "loss": 0.0, "step": 5940 }, { "epoch": 0.3828704002062254, "grad_norm": 0.0574728807346303, "learning_rate": 6.8600071607590416e-06, "loss": 0.0002, "step": 5941 }, { "epoch": 0.3829348456531546, "grad_norm": 0.005010008731110634, "learning_rate": 6.859291084854995e-06, "loss": 0.0, "step": 5942 }, { "epoch": 0.3829992911000838, "grad_norm": 0.010932127051166089, "learning_rate": 6.858575008950949e-06, "loss": 0.0001, "step": 5943 }, { "epoch": 0.383063736547013, "grad_norm": 0.24358331776633155, "learning_rate": 6.857858933046904e-06, "loss": 0.0002, "step": 5944 }, { "epoch": 0.3831281819939421, "grad_norm": 0.005314563855596481, "learning_rate": 6.857142857142858e-06, "loss": 0.0, "step": 5945 }, { "epoch": 0.3831926274408713, "grad_norm": 0.005676835426711824, "learning_rate": 6.856426781238812e-06, "loss": 0.0, "step": 5946 }, { "epoch": 0.3832570728878005, "grad_norm": 0.0003028851649869988, "learning_rate": 6.855710705334766e-06, "loss": 0.0, "step": 5947 }, { "epoch": 0.38332151833472966, "grad_norm": 0.29716372253412965, "learning_rate": 6.85499462943072e-06, "loss": 0.0332, "step": 5948 }, { "epoch": 0.38338596378165885, "grad_norm": 0.001941066789563445, "learning_rate": 6.854278553526674e-06, "loss": 0.0, "step": 5949 }, { "epoch": 0.383450409228588, "grad_norm": 0.21121318779958853, "learning_rate": 6.8535624776226286e-06, "loss": 0.0013, "step": 5950 }, { "epoch": 0.38351485467551716, "grad_norm": 0.023995077734359705, "learning_rate": 6.852846401718583e-06, "loss": 0.0001, "step": 5951 }, { "epoch": 0.38357930012244634, "grad_norm": 0.05211629073163899, "learning_rate": 6.852130325814536e-06, "loss": 0.0004, "step": 5952 }, { "epoch": 0.3836437455693755, "grad_norm": 0.000493550789505138, "learning_rate": 6.851414249910491e-06, "loss": 0.0, "step": 5953 }, { "epoch": 0.3837081910163047, "grad_norm": 0.0003923175907974961, "learning_rate": 6.850698174006445e-06, "loss": 0.0, "step": 5954 }, { "epoch": 0.3837726364632339, "grad_norm": 0.009923200087034062, "learning_rate": 6.849982098102399e-06, "loss": 0.0, "step": 5955 }, { "epoch": 0.383837081910163, "grad_norm": 0.03499818907090998, "learning_rate": 6.8492660221983535e-06, "loss": 0.0001, "step": 5956 }, { "epoch": 0.3839015273570922, "grad_norm": 0.0037882630463259115, "learning_rate": 6.848549946294307e-06, "loss": 0.0, "step": 5957 }, { "epoch": 0.3839659728040214, "grad_norm": 0.00421029141255445, "learning_rate": 6.847833870390261e-06, "loss": 0.0, "step": 5958 }, { "epoch": 0.3840304182509506, "grad_norm": 0.0005011436316182269, "learning_rate": 6.847117794486216e-06, "loss": 0.0, "step": 5959 }, { "epoch": 0.38409486369787976, "grad_norm": 0.014270384782207152, "learning_rate": 6.846401718582171e-06, "loss": 0.0, "step": 5960 }, { "epoch": 0.38415930914480895, "grad_norm": 0.32118606075803946, "learning_rate": 6.845685642678125e-06, "loss": 0.0023, "step": 5961 }, { "epoch": 0.3842237545917381, "grad_norm": 0.0011372247327576077, "learning_rate": 6.844969566774079e-06, "loss": 0.0, "step": 5962 }, { "epoch": 0.38428820003866726, "grad_norm": 0.00047110262412149026, "learning_rate": 6.844253490870033e-06, "loss": 0.0, "step": 5963 }, { "epoch": 0.38435264548559644, "grad_norm": 0.004175332756483595, "learning_rate": 6.843537414965987e-06, "loss": 0.0, "step": 5964 }, { "epoch": 0.38441709093252563, "grad_norm": 0.00842632735103172, "learning_rate": 6.842821339061941e-06, "loss": 0.0001, "step": 5965 }, { "epoch": 0.3844815363794548, "grad_norm": 0.001385774924796425, "learning_rate": 6.842105263157896e-06, "loss": 0.0, "step": 5966 }, { "epoch": 0.38454598182638394, "grad_norm": 0.00960274895131003, "learning_rate": 6.84138918725385e-06, "loss": 0.0001, "step": 5967 }, { "epoch": 0.3846104272733131, "grad_norm": 0.0014732819087795253, "learning_rate": 6.840673111349803e-06, "loss": 0.0, "step": 5968 }, { "epoch": 0.3846748727202423, "grad_norm": 0.0019206416515151602, "learning_rate": 6.839957035445758e-06, "loss": 0.0, "step": 5969 }, { "epoch": 0.3847393181671715, "grad_norm": 0.010897749206447984, "learning_rate": 6.839240959541712e-06, "loss": 0.0001, "step": 5970 }, { "epoch": 0.3848037636141007, "grad_norm": 0.003925340168642234, "learning_rate": 6.838524883637666e-06, "loss": 0.0, "step": 5971 }, { "epoch": 0.38486820906102986, "grad_norm": 0.012405496974591106, "learning_rate": 6.837808807733621e-06, "loss": 0.0, "step": 5972 }, { "epoch": 0.384932654507959, "grad_norm": 0.06288781766963841, "learning_rate": 6.837092731829574e-06, "loss": 0.0001, "step": 5973 }, { "epoch": 0.3849970999548882, "grad_norm": 0.09090745498393638, "learning_rate": 6.836376655925528e-06, "loss": 0.0, "step": 5974 }, { "epoch": 0.38506154540181736, "grad_norm": 0.0006160974128895556, "learning_rate": 6.835660580021483e-06, "loss": 0.0, "step": 5975 }, { "epoch": 0.38512599084874655, "grad_norm": 0.0007932396092801955, "learning_rate": 6.834944504117437e-06, "loss": 0.0, "step": 5976 }, { "epoch": 0.38519043629567573, "grad_norm": 0.05446525313130436, "learning_rate": 6.834228428213391e-06, "loss": 0.0001, "step": 5977 }, { "epoch": 0.38525488174260486, "grad_norm": 0.006795695546119066, "learning_rate": 6.833512352309345e-06, "loss": 0.0, "step": 5978 }, { "epoch": 0.38531932718953404, "grad_norm": 0.2266163268767816, "learning_rate": 6.832796276405299e-06, "loss": 0.0017, "step": 5979 }, { "epoch": 0.38538377263646323, "grad_norm": 0.1509233551229267, "learning_rate": 6.832080200501253e-06, "loss": 0.0018, "step": 5980 }, { "epoch": 0.3854482180833924, "grad_norm": 0.0038038159387376977, "learning_rate": 6.831364124597208e-06, "loss": 0.0, "step": 5981 }, { "epoch": 0.3855126635303216, "grad_norm": 0.11071187248613129, "learning_rate": 6.830648048693163e-06, "loss": 0.0001, "step": 5982 }, { "epoch": 0.3855771089772508, "grad_norm": 0.008326152462626822, "learning_rate": 6.829931972789117e-06, "loss": 0.0001, "step": 5983 }, { "epoch": 0.3856415544241799, "grad_norm": 0.0029565684972451584, "learning_rate": 6.8292158968850705e-06, "loss": 0.0, "step": 5984 }, { "epoch": 0.3857059998711091, "grad_norm": 0.5216882714265499, "learning_rate": 6.828499820981025e-06, "loss": 0.0014, "step": 5985 }, { "epoch": 0.3857704453180383, "grad_norm": 0.002250543081538071, "learning_rate": 6.827783745076979e-06, "loss": 0.0, "step": 5986 }, { "epoch": 0.38583489076496746, "grad_norm": 0.2361621589188941, "learning_rate": 6.827067669172933e-06, "loss": 0.0011, "step": 5987 }, { "epoch": 0.38589933621189665, "grad_norm": 0.031054470860727275, "learning_rate": 6.826351593268888e-06, "loss": 0.0001, "step": 5988 }, { "epoch": 0.3859637816588258, "grad_norm": 0.014908333265461695, "learning_rate": 6.825635517364841e-06, "loss": 0.0001, "step": 5989 }, { "epoch": 0.38602822710575496, "grad_norm": 0.042477362343784304, "learning_rate": 6.8249194414607954e-06, "loss": 0.0001, "step": 5990 }, { "epoch": 0.38609267255268415, "grad_norm": 0.011556256262070665, "learning_rate": 6.82420336555675e-06, "loss": 0.0, "step": 5991 }, { "epoch": 0.38615711799961333, "grad_norm": 0.0008571054739306326, "learning_rate": 6.823487289652704e-06, "loss": 0.0, "step": 5992 }, { "epoch": 0.3862215634465425, "grad_norm": 0.00012599860103708978, "learning_rate": 6.822771213748658e-06, "loss": 0.0, "step": 5993 }, { "epoch": 0.3862860088934717, "grad_norm": 0.016258295239014172, "learning_rate": 6.822055137844612e-06, "loss": 0.0001, "step": 5994 }, { "epoch": 0.38635045434040083, "grad_norm": 0.007365234092956695, "learning_rate": 6.821339061940566e-06, "loss": 0.0, "step": 5995 }, { "epoch": 0.38641489978733, "grad_norm": 0.010457210348021977, "learning_rate": 6.82062298603652e-06, "loss": 0.0, "step": 5996 }, { "epoch": 0.3864793452342592, "grad_norm": 0.0017911283307170947, "learning_rate": 6.819906910132475e-06, "loss": 0.0, "step": 5997 }, { "epoch": 0.3865437906811884, "grad_norm": 0.021666974075202527, "learning_rate": 6.819190834228428e-06, "loss": 0.0001, "step": 5998 }, { "epoch": 0.38660823612811757, "grad_norm": 0.09290394888785891, "learning_rate": 6.8184747583243824e-06, "loss": 0.001, "step": 5999 }, { "epoch": 0.38667268157504675, "grad_norm": 0.04081432866768783, "learning_rate": 6.817758682420337e-06, "loss": 0.0, "step": 6000 }, { "epoch": 0.3867371270219759, "grad_norm": 0.0030581764431921246, "learning_rate": 6.817042606516291e-06, "loss": 0.0, "step": 6001 }, { "epoch": 0.38680157246890506, "grad_norm": 0.04727984482479228, "learning_rate": 6.816326530612245e-06, "loss": 0.0001, "step": 6002 }, { "epoch": 0.38686601791583425, "grad_norm": 0.07452768727150452, "learning_rate": 6.815610454708199e-06, "loss": 0.0001, "step": 6003 }, { "epoch": 0.38693046336276343, "grad_norm": 0.20755748626077528, "learning_rate": 6.814894378804153e-06, "loss": 0.0005, "step": 6004 }, { "epoch": 0.3869949088096926, "grad_norm": 0.006755925785793144, "learning_rate": 6.814178302900108e-06, "loss": 0.0, "step": 6005 }, { "epoch": 0.38705935425662175, "grad_norm": 0.17547190254891795, "learning_rate": 6.8134622269960625e-06, "loss": 0.0002, "step": 6006 }, { "epoch": 0.38712379970355093, "grad_norm": 0.012661615225328758, "learning_rate": 6.812746151092017e-06, "loss": 0.0002, "step": 6007 }, { "epoch": 0.3871882451504801, "grad_norm": 0.003090067111566923, "learning_rate": 6.812030075187971e-06, "loss": 0.0, "step": 6008 }, { "epoch": 0.3872526905974093, "grad_norm": 0.19586265328413646, "learning_rate": 6.811313999283925e-06, "loss": 0.0012, "step": 6009 }, { "epoch": 0.3873171360443385, "grad_norm": 4.473975694094967, "learning_rate": 6.810597923379879e-06, "loss": 0.0279, "step": 6010 }, { "epoch": 0.38738158149126767, "grad_norm": 0.2445356530106942, "learning_rate": 6.809881847475833e-06, "loss": 0.0051, "step": 6011 }, { "epoch": 0.3874460269381968, "grad_norm": 0.032982293469763065, "learning_rate": 6.8091657715717875e-06, "loss": 0.0003, "step": 6012 }, { "epoch": 0.387510472385126, "grad_norm": 0.009209936588663899, "learning_rate": 6.808449695667742e-06, "loss": 0.0, "step": 6013 }, { "epoch": 0.38757491783205517, "grad_norm": 0.00518529117122379, "learning_rate": 6.807733619763695e-06, "loss": 0.0001, "step": 6014 }, { "epoch": 0.38763936327898435, "grad_norm": 0.07908431500910323, "learning_rate": 6.8070175438596495e-06, "loss": 0.0003, "step": 6015 }, { "epoch": 0.38770380872591353, "grad_norm": 0.27541477016357735, "learning_rate": 6.806301467955604e-06, "loss": 0.0022, "step": 6016 }, { "epoch": 0.38776825417284266, "grad_norm": 0.006634024515463209, "learning_rate": 6.805585392051558e-06, "loss": 0.0, "step": 6017 }, { "epoch": 0.38783269961977185, "grad_norm": 0.011857648203153355, "learning_rate": 6.804869316147512e-06, "loss": 0.0, "step": 6018 }, { "epoch": 0.38789714506670103, "grad_norm": 0.0028286459308458976, "learning_rate": 6.804153240243466e-06, "loss": 0.0, "step": 6019 }, { "epoch": 0.3879615905136302, "grad_norm": 0.01959234457512042, "learning_rate": 6.80343716433942e-06, "loss": 0.0, "step": 6020 }, { "epoch": 0.3880260359605594, "grad_norm": 0.5422197019045307, "learning_rate": 6.8027210884353745e-06, "loss": 0.0033, "step": 6021 }, { "epoch": 0.3880904814074886, "grad_norm": 0.2587556781468276, "learning_rate": 6.802005012531329e-06, "loss": 0.0003, "step": 6022 }, { "epoch": 0.3881549268544177, "grad_norm": 0.008435287658794083, "learning_rate": 6.801288936627283e-06, "loss": 0.0, "step": 6023 }, { "epoch": 0.3882193723013469, "grad_norm": 0.01238562473581905, "learning_rate": 6.8005728607232365e-06, "loss": 0.0, "step": 6024 }, { "epoch": 0.3882838177482761, "grad_norm": 1.987109665357907, "learning_rate": 6.799856784819191e-06, "loss": 0.0106, "step": 6025 }, { "epoch": 0.38834826319520527, "grad_norm": 0.04223822440592762, "learning_rate": 6.799140708915145e-06, "loss": 0.0001, "step": 6026 }, { "epoch": 0.38841270864213445, "grad_norm": 0.002983615328333293, "learning_rate": 6.798424633011099e-06, "loss": 0.0, "step": 6027 }, { "epoch": 0.3884771540890636, "grad_norm": 0.14209061717448843, "learning_rate": 6.797708557107054e-06, "loss": 0.0002, "step": 6028 }, { "epoch": 0.38854159953599277, "grad_norm": 0.07302858039418998, "learning_rate": 6.796992481203009e-06, "loss": 0.0001, "step": 6029 }, { "epoch": 0.38860604498292195, "grad_norm": 0.03682687955726151, "learning_rate": 6.796276405298962e-06, "loss": 0.0, "step": 6030 }, { "epoch": 0.38867049042985113, "grad_norm": 0.009217186042876992, "learning_rate": 6.795560329394917e-06, "loss": 0.0001, "step": 6031 }, { "epoch": 0.3887349358767803, "grad_norm": 0.00352494553698736, "learning_rate": 6.794844253490871e-06, "loss": 0.0, "step": 6032 }, { "epoch": 0.3887993813237095, "grad_norm": 0.029486892316410652, "learning_rate": 6.794128177586825e-06, "loss": 0.0, "step": 6033 }, { "epoch": 0.38886382677063863, "grad_norm": 0.00390147836480369, "learning_rate": 6.7934121016827795e-06, "loss": 0.0, "step": 6034 }, { "epoch": 0.3889282722175678, "grad_norm": 0.013006879238566681, "learning_rate": 6.792696025778733e-06, "loss": 0.0, "step": 6035 }, { "epoch": 0.388992717664497, "grad_norm": 0.11093205133006082, "learning_rate": 6.791979949874687e-06, "loss": 0.0002, "step": 6036 }, { "epoch": 0.3890571631114262, "grad_norm": 0.06791830821969097, "learning_rate": 6.7912638739706415e-06, "loss": 0.0001, "step": 6037 }, { "epoch": 0.38912160855835537, "grad_norm": 0.003451845629040953, "learning_rate": 6.790547798066596e-06, "loss": 0.0, "step": 6038 }, { "epoch": 0.38918605400528455, "grad_norm": 0.09567374634863354, "learning_rate": 6.78983172216255e-06, "loss": 0.0001, "step": 6039 }, { "epoch": 0.3892504994522137, "grad_norm": 0.08102874455585664, "learning_rate": 6.789115646258504e-06, "loss": 0.0001, "step": 6040 }, { "epoch": 0.38931494489914287, "grad_norm": 0.09263870092366862, "learning_rate": 6.788399570354458e-06, "loss": 0.0002, "step": 6041 }, { "epoch": 0.38937939034607205, "grad_norm": 0.058662957804021545, "learning_rate": 6.787683494450412e-06, "loss": 0.0001, "step": 6042 }, { "epoch": 0.38944383579300124, "grad_norm": 0.028517596008494257, "learning_rate": 6.7869674185463665e-06, "loss": 0.0, "step": 6043 }, { "epoch": 0.3895082812399304, "grad_norm": 0.4768442821733463, "learning_rate": 6.786251342642321e-06, "loss": 0.0004, "step": 6044 }, { "epoch": 0.38957272668685955, "grad_norm": 0.07491564763077205, "learning_rate": 6.785535266738274e-06, "loss": 0.0002, "step": 6045 }, { "epoch": 0.38963717213378873, "grad_norm": 0.13349859351138746, "learning_rate": 6.7848191908342285e-06, "loss": 0.0002, "step": 6046 }, { "epoch": 0.3897016175807179, "grad_norm": 0.01829255571236091, "learning_rate": 6.784103114930183e-06, "loss": 0.0001, "step": 6047 }, { "epoch": 0.3897660630276471, "grad_norm": 0.005630099985821416, "learning_rate": 6.783387039026137e-06, "loss": 0.0, "step": 6048 }, { "epoch": 0.3898305084745763, "grad_norm": 0.008533366939692776, "learning_rate": 6.7826709631220914e-06, "loss": 0.0, "step": 6049 }, { "epoch": 0.38989495392150547, "grad_norm": 0.21604861616295312, "learning_rate": 6.781954887218045e-06, "loss": 0.0008, "step": 6050 }, { "epoch": 0.3899593993684346, "grad_norm": 0.04892616443141344, "learning_rate": 6.781238811313999e-06, "loss": 0.0001, "step": 6051 }, { "epoch": 0.3900238448153638, "grad_norm": 0.003492646098469076, "learning_rate": 6.780522735409954e-06, "loss": 0.0, "step": 6052 }, { "epoch": 0.39008829026229297, "grad_norm": 0.17101291092330484, "learning_rate": 6.779806659505909e-06, "loss": 0.0007, "step": 6053 }, { "epoch": 0.39015273570922215, "grad_norm": 0.0006625030070719406, "learning_rate": 6.779090583601863e-06, "loss": 0.0, "step": 6054 }, { "epoch": 0.39021718115615134, "grad_norm": 0.0407639185230089, "learning_rate": 6.778374507697817e-06, "loss": 0.0001, "step": 6055 }, { "epoch": 0.39028162660308047, "grad_norm": 0.017097571873581942, "learning_rate": 6.777658431793771e-06, "loss": 0.0, "step": 6056 }, { "epoch": 0.39034607205000965, "grad_norm": 0.0012163750468424425, "learning_rate": 6.776942355889725e-06, "loss": 0.0, "step": 6057 }, { "epoch": 0.39041051749693884, "grad_norm": 0.008539081604194343, "learning_rate": 6.776226279985679e-06, "loss": 0.0, "step": 6058 }, { "epoch": 0.390474962943868, "grad_norm": 0.02127820832464942, "learning_rate": 6.7755102040816336e-06, "loss": 0.0016, "step": 6059 }, { "epoch": 0.3905394083907972, "grad_norm": 0.05713715860478264, "learning_rate": 6.774794128177588e-06, "loss": 0.0002, "step": 6060 }, { "epoch": 0.3906038538377264, "grad_norm": 0.040865544077726086, "learning_rate": 6.774078052273541e-06, "loss": 0.0003, "step": 6061 }, { "epoch": 0.3906682992846555, "grad_norm": 0.0008292018234182559, "learning_rate": 6.773361976369496e-06, "loss": 0.0, "step": 6062 }, { "epoch": 0.3907327447315847, "grad_norm": 0.10168170810440107, "learning_rate": 6.77264590046545e-06, "loss": 0.0004, "step": 6063 }, { "epoch": 0.3907971901785139, "grad_norm": 0.003782680501955245, "learning_rate": 6.771929824561404e-06, "loss": 0.0, "step": 6064 }, { "epoch": 0.39086163562544307, "grad_norm": 0.07099556273375927, "learning_rate": 6.7712137486573585e-06, "loss": 0.0001, "step": 6065 }, { "epoch": 0.39092608107237226, "grad_norm": 0.0079087437697562, "learning_rate": 6.770497672753312e-06, "loss": 0.0, "step": 6066 }, { "epoch": 0.3909905265193014, "grad_norm": 0.0015870679381851005, "learning_rate": 6.769781596849266e-06, "loss": 0.0, "step": 6067 }, { "epoch": 0.39105497196623057, "grad_norm": 0.23510664546596716, "learning_rate": 6.7690655209452206e-06, "loss": 0.001, "step": 6068 }, { "epoch": 0.39111941741315975, "grad_norm": 0.00031801601169482763, "learning_rate": 6.768349445041175e-06, "loss": 0.0, "step": 6069 }, { "epoch": 0.39118386286008894, "grad_norm": 0.004575363429178179, "learning_rate": 6.767633369137129e-06, "loss": 0.0, "step": 6070 }, { "epoch": 0.3912483083070181, "grad_norm": 0.060214690750501815, "learning_rate": 6.766917293233083e-06, "loss": 0.0002, "step": 6071 }, { "epoch": 0.3913127537539473, "grad_norm": 0.0010421286728905228, "learning_rate": 6.766201217329037e-06, "loss": 0.0, "step": 6072 }, { "epoch": 0.39137719920087644, "grad_norm": 0.4869503651905972, "learning_rate": 6.765485141424991e-06, "loss": 0.0004, "step": 6073 }, { "epoch": 0.3914416446478056, "grad_norm": 0.21821732891056522, "learning_rate": 6.7647690655209455e-06, "loss": 0.0003, "step": 6074 }, { "epoch": 0.3915060900947348, "grad_norm": 0.006221830004070171, "learning_rate": 6.764052989616901e-06, "loss": 0.0, "step": 6075 }, { "epoch": 0.391570535541664, "grad_norm": 0.0037303944601452836, "learning_rate": 6.763336913712855e-06, "loss": 0.0, "step": 6076 }, { "epoch": 0.3916349809885932, "grad_norm": 0.00043963797693598783, "learning_rate": 6.762620837808808e-06, "loss": 0.0, "step": 6077 }, { "epoch": 0.39169942643552236, "grad_norm": 0.3138164567095846, "learning_rate": 6.761904761904763e-06, "loss": 0.0012, "step": 6078 }, { "epoch": 0.3917638718824515, "grad_norm": 0.011132950687805643, "learning_rate": 6.761188686000717e-06, "loss": 0.0, "step": 6079 }, { "epoch": 0.39182831732938067, "grad_norm": 0.46684292043449677, "learning_rate": 6.760472610096671e-06, "loss": 0.0011, "step": 6080 }, { "epoch": 0.39189276277630986, "grad_norm": 0.0036284189679277653, "learning_rate": 6.759756534192626e-06, "loss": 0.0, "step": 6081 }, { "epoch": 0.39195720822323904, "grad_norm": 0.00039873388616156857, "learning_rate": 6.759040458288579e-06, "loss": 0.0, "step": 6082 }, { "epoch": 0.3920216536701682, "grad_norm": 0.009515007808803775, "learning_rate": 6.758324382384533e-06, "loss": 0.0, "step": 6083 }, { "epoch": 0.39208609911709735, "grad_norm": 0.05785692576468786, "learning_rate": 6.757608306480488e-06, "loss": 0.0002, "step": 6084 }, { "epoch": 0.39215054456402654, "grad_norm": 0.00735022972663383, "learning_rate": 6.756892230576442e-06, "loss": 0.0, "step": 6085 }, { "epoch": 0.3922149900109557, "grad_norm": 0.0009415286435785793, "learning_rate": 6.756176154672396e-06, "loss": 0.0, "step": 6086 }, { "epoch": 0.3922794354578849, "grad_norm": 0.02875433070604497, "learning_rate": 6.75546007876835e-06, "loss": 0.0, "step": 6087 }, { "epoch": 0.3923438809048141, "grad_norm": 0.04135847135380983, "learning_rate": 6.754744002864304e-06, "loss": 0.0, "step": 6088 }, { "epoch": 0.3924083263517433, "grad_norm": 0.004634106539941361, "learning_rate": 6.754027926960258e-06, "loss": 0.0, "step": 6089 }, { "epoch": 0.3924727717986724, "grad_norm": 0.37803271956724577, "learning_rate": 6.753311851056213e-06, "loss": 0.003, "step": 6090 }, { "epoch": 0.3925372172456016, "grad_norm": 0.0013209306774567687, "learning_rate": 6.752595775152166e-06, "loss": 0.0, "step": 6091 }, { "epoch": 0.3926016626925308, "grad_norm": 0.008602022225160995, "learning_rate": 6.75187969924812e-06, "loss": 0.0, "step": 6092 }, { "epoch": 0.39266610813945996, "grad_norm": 0.0026656787122006495, "learning_rate": 6.751163623344075e-06, "loss": 0.0, "step": 6093 }, { "epoch": 0.39273055358638914, "grad_norm": 0.12914930173706923, "learning_rate": 6.750447547440029e-06, "loss": 0.0003, "step": 6094 }, { "epoch": 0.39279499903331827, "grad_norm": 0.10282103505984658, "learning_rate": 6.749731471535983e-06, "loss": 0.0001, "step": 6095 }, { "epoch": 0.39285944448024746, "grad_norm": 0.013621648850027368, "learning_rate": 6.749015395631937e-06, "loss": 0.0, "step": 6096 }, { "epoch": 0.39292388992717664, "grad_norm": 0.038448006543109814, "learning_rate": 6.748299319727891e-06, "loss": 0.0001, "step": 6097 }, { "epoch": 0.3929883353741058, "grad_norm": 0.044204079857528784, "learning_rate": 6.747583243823845e-06, "loss": 0.0006, "step": 6098 }, { "epoch": 0.393052780821035, "grad_norm": 0.046062306311024155, "learning_rate": 6.7468671679198004e-06, "loss": 0.0007, "step": 6099 }, { "epoch": 0.3931172262679642, "grad_norm": 0.007857325413024335, "learning_rate": 6.746151092015755e-06, "loss": 0.0001, "step": 6100 }, { "epoch": 0.3931816717148933, "grad_norm": 0.01512873154302197, "learning_rate": 6.745435016111709e-06, "loss": 0.0001, "step": 6101 }, { "epoch": 0.3932461171618225, "grad_norm": 0.0032276045978629686, "learning_rate": 6.744718940207663e-06, "loss": 0.0, "step": 6102 }, { "epoch": 0.3933105626087517, "grad_norm": 0.0009057025388732807, "learning_rate": 6.744002864303617e-06, "loss": 0.0, "step": 6103 }, { "epoch": 0.3933750080556809, "grad_norm": 0.03844354321300612, "learning_rate": 6.743286788399571e-06, "loss": 0.0001, "step": 6104 }, { "epoch": 0.39343945350261006, "grad_norm": 0.0012272262348689547, "learning_rate": 6.742570712495525e-06, "loss": 0.0, "step": 6105 }, { "epoch": 0.3935038989495392, "grad_norm": 0.001039082765129518, "learning_rate": 6.74185463659148e-06, "loss": 0.0, "step": 6106 }, { "epoch": 0.3935683443964684, "grad_norm": 0.11303522260833805, "learning_rate": 6.741138560687433e-06, "loss": 0.0003, "step": 6107 }, { "epoch": 0.39363278984339756, "grad_norm": 0.14047053251100844, "learning_rate": 6.7404224847833874e-06, "loss": 0.0009, "step": 6108 }, { "epoch": 0.39369723529032674, "grad_norm": 0.00028781397467615895, "learning_rate": 6.739706408879342e-06, "loss": 0.0, "step": 6109 }, { "epoch": 0.3937616807372559, "grad_norm": 0.0004087873048829891, "learning_rate": 6.738990332975296e-06, "loss": 0.0, "step": 6110 }, { "epoch": 0.3938261261841851, "grad_norm": 0.00040317603909862624, "learning_rate": 6.73827425707125e-06, "loss": 0.0, "step": 6111 }, { "epoch": 0.39389057163111424, "grad_norm": 0.00040317603909862624, "learning_rate": 6.73827425707125e-06, "loss": 0.0043, "step": 6112 }, { "epoch": 0.3939550170780434, "grad_norm": 0.0003019461882047249, "learning_rate": 6.737558181167204e-06, "loss": 0.0, "step": 6113 }, { "epoch": 0.3940194625249726, "grad_norm": 0.006519207135955577, "learning_rate": 6.736842105263158e-06, "loss": 0.0, "step": 6114 }, { "epoch": 0.3940839079719018, "grad_norm": 0.0011064302177114479, "learning_rate": 6.736126029359112e-06, "loss": 0.0, "step": 6115 }, { "epoch": 0.394148353418831, "grad_norm": 0.0011027793071489889, "learning_rate": 6.735409953455067e-06, "loss": 0.0, "step": 6116 }, { "epoch": 0.39421279886576016, "grad_norm": 0.003786054726373903, "learning_rate": 6.734693877551021e-06, "loss": 0.0, "step": 6117 }, { "epoch": 0.3942772443126893, "grad_norm": 0.28807663983357745, "learning_rate": 6.7339778016469744e-06, "loss": 0.0002, "step": 6118 }, { "epoch": 0.3943416897596185, "grad_norm": 0.19951675602869184, "learning_rate": 6.733261725742929e-06, "loss": 0.0021, "step": 6119 }, { "epoch": 0.39440613520654766, "grad_norm": 0.0014406398795617093, "learning_rate": 6.732545649838883e-06, "loss": 0.0, "step": 6120 }, { "epoch": 0.39447058065347684, "grad_norm": 0.05896528389795827, "learning_rate": 6.731829573934837e-06, "loss": 0.0002, "step": 6121 }, { "epoch": 0.394535026100406, "grad_norm": 0.0004485638853455591, "learning_rate": 6.731113498030792e-06, "loss": 0.0, "step": 6122 }, { "epoch": 0.39459947154733516, "grad_norm": 0.000655233336787404, "learning_rate": 6.730397422126747e-06, "loss": 0.0, "step": 6123 }, { "epoch": 0.39466391699426434, "grad_norm": 1.2452657214560217, "learning_rate": 6.7296813462227e-06, "loss": 0.0059, "step": 6124 }, { "epoch": 0.3947283624411935, "grad_norm": 0.006466952664939254, "learning_rate": 6.7289652703186545e-06, "loss": 0.0, "step": 6125 }, { "epoch": 0.3947928078881227, "grad_norm": 0.11695563902920848, "learning_rate": 6.728249194414609e-06, "loss": 0.0009, "step": 6126 }, { "epoch": 0.3948572533350519, "grad_norm": 0.0054357978734342165, "learning_rate": 6.727533118510563e-06, "loss": 0.0, "step": 6127 }, { "epoch": 0.3949216987819811, "grad_norm": 0.010678078004628366, "learning_rate": 6.726817042606517e-06, "loss": 0.0, "step": 6128 }, { "epoch": 0.3949861442289102, "grad_norm": 0.004239491570611307, "learning_rate": 6.726100966702471e-06, "loss": 0.0, "step": 6129 }, { "epoch": 0.3950505896758394, "grad_norm": 0.4474075958280737, "learning_rate": 6.725384890798425e-06, "loss": 0.0036, "step": 6130 }, { "epoch": 0.3951150351227686, "grad_norm": 0.008688036460937971, "learning_rate": 6.7246688148943795e-06, "loss": 0.0, "step": 6131 }, { "epoch": 0.39517948056969776, "grad_norm": 0.0029086975973019197, "learning_rate": 6.723952738990334e-06, "loss": 0.0, "step": 6132 }, { "epoch": 0.39524392601662695, "grad_norm": 0.00804262993046448, "learning_rate": 6.723236663086288e-06, "loss": 0.0001, "step": 6133 }, { "epoch": 0.3953083714635561, "grad_norm": 0.001383582353238081, "learning_rate": 6.7225205871822415e-06, "loss": 0.0, "step": 6134 }, { "epoch": 0.39537281691048526, "grad_norm": 0.002657203034535678, "learning_rate": 6.721804511278196e-06, "loss": 0.0, "step": 6135 }, { "epoch": 0.39543726235741444, "grad_norm": 0.22318623860445258, "learning_rate": 6.72108843537415e-06, "loss": 0.0007, "step": 6136 }, { "epoch": 0.3955017078043436, "grad_norm": 0.060305021991659505, "learning_rate": 6.720372359470104e-06, "loss": 0.0001, "step": 6137 }, { "epoch": 0.3955661532512728, "grad_norm": 0.005253792241862154, "learning_rate": 6.719656283566059e-06, "loss": 0.0, "step": 6138 }, { "epoch": 0.395630598698202, "grad_norm": 0.0004886858584370133, "learning_rate": 6.718940207662012e-06, "loss": 0.0, "step": 6139 }, { "epoch": 0.3956950441451311, "grad_norm": 0.005571864209276483, "learning_rate": 6.7182241317579665e-06, "loss": 0.0, "step": 6140 }, { "epoch": 0.3957594895920603, "grad_norm": 0.0005897999561558652, "learning_rate": 6.717508055853921e-06, "loss": 0.0, "step": 6141 }, { "epoch": 0.3958239350389895, "grad_norm": 0.018974760515579954, "learning_rate": 6.716791979949875e-06, "loss": 0.0, "step": 6142 }, { "epoch": 0.3958883804859187, "grad_norm": 0.005510618869750594, "learning_rate": 6.716075904045829e-06, "loss": 0.0, "step": 6143 }, { "epoch": 0.39595282593284786, "grad_norm": 0.005582761552538098, "learning_rate": 6.715359828141783e-06, "loss": 0.0, "step": 6144 }, { "epoch": 0.396017271379777, "grad_norm": 0.0009583769130436984, "learning_rate": 6.714643752237737e-06, "loss": 0.0, "step": 6145 }, { "epoch": 0.3960817168267062, "grad_norm": 0.0028119638281370062, "learning_rate": 6.713927676333692e-06, "loss": 0.0, "step": 6146 }, { "epoch": 0.39614616227363536, "grad_norm": 0.0350255340781061, "learning_rate": 6.7132116004296465e-06, "loss": 0.0001, "step": 6147 }, { "epoch": 0.39621060772056454, "grad_norm": 0.0018648797668935515, "learning_rate": 6.712495524525601e-06, "loss": 0.0, "step": 6148 }, { "epoch": 0.39627505316749373, "grad_norm": 0.0003898422824717099, "learning_rate": 6.711779448621555e-06, "loss": 0.0, "step": 6149 }, { "epoch": 0.3963394986144229, "grad_norm": 0.005369384247029605, "learning_rate": 6.711063372717509e-06, "loss": 0.0, "step": 6150 }, { "epoch": 0.39640394406135204, "grad_norm": 0.0025898006320916687, "learning_rate": 6.710347296813463e-06, "loss": 0.0, "step": 6151 }, { "epoch": 0.3964683895082812, "grad_norm": 0.05196281984407206, "learning_rate": 6.709631220909417e-06, "loss": 0.0005, "step": 6152 }, { "epoch": 0.3965328349552104, "grad_norm": 0.07452371645686365, "learning_rate": 6.7089151450053715e-06, "loss": 0.0, "step": 6153 }, { "epoch": 0.3965972804021396, "grad_norm": 0.19650088216253572, "learning_rate": 6.708199069101326e-06, "loss": 0.0024, "step": 6154 }, { "epoch": 0.3966617258490688, "grad_norm": 0.12044238442729664, "learning_rate": 6.707482993197279e-06, "loss": 0.0003, "step": 6155 }, { "epoch": 0.39672617129599796, "grad_norm": 0.3482266044446364, "learning_rate": 6.7067669172932335e-06, "loss": 0.0012, "step": 6156 }, { "epoch": 0.3967906167429271, "grad_norm": 0.011913191458644948, "learning_rate": 6.706050841389188e-06, "loss": 0.0, "step": 6157 }, { "epoch": 0.3968550621898563, "grad_norm": 0.006744656352361722, "learning_rate": 6.705334765485142e-06, "loss": 0.0, "step": 6158 }, { "epoch": 0.39691950763678546, "grad_norm": 0.0047692479281992144, "learning_rate": 6.7046186895810964e-06, "loss": 0.0, "step": 6159 }, { "epoch": 0.39698395308371465, "grad_norm": 0.39355205601269155, "learning_rate": 6.70390261367705e-06, "loss": 0.0013, "step": 6160 }, { "epoch": 0.39704839853064383, "grad_norm": 0.20822374123710258, "learning_rate": 6.703186537773004e-06, "loss": 0.001, "step": 6161 }, { "epoch": 0.39711284397757296, "grad_norm": 0.00042890634310612283, "learning_rate": 6.7024704618689585e-06, "loss": 0.0, "step": 6162 }, { "epoch": 0.39717728942450214, "grad_norm": 0.32812195731069266, "learning_rate": 6.701754385964913e-06, "loss": 0.0003, "step": 6163 }, { "epoch": 0.39724173487143133, "grad_norm": 0.19006787676608586, "learning_rate": 6.701038310060867e-06, "loss": 0.0004, "step": 6164 }, { "epoch": 0.3973061803183605, "grad_norm": 0.17109802580415745, "learning_rate": 6.7003222341568205e-06, "loss": 0.0005, "step": 6165 }, { "epoch": 0.3973706257652897, "grad_norm": 0.009018840277326589, "learning_rate": 6.699606158252775e-06, "loss": 0.0001, "step": 6166 }, { "epoch": 0.3974350712122189, "grad_norm": 0.0005974862827873541, "learning_rate": 6.698890082348729e-06, "loss": 0.0, "step": 6167 }, { "epoch": 0.397499516659148, "grad_norm": 0.0005127417844990209, "learning_rate": 6.6981740064446834e-06, "loss": 0.0, "step": 6168 }, { "epoch": 0.3975639621060772, "grad_norm": 0.013986048446789382, "learning_rate": 6.697457930540638e-06, "loss": 0.0001, "step": 6169 }, { "epoch": 0.3976284075530064, "grad_norm": 0.2875345027993739, "learning_rate": 6.696741854636593e-06, "loss": 0.0023, "step": 6170 }, { "epoch": 0.39769285299993556, "grad_norm": 0.00013710756560736585, "learning_rate": 6.696025778732546e-06, "loss": 0.0, "step": 6171 }, { "epoch": 0.39775729844686475, "grad_norm": 0.0073792311350031, "learning_rate": 6.695309702828501e-06, "loss": 0.0, "step": 6172 }, { "epoch": 0.3978217438937939, "grad_norm": 0.003827701575672846, "learning_rate": 6.694593626924455e-06, "loss": 0.0, "step": 6173 }, { "epoch": 0.39788618934072306, "grad_norm": 0.0034136686566082096, "learning_rate": 6.693877551020409e-06, "loss": 0.0, "step": 6174 }, { "epoch": 0.39795063478765225, "grad_norm": 0.02060817741945624, "learning_rate": 6.6931614751163635e-06, "loss": 0.0002, "step": 6175 }, { "epoch": 0.39801508023458143, "grad_norm": 0.0006820699695873835, "learning_rate": 6.692445399212317e-06, "loss": 0.0, "step": 6176 }, { "epoch": 0.3980795256815106, "grad_norm": 0.007219121822066729, "learning_rate": 6.691729323308271e-06, "loss": 0.0, "step": 6177 }, { "epoch": 0.3981439711284398, "grad_norm": 0.0012653471260716737, "learning_rate": 6.6910132474042256e-06, "loss": 0.0, "step": 6178 }, { "epoch": 0.39820841657536893, "grad_norm": 0.017223988011398236, "learning_rate": 6.69029717150018e-06, "loss": 0.0001, "step": 6179 }, { "epoch": 0.3982728620222981, "grad_norm": 0.6595386799147767, "learning_rate": 6.689581095596134e-06, "loss": 0.0037, "step": 6180 }, { "epoch": 0.3983373074692273, "grad_norm": 0.016393956086872264, "learning_rate": 6.688865019692088e-06, "loss": 0.0, "step": 6181 }, { "epoch": 0.3984017529161565, "grad_norm": 0.006429886962872136, "learning_rate": 6.688148943788042e-06, "loss": 0.0, "step": 6182 }, { "epoch": 0.39846619836308567, "grad_norm": 0.03475362813153784, "learning_rate": 6.687432867883996e-06, "loss": 0.0, "step": 6183 }, { "epoch": 0.3985306438100148, "grad_norm": 0.002141625646072484, "learning_rate": 6.6867167919799505e-06, "loss": 0.0, "step": 6184 }, { "epoch": 0.398595089256944, "grad_norm": 0.009711719164443825, "learning_rate": 6.686000716075905e-06, "loss": 0.0001, "step": 6185 }, { "epoch": 0.39865953470387316, "grad_norm": 0.007483122794986775, "learning_rate": 6.685284640171858e-06, "loss": 0.0001, "step": 6186 }, { "epoch": 0.39872398015080235, "grad_norm": 0.0013225585110462972, "learning_rate": 6.6845685642678126e-06, "loss": 0.0, "step": 6187 }, { "epoch": 0.39878842559773153, "grad_norm": 0.0007087233184781159, "learning_rate": 6.683852488363767e-06, "loss": 0.0, "step": 6188 }, { "epoch": 0.3988528710446607, "grad_norm": 0.005808769746201201, "learning_rate": 6.683136412459721e-06, "loss": 0.0, "step": 6189 }, { "epoch": 0.39891731649158985, "grad_norm": 0.03269624359247558, "learning_rate": 6.682420336555675e-06, "loss": 0.0001, "step": 6190 }, { "epoch": 0.39898176193851903, "grad_norm": 0.06306389772063731, "learning_rate": 6.681704260651629e-06, "loss": 0.0003, "step": 6191 }, { "epoch": 0.3990462073854482, "grad_norm": 0.0007749667094711631, "learning_rate": 6.680988184747583e-06, "loss": 0.0, "step": 6192 }, { "epoch": 0.3991106528323774, "grad_norm": 1.1743724405160312, "learning_rate": 6.680272108843538e-06, "loss": 0.0051, "step": 6193 }, { "epoch": 0.3991750982793066, "grad_norm": 0.0006233756383026622, "learning_rate": 6.679556032939493e-06, "loss": 0.0, "step": 6194 }, { "epoch": 0.39923954372623577, "grad_norm": 0.1845996974583024, "learning_rate": 6.678839957035447e-06, "loss": 0.0012, "step": 6195 }, { "epoch": 0.3993039891731649, "grad_norm": 0.04961964414596718, "learning_rate": 6.678123881131401e-06, "loss": 0.0001, "step": 6196 }, { "epoch": 0.3993684346200941, "grad_norm": 0.002305596368655089, "learning_rate": 6.677407805227355e-06, "loss": 0.0, "step": 6197 }, { "epoch": 0.39943288006702327, "grad_norm": 0.03989655149228882, "learning_rate": 6.676691729323309e-06, "loss": 0.0, "step": 6198 }, { "epoch": 0.39949732551395245, "grad_norm": 0.11639607532249155, "learning_rate": 6.675975653419263e-06, "loss": 0.0017, "step": 6199 }, { "epoch": 0.39956177096088163, "grad_norm": 0.08423818726228123, "learning_rate": 6.675259577515218e-06, "loss": 0.0017, "step": 6200 }, { "epoch": 0.39962621640781076, "grad_norm": 0.0009415812576501265, "learning_rate": 6.674543501611172e-06, "loss": 0.0, "step": 6201 }, { "epoch": 0.39969066185473995, "grad_norm": 0.03231412558563971, "learning_rate": 6.673827425707125e-06, "loss": 0.0, "step": 6202 }, { "epoch": 0.39975510730166913, "grad_norm": 0.022533085776875975, "learning_rate": 6.67311134980308e-06, "loss": 0.0001, "step": 6203 }, { "epoch": 0.3998195527485983, "grad_norm": 0.0004932371042244677, "learning_rate": 6.672395273899034e-06, "loss": 0.0, "step": 6204 }, { "epoch": 0.3998839981955275, "grad_norm": 0.0016411312170160164, "learning_rate": 6.671679197994988e-06, "loss": 0.0, "step": 6205 }, { "epoch": 0.3999484436424567, "grad_norm": 0.00045268622319586543, "learning_rate": 6.670963122090942e-06, "loss": 0.0, "step": 6206 }, { "epoch": 0.4000128890893858, "grad_norm": 0.06922491009062504, "learning_rate": 6.670247046186896e-06, "loss": 0.0001, "step": 6207 }, { "epoch": 0.400077334536315, "grad_norm": 0.012176174861812075, "learning_rate": 6.66953097028285e-06, "loss": 0.0, "step": 6208 }, { "epoch": 0.4001417799832442, "grad_norm": 0.02192805518688391, "learning_rate": 6.668814894378805e-06, "loss": 0.0, "step": 6209 }, { "epoch": 0.40020622543017337, "grad_norm": 0.01031527857257621, "learning_rate": 6.668098818474759e-06, "loss": 0.0, "step": 6210 }, { "epoch": 0.40027067087710255, "grad_norm": 0.2088355398948857, "learning_rate": 6.667382742570712e-06, "loss": 0.0004, "step": 6211 }, { "epoch": 0.4003351163240317, "grad_norm": 0.006589784635158972, "learning_rate": 6.666666666666667e-06, "loss": 0.0, "step": 6212 }, { "epoch": 0.40039956177096087, "grad_norm": 0.590162759370787, "learning_rate": 6.665950590762621e-06, "loss": 0.0016, "step": 6213 }, { "epoch": 0.40046400721789005, "grad_norm": 0.4982781116509973, "learning_rate": 6.665234514858575e-06, "loss": 0.0037, "step": 6214 }, { "epoch": 0.40052845266481923, "grad_norm": 0.12883591900735675, "learning_rate": 6.6645184389545295e-06, "loss": 0.0, "step": 6215 }, { "epoch": 0.4005928981117484, "grad_norm": 0.15875782090355953, "learning_rate": 6.663802363050485e-06, "loss": 0.003, "step": 6216 }, { "epoch": 0.4006573435586776, "grad_norm": 0.021264245585666988, "learning_rate": 6.663086287146439e-06, "loss": 0.0, "step": 6217 }, { "epoch": 0.40072178900560673, "grad_norm": 0.088981737988645, "learning_rate": 6.6623702112423924e-06, "loss": 0.0001, "step": 6218 }, { "epoch": 0.4007862344525359, "grad_norm": 0.002934769665413855, "learning_rate": 6.661654135338347e-06, "loss": 0.0, "step": 6219 }, { "epoch": 0.4008506798994651, "grad_norm": 0.15586118601461513, "learning_rate": 6.660938059434301e-06, "loss": 0.0006, "step": 6220 }, { "epoch": 0.4009151253463943, "grad_norm": 0.28255903730610116, "learning_rate": 6.660221983530255e-06, "loss": 0.0004, "step": 6221 }, { "epoch": 0.40097957079332347, "grad_norm": 0.028550108747655678, "learning_rate": 6.659505907626209e-06, "loss": 0.0001, "step": 6222 }, { "epoch": 0.4010440162402526, "grad_norm": 0.27616440845200096, "learning_rate": 6.658789831722163e-06, "loss": 0.002, "step": 6223 }, { "epoch": 0.4011084616871818, "grad_norm": 0.008006083560591043, "learning_rate": 6.658073755818117e-06, "loss": 0.0, "step": 6224 }, { "epoch": 0.40117290713411097, "grad_norm": 0.07311599032007556, "learning_rate": 6.657357679914072e-06, "loss": 0.0002, "step": 6225 }, { "epoch": 0.40123735258104015, "grad_norm": 0.0009471099356845897, "learning_rate": 6.656641604010026e-06, "loss": 0.0, "step": 6226 }, { "epoch": 0.40130179802796934, "grad_norm": 0.00239955307892596, "learning_rate": 6.6559255281059794e-06, "loss": 0.0, "step": 6227 }, { "epoch": 0.4013662434748985, "grad_norm": 0.03330112643076966, "learning_rate": 6.655209452201934e-06, "loss": 0.0, "step": 6228 }, { "epoch": 0.40143068892182765, "grad_norm": 0.003579085964833083, "learning_rate": 6.654493376297888e-06, "loss": 0.0, "step": 6229 }, { "epoch": 0.40149513436875683, "grad_norm": 0.0001288371734857651, "learning_rate": 6.653777300393842e-06, "loss": 0.0, "step": 6230 }, { "epoch": 0.401559579815686, "grad_norm": 0.0001809409710459574, "learning_rate": 6.653061224489797e-06, "loss": 0.0, "step": 6231 }, { "epoch": 0.4016240252626152, "grad_norm": 0.0018872319576630108, "learning_rate": 6.65234514858575e-06, "loss": 0.0, "step": 6232 }, { "epoch": 0.4016884707095444, "grad_norm": 0.00037414685857723075, "learning_rate": 6.651629072681704e-06, "loss": 0.0, "step": 6233 }, { "epoch": 0.40175291615647357, "grad_norm": 0.005488616829534191, "learning_rate": 6.650912996777659e-06, "loss": 0.0, "step": 6234 }, { "epoch": 0.4018173616034027, "grad_norm": 0.0043051522679139476, "learning_rate": 6.650196920873613e-06, "loss": 0.0, "step": 6235 }, { "epoch": 0.4018818070503319, "grad_norm": 0.18108599082511184, "learning_rate": 6.649480844969567e-06, "loss": 0.0005, "step": 6236 }, { "epoch": 0.40194625249726107, "grad_norm": 0.0002171191700035768, "learning_rate": 6.648764769065521e-06, "loss": 0.0, "step": 6237 }, { "epoch": 0.40201069794419025, "grad_norm": 0.0021956388300873758, "learning_rate": 6.648048693161475e-06, "loss": 0.0, "step": 6238 }, { "epoch": 0.40207514339111944, "grad_norm": 0.009351226362029775, "learning_rate": 6.647332617257429e-06, "loss": 0.0001, "step": 6239 }, { "epoch": 0.40213958883804857, "grad_norm": 0.00015364747358863305, "learning_rate": 6.6466165413533845e-06, "loss": 0.0, "step": 6240 }, { "epoch": 0.40220403428497775, "grad_norm": 0.0046499385674727165, "learning_rate": 6.645900465449339e-06, "loss": 0.0, "step": 6241 }, { "epoch": 0.40226847973190694, "grad_norm": 0.0010105762902176386, "learning_rate": 6.645184389545293e-06, "loss": 0.0, "step": 6242 }, { "epoch": 0.4023329251788361, "grad_norm": 0.039143908131220026, "learning_rate": 6.6444683136412465e-06, "loss": 0.0001, "step": 6243 }, { "epoch": 0.4023973706257653, "grad_norm": 0.0024909498493526964, "learning_rate": 6.643752237737201e-06, "loss": 0.0, "step": 6244 }, { "epoch": 0.4024618160726945, "grad_norm": 0.030333604358856733, "learning_rate": 6.643036161833155e-06, "loss": 0.0, "step": 6245 }, { "epoch": 0.4025262615196236, "grad_norm": 0.0003121790638401315, "learning_rate": 6.642320085929109e-06, "loss": 0.0, "step": 6246 }, { "epoch": 0.4025907069665528, "grad_norm": 0.05674777091108059, "learning_rate": 6.641604010025064e-06, "loss": 0.0, "step": 6247 }, { "epoch": 0.402655152413482, "grad_norm": 0.029250476772840036, "learning_rate": 6.640887934121017e-06, "loss": 0.0, "step": 6248 }, { "epoch": 0.40271959786041117, "grad_norm": 9.698981643813166e-05, "learning_rate": 6.6401718582169715e-06, "loss": 0.0, "step": 6249 }, { "epoch": 0.40278404330734036, "grad_norm": 0.27877300990720555, "learning_rate": 6.639455782312926e-06, "loss": 0.0026, "step": 6250 }, { "epoch": 0.4028484887542695, "grad_norm": 0.0027097435262068716, "learning_rate": 6.63873970640888e-06, "loss": 0.0, "step": 6251 }, { "epoch": 0.40291293420119867, "grad_norm": 0.05563554116756958, "learning_rate": 6.638023630504834e-06, "loss": 0.0001, "step": 6252 }, { "epoch": 0.40297737964812785, "grad_norm": 0.03642531035428272, "learning_rate": 6.637307554600788e-06, "loss": 0.0001, "step": 6253 }, { "epoch": 0.40304182509505704, "grad_norm": 0.029172964117816117, "learning_rate": 6.636591478696742e-06, "loss": 0.0001, "step": 6254 }, { "epoch": 0.4031062705419862, "grad_norm": 0.002905543593009424, "learning_rate": 6.635875402792696e-06, "loss": 0.0, "step": 6255 }, { "epoch": 0.4031707159889154, "grad_norm": 0.012390442078684512, "learning_rate": 6.635159326888651e-06, "loss": 0.0, "step": 6256 }, { "epoch": 0.40323516143584454, "grad_norm": 0.021038528188060623, "learning_rate": 6.634443250984605e-06, "loss": 0.0002, "step": 6257 }, { "epoch": 0.4032996068827737, "grad_norm": 0.3627454641874893, "learning_rate": 6.6337271750805585e-06, "loss": 0.0011, "step": 6258 }, { "epoch": 0.4033640523297029, "grad_norm": 0.004103812286494867, "learning_rate": 6.633011099176513e-06, "loss": 0.0, "step": 6259 }, { "epoch": 0.4034284977766321, "grad_norm": 0.46436891426010923, "learning_rate": 6.632295023272467e-06, "loss": 0.0023, "step": 6260 }, { "epoch": 0.4034929432235613, "grad_norm": 0.05719079838357991, "learning_rate": 6.631578947368421e-06, "loss": 0.0001, "step": 6261 }, { "epoch": 0.4035573886704904, "grad_norm": 0.001427135059155809, "learning_rate": 6.630862871464376e-06, "loss": 0.0, "step": 6262 }, { "epoch": 0.4036218341174196, "grad_norm": 0.00043134871254642114, "learning_rate": 6.630146795560331e-06, "loss": 0.0, "step": 6263 }, { "epoch": 0.40368627956434877, "grad_norm": 0.005838851823537722, "learning_rate": 6.629430719656284e-06, "loss": 0.0, "step": 6264 }, { "epoch": 0.40375072501127796, "grad_norm": 0.0031162694259560825, "learning_rate": 6.6287146437522385e-06, "loss": 0.0, "step": 6265 }, { "epoch": 0.40381517045820714, "grad_norm": 0.16612453815009942, "learning_rate": 6.627998567848193e-06, "loss": 0.0005, "step": 6266 }, { "epoch": 0.4038796159051363, "grad_norm": 8.110659193095382e-05, "learning_rate": 6.627282491944147e-06, "loss": 0.0, "step": 6267 }, { "epoch": 0.40394406135206545, "grad_norm": 0.09589340309400302, "learning_rate": 6.6265664160401014e-06, "loss": 0.0001, "step": 6268 }, { "epoch": 0.40400850679899464, "grad_norm": 0.0001381144958984862, "learning_rate": 6.625850340136055e-06, "loss": 0.0, "step": 6269 }, { "epoch": 0.4040729522459238, "grad_norm": 0.0010787557333930568, "learning_rate": 6.625134264232009e-06, "loss": 0.0, "step": 6270 }, { "epoch": 0.404137397692853, "grad_norm": 0.0003107536158255425, "learning_rate": 6.6244181883279635e-06, "loss": 0.0, "step": 6271 }, { "epoch": 0.4042018431397822, "grad_norm": 0.008608214525285639, "learning_rate": 6.623702112423918e-06, "loss": 0.0, "step": 6272 }, { "epoch": 0.4042662885867114, "grad_norm": 0.00040518102927398483, "learning_rate": 6.622986036519872e-06, "loss": 0.0, "step": 6273 }, { "epoch": 0.4043307340336405, "grad_norm": 0.36758800740593717, "learning_rate": 6.6222699606158255e-06, "loss": 0.0007, "step": 6274 }, { "epoch": 0.4043951794805697, "grad_norm": 0.012724112433642078, "learning_rate": 6.62155388471178e-06, "loss": 0.0001, "step": 6275 }, { "epoch": 0.4044596249274989, "grad_norm": 0.0034801321594823327, "learning_rate": 6.620837808807734e-06, "loss": 0.0015, "step": 6276 }, { "epoch": 0.40452407037442806, "grad_norm": 0.0022402441000457106, "learning_rate": 6.6201217329036884e-06, "loss": 0.0, "step": 6277 }, { "epoch": 0.40458851582135724, "grad_norm": 0.001198815179465232, "learning_rate": 6.619405656999643e-06, "loss": 0.0, "step": 6278 }, { "epoch": 0.40465296126828637, "grad_norm": 0.02394095008108672, "learning_rate": 6.618689581095596e-06, "loss": 0.0001, "step": 6279 }, { "epoch": 0.40471740671521556, "grad_norm": 0.008677542084958455, "learning_rate": 6.6179735051915505e-06, "loss": 0.0001, "step": 6280 }, { "epoch": 0.40478185216214474, "grad_norm": 0.006015602815613465, "learning_rate": 6.617257429287505e-06, "loss": 0.0, "step": 6281 }, { "epoch": 0.4048462976090739, "grad_norm": 0.00022256715957847932, "learning_rate": 6.616541353383459e-06, "loss": 0.0, "step": 6282 }, { "epoch": 0.4049107430560031, "grad_norm": 0.0021269805569372334, "learning_rate": 6.6158252774794125e-06, "loss": 0.0, "step": 6283 }, { "epoch": 0.4049751885029323, "grad_norm": 0.0019124917188764595, "learning_rate": 6.615109201575367e-06, "loss": 0.0, "step": 6284 }, { "epoch": 0.4050396339498614, "grad_norm": 0.0006371406991821277, "learning_rate": 6.614393125671321e-06, "loss": 0.0, "step": 6285 }, { "epoch": 0.4051040793967906, "grad_norm": 0.012221513144843947, "learning_rate": 6.613677049767276e-06, "loss": 0.0001, "step": 6286 }, { "epoch": 0.4051685248437198, "grad_norm": 0.003629192668777277, "learning_rate": 6.612960973863231e-06, "loss": 0.0, "step": 6287 }, { "epoch": 0.405232970290649, "grad_norm": 0.03566383629410653, "learning_rate": 6.612244897959185e-06, "loss": 0.0001, "step": 6288 }, { "epoch": 0.40529741573757816, "grad_norm": 0.011285903547055491, "learning_rate": 6.611528822055139e-06, "loss": 0.0, "step": 6289 }, { "epoch": 0.4053618611845073, "grad_norm": 0.0027852915169590963, "learning_rate": 6.610812746151093e-06, "loss": 0.0, "step": 6290 }, { "epoch": 0.4054263066314365, "grad_norm": 0.10941760050982087, "learning_rate": 6.610096670247047e-06, "loss": 0.0009, "step": 6291 }, { "epoch": 0.40549075207836566, "grad_norm": 0.002644033621041055, "learning_rate": 6.609380594343001e-06, "loss": 0.0, "step": 6292 }, { "epoch": 0.40555519752529484, "grad_norm": 0.00019279917264443554, "learning_rate": 6.6086645184389555e-06, "loss": 0.0, "step": 6293 }, { "epoch": 0.405619642972224, "grad_norm": 0.0005731582641601562, "learning_rate": 6.60794844253491e-06, "loss": 0.0, "step": 6294 }, { "epoch": 0.4056840884191532, "grad_norm": 0.006801553876835982, "learning_rate": 6.607232366630863e-06, "loss": 0.0, "step": 6295 }, { "epoch": 0.40574853386608234, "grad_norm": 0.003933302052609157, "learning_rate": 6.6065162907268176e-06, "loss": 0.0, "step": 6296 }, { "epoch": 0.4058129793130115, "grad_norm": 0.0006329129133961246, "learning_rate": 6.605800214822772e-06, "loss": 0.0, "step": 6297 }, { "epoch": 0.4058774247599407, "grad_norm": 0.0004284577227252947, "learning_rate": 6.605084138918726e-06, "loss": 0.0, "step": 6298 }, { "epoch": 0.4059418702068699, "grad_norm": 0.01899693347187632, "learning_rate": 6.60436806301468e-06, "loss": 0.0001, "step": 6299 }, { "epoch": 0.4060063156537991, "grad_norm": 0.008696248565383551, "learning_rate": 6.603651987110634e-06, "loss": 0.0001, "step": 6300 }, { "epoch": 0.4060707611007282, "grad_norm": 0.00046881937012844134, "learning_rate": 6.602935911206588e-06, "loss": 0.0, "step": 6301 }, { "epoch": 0.4061352065476574, "grad_norm": 0.0021209476661694926, "learning_rate": 6.6022198353025425e-06, "loss": 0.0, "step": 6302 }, { "epoch": 0.4061996519945866, "grad_norm": 0.0002067565192842928, "learning_rate": 6.601503759398497e-06, "loss": 0.0, "step": 6303 }, { "epoch": 0.40626409744151576, "grad_norm": 0.005868360340397581, "learning_rate": 6.60078768349445e-06, "loss": 0.0, "step": 6304 }, { "epoch": 0.40632854288844494, "grad_norm": 0.0006698871174751843, "learning_rate": 6.6000716075904046e-06, "loss": 0.0, "step": 6305 }, { "epoch": 0.40639298833537413, "grad_norm": 0.008249500911233668, "learning_rate": 6.599355531686359e-06, "loss": 0.0, "step": 6306 }, { "epoch": 0.40645743378230326, "grad_norm": 0.0003297776276702966, "learning_rate": 6.598639455782313e-06, "loss": 0.0, "step": 6307 }, { "epoch": 0.40652187922923244, "grad_norm": 0.763014566614935, "learning_rate": 6.5979233798782675e-06, "loss": 0.0032, "step": 6308 }, { "epoch": 0.4065863246761616, "grad_norm": 0.0030389339003442026, "learning_rate": 6.597207303974221e-06, "loss": 0.0, "step": 6309 }, { "epoch": 0.4066507701230908, "grad_norm": 0.002413791142186581, "learning_rate": 6.596491228070177e-06, "loss": 0.0, "step": 6310 }, { "epoch": 0.40671521557002, "grad_norm": 0.037547443349476, "learning_rate": 6.59577515216613e-06, "loss": 0.0001, "step": 6311 }, { "epoch": 0.4067796610169492, "grad_norm": 0.0012816616887669533, "learning_rate": 6.595059076262085e-06, "loss": 0.0, "step": 6312 }, { "epoch": 0.4068441064638783, "grad_norm": 0.4274243489749732, "learning_rate": 6.594343000358039e-06, "loss": 0.0025, "step": 6313 }, { "epoch": 0.4069085519108075, "grad_norm": 0.0009116007250358199, "learning_rate": 6.593626924453993e-06, "loss": 0.0, "step": 6314 }, { "epoch": 0.4069729973577367, "grad_norm": 0.4122105153435198, "learning_rate": 6.592910848549947e-06, "loss": 0.0007, "step": 6315 }, { "epoch": 0.40703744280466586, "grad_norm": 0.032060547766800215, "learning_rate": 6.592194772645901e-06, "loss": 0.0001, "step": 6316 }, { "epoch": 0.40710188825159505, "grad_norm": 0.0014138136504818558, "learning_rate": 6.591478696741855e-06, "loss": 0.0, "step": 6317 }, { "epoch": 0.4071663336985242, "grad_norm": 0.020313332069530073, "learning_rate": 6.59076262083781e-06, "loss": 0.0, "step": 6318 }, { "epoch": 0.40723077914545336, "grad_norm": 0.0225892251709381, "learning_rate": 6.590046544933764e-06, "loss": 0.0, "step": 6319 }, { "epoch": 0.40729522459238254, "grad_norm": 0.06057017977650917, "learning_rate": 6.589330469029717e-06, "loss": 0.0002, "step": 6320 }, { "epoch": 0.4073596700393117, "grad_norm": 0.005657030130166821, "learning_rate": 6.588614393125672e-06, "loss": 0.0, "step": 6321 }, { "epoch": 0.4074241154862409, "grad_norm": 0.27193709574173774, "learning_rate": 6.587898317221626e-06, "loss": 0.0003, "step": 6322 }, { "epoch": 0.4074885609331701, "grad_norm": 0.005401159223127921, "learning_rate": 6.58718224131758e-06, "loss": 0.0, "step": 6323 }, { "epoch": 0.4075530063800992, "grad_norm": 0.190735859372399, "learning_rate": 6.5864661654135345e-06, "loss": 0.0008, "step": 6324 }, { "epoch": 0.4076174518270284, "grad_norm": 0.02942071707522714, "learning_rate": 6.585750089509488e-06, "loss": 0.0, "step": 6325 }, { "epoch": 0.4076818972739576, "grad_norm": 0.04759147327995806, "learning_rate": 6.585034013605442e-06, "loss": 0.0002, "step": 6326 }, { "epoch": 0.4077463427208868, "grad_norm": 0.0783865454767804, "learning_rate": 6.584317937701397e-06, "loss": 0.0001, "step": 6327 }, { "epoch": 0.40781078816781596, "grad_norm": 0.009902174526617688, "learning_rate": 6.583601861797351e-06, "loss": 0.0015, "step": 6328 }, { "epoch": 0.4078752336147451, "grad_norm": 0.3001118059233158, "learning_rate": 6.582885785893305e-06, "loss": 0.0004, "step": 6329 }, { "epoch": 0.4079396790616743, "grad_norm": 0.023572379667072773, "learning_rate": 6.582169709989259e-06, "loss": 0.0001, "step": 6330 }, { "epoch": 0.40800412450860346, "grad_norm": 0.005017211603975058, "learning_rate": 6.581453634085213e-06, "loss": 0.0, "step": 6331 }, { "epoch": 0.40806856995553265, "grad_norm": 0.03636838824935784, "learning_rate": 6.580737558181167e-06, "loss": 0.0, "step": 6332 }, { "epoch": 0.40813301540246183, "grad_norm": 0.1252912317573341, "learning_rate": 6.580021482277122e-06, "loss": 0.0001, "step": 6333 }, { "epoch": 0.408197460849391, "grad_norm": 0.0002858440281521137, "learning_rate": 6.579305406373077e-06, "loss": 0.0, "step": 6334 }, { "epoch": 0.40826190629632014, "grad_norm": 0.008682301258015307, "learning_rate": 6.578589330469031e-06, "loss": 0.0001, "step": 6335 }, { "epoch": 0.4083263517432493, "grad_norm": 0.0008786334737352812, "learning_rate": 6.5778732545649844e-06, "loss": 0.0, "step": 6336 }, { "epoch": 0.4083907971901785, "grad_norm": 0.054658848205107594, "learning_rate": 6.577157178660939e-06, "loss": 0.0001, "step": 6337 }, { "epoch": 0.4084552426371077, "grad_norm": 0.009756242819836084, "learning_rate": 6.576441102756893e-06, "loss": 0.0, "step": 6338 }, { "epoch": 0.4085196880840369, "grad_norm": 0.24148468360646674, "learning_rate": 6.575725026852847e-06, "loss": 0.0006, "step": 6339 }, { "epoch": 0.40858413353096606, "grad_norm": 0.007297027798895586, "learning_rate": 6.575008950948802e-06, "loss": 0.0, "step": 6340 }, { "epoch": 0.4086485789778952, "grad_norm": 0.003341223520785786, "learning_rate": 6.574292875044755e-06, "loss": 0.0, "step": 6341 }, { "epoch": 0.4087130244248244, "grad_norm": 0.0011016815095849727, "learning_rate": 6.573576799140709e-06, "loss": 0.0, "step": 6342 }, { "epoch": 0.40877746987175356, "grad_norm": 0.027504932317853434, "learning_rate": 6.572860723236664e-06, "loss": 0.0001, "step": 6343 }, { "epoch": 0.40884191531868275, "grad_norm": 0.01037361559934303, "learning_rate": 6.572144647332618e-06, "loss": 0.0, "step": 6344 }, { "epoch": 0.40890636076561193, "grad_norm": 0.2539894847889759, "learning_rate": 6.571428571428572e-06, "loss": 0.0018, "step": 6345 }, { "epoch": 0.40897080621254106, "grad_norm": 0.01065426405305903, "learning_rate": 6.570712495524526e-06, "loss": 0.0001, "step": 6346 }, { "epoch": 0.40903525165947024, "grad_norm": 0.0003099955461148386, "learning_rate": 6.56999641962048e-06, "loss": 0.0, "step": 6347 }, { "epoch": 0.40909969710639943, "grad_norm": 0.0006434004654581663, "learning_rate": 6.569280343716434e-06, "loss": 0.0, "step": 6348 }, { "epoch": 0.4091641425533286, "grad_norm": 0.004250379747714681, "learning_rate": 6.568564267812389e-06, "loss": 0.0, "step": 6349 }, { "epoch": 0.4092285880002578, "grad_norm": 0.0016217436448476318, "learning_rate": 6.567848191908343e-06, "loss": 0.0, "step": 6350 }, { "epoch": 0.409293033447187, "grad_norm": 0.10920754880728271, "learning_rate": 6.567132116004296e-06, "loss": 0.0014, "step": 6351 }, { "epoch": 0.4093574788941161, "grad_norm": 0.0021528177987047534, "learning_rate": 6.566416040100251e-06, "loss": 0.0, "step": 6352 }, { "epoch": 0.4094219243410453, "grad_norm": 0.002116073833938389, "learning_rate": 6.565699964196205e-06, "loss": 0.0, "step": 6353 }, { "epoch": 0.4094863697879745, "grad_norm": 0.29838004832477744, "learning_rate": 6.564983888292159e-06, "loss": 0.0012, "step": 6354 }, { "epoch": 0.40955081523490366, "grad_norm": 0.001972656939178585, "learning_rate": 6.5642678123881136e-06, "loss": 0.0, "step": 6355 }, { "epoch": 0.40961526068183285, "grad_norm": 0.051000456672673784, "learning_rate": 6.563551736484067e-06, "loss": 0.0004, "step": 6356 }, { "epoch": 0.409679706128762, "grad_norm": 0.002971262747549498, "learning_rate": 6.562835660580022e-06, "loss": 0.0, "step": 6357 }, { "epoch": 0.40974415157569116, "grad_norm": 0.001269607342146986, "learning_rate": 6.5621195846759765e-06, "loss": 0.0, "step": 6358 }, { "epoch": 0.40980859702262035, "grad_norm": 0.0013160284007964068, "learning_rate": 6.561403508771931e-06, "loss": 0.0, "step": 6359 }, { "epoch": 0.40987304246954953, "grad_norm": 0.003960992563566679, "learning_rate": 6.560687432867885e-06, "loss": 0.0, "step": 6360 }, { "epoch": 0.4099374879164787, "grad_norm": 0.000711062058098579, "learning_rate": 6.559971356963839e-06, "loss": 0.0, "step": 6361 }, { "epoch": 0.4100019333634079, "grad_norm": 0.0006166932865461904, "learning_rate": 6.559255281059793e-06, "loss": 0.0, "step": 6362 }, { "epoch": 0.41006637881033703, "grad_norm": 0.0008622193241519165, "learning_rate": 6.558539205155747e-06, "loss": 0.0, "step": 6363 }, { "epoch": 0.4101308242572662, "grad_norm": 0.022749650694368222, "learning_rate": 6.557823129251701e-06, "loss": 0.0, "step": 6364 }, { "epoch": 0.4101952697041954, "grad_norm": 0.0001210514390543481, "learning_rate": 6.557107053347656e-06, "loss": 0.0, "step": 6365 }, { "epoch": 0.4102597151511246, "grad_norm": 0.30534664406291645, "learning_rate": 6.55639097744361e-06, "loss": 0.0007, "step": 6366 }, { "epoch": 0.41032416059805377, "grad_norm": 0.0009480997115783098, "learning_rate": 6.5556749015395635e-06, "loss": 0.0, "step": 6367 }, { "epoch": 0.4103886060449829, "grad_norm": 0.0030546703447422857, "learning_rate": 6.554958825635518e-06, "loss": 0.0, "step": 6368 }, { "epoch": 0.4104530514919121, "grad_norm": 3.549411554795448e-05, "learning_rate": 6.554242749731472e-06, "loss": 0.0, "step": 6369 }, { "epoch": 0.41051749693884126, "grad_norm": 0.06431834983958828, "learning_rate": 6.553526673827426e-06, "loss": 0.0006, "step": 6370 }, { "epoch": 0.41058194238577045, "grad_norm": 0.0015877173040344272, "learning_rate": 6.552810597923381e-06, "loss": 0.0, "step": 6371 }, { "epoch": 0.41064638783269963, "grad_norm": 0.3677186573657693, "learning_rate": 6.552094522019334e-06, "loss": 0.0003, "step": 6372 }, { "epoch": 0.4107108332796288, "grad_norm": 0.015533271144025876, "learning_rate": 6.551378446115288e-06, "loss": 0.0001, "step": 6373 }, { "epoch": 0.41077527872655795, "grad_norm": 0.0001311708652832042, "learning_rate": 6.550662370211243e-06, "loss": 0.0, "step": 6374 }, { "epoch": 0.41083972417348713, "grad_norm": 0.03579685409460785, "learning_rate": 6.549946294307197e-06, "loss": 0.0001, "step": 6375 }, { "epoch": 0.4109041696204163, "grad_norm": 0.010680600901401511, "learning_rate": 6.5492302184031505e-06, "loss": 0.0, "step": 6376 }, { "epoch": 0.4109686150673455, "grad_norm": 0.0014740273723926735, "learning_rate": 6.548514142499105e-06, "loss": 0.0, "step": 6377 }, { "epoch": 0.4110330605142747, "grad_norm": 0.17344039901681835, "learning_rate": 6.547798066595059e-06, "loss": 0.0016, "step": 6378 }, { "epoch": 0.41109750596120387, "grad_norm": 2.5766377467299022e-05, "learning_rate": 6.547081990691013e-06, "loss": 0.0, "step": 6379 }, { "epoch": 0.411161951408133, "grad_norm": 0.4452945638691878, "learning_rate": 6.5463659147869685e-06, "loss": 0.0052, "step": 6380 }, { "epoch": 0.4112263968550622, "grad_norm": 0.0008473765258398587, "learning_rate": 6.545649838882923e-06, "loss": 0.0, "step": 6381 }, { "epoch": 0.41129084230199137, "grad_norm": 0.000153973536380325, "learning_rate": 6.544933762978877e-06, "loss": 0.0, "step": 6382 }, { "epoch": 0.41135528774892055, "grad_norm": 0.0008808541464115172, "learning_rate": 6.5442176870748305e-06, "loss": 0.0, "step": 6383 }, { "epoch": 0.41141973319584974, "grad_norm": 0.04910274289866252, "learning_rate": 6.543501611170785e-06, "loss": 0.0001, "step": 6384 }, { "epoch": 0.41148417864277886, "grad_norm": 0.6920763852250857, "learning_rate": 6.542785535266739e-06, "loss": 0.0039, "step": 6385 }, { "epoch": 0.41154862408970805, "grad_norm": 0.10766323025445916, "learning_rate": 6.5420694593626934e-06, "loss": 0.0002, "step": 6386 }, { "epoch": 0.41161306953663723, "grad_norm": 0.0026613364721375463, "learning_rate": 6.541353383458648e-06, "loss": 0.0, "step": 6387 }, { "epoch": 0.4116775149835664, "grad_norm": 0.007791129047614624, "learning_rate": 6.540637307554601e-06, "loss": 0.0, "step": 6388 }, { "epoch": 0.4117419604304956, "grad_norm": 0.0023654626963615804, "learning_rate": 6.5399212316505555e-06, "loss": 0.0, "step": 6389 }, { "epoch": 0.4118064058774248, "grad_norm": 0.007302414206805107, "learning_rate": 6.53920515574651e-06, "loss": 0.0001, "step": 6390 }, { "epoch": 0.4118708513243539, "grad_norm": 0.045215327175843444, "learning_rate": 6.538489079842464e-06, "loss": 0.0, "step": 6391 }, { "epoch": 0.4119352967712831, "grad_norm": 0.0042823292236435235, "learning_rate": 6.5377730039384175e-06, "loss": 0.0, "step": 6392 }, { "epoch": 0.4119997422182123, "grad_norm": 0.009907462456016054, "learning_rate": 6.537056928034372e-06, "loss": 0.0, "step": 6393 }, { "epoch": 0.41206418766514147, "grad_norm": 1.2236248238341578, "learning_rate": 6.536340852130326e-06, "loss": 0.0013, "step": 6394 }, { "epoch": 0.41212863311207065, "grad_norm": 0.04217290112438516, "learning_rate": 6.5356247762262804e-06, "loss": 0.0002, "step": 6395 }, { "epoch": 0.4121930785589998, "grad_norm": 0.004078594013241326, "learning_rate": 6.534908700322235e-06, "loss": 0.0, "step": 6396 }, { "epoch": 0.41225752400592897, "grad_norm": 0.06957932522015041, "learning_rate": 6.534192624418188e-06, "loss": 0.0001, "step": 6397 }, { "epoch": 0.41232196945285815, "grad_norm": 0.3205191480641675, "learning_rate": 6.5334765485141425e-06, "loss": 0.0037, "step": 6398 }, { "epoch": 0.41238641489978733, "grad_norm": 0.010984131143033277, "learning_rate": 6.532760472610097e-06, "loss": 0.0001, "step": 6399 }, { "epoch": 0.4124508603467165, "grad_norm": 0.3163248357698386, "learning_rate": 6.532044396706051e-06, "loss": 0.0022, "step": 6400 }, { "epoch": 0.4125153057936457, "grad_norm": 0.0038705281878985156, "learning_rate": 6.531328320802005e-06, "loss": 0.0, "step": 6401 }, { "epoch": 0.41257975124057483, "grad_norm": 0.000454026447775615, "learning_rate": 6.530612244897959e-06, "loss": 0.0, "step": 6402 }, { "epoch": 0.412644196687504, "grad_norm": 0.018020890708024186, "learning_rate": 6.529896168993915e-06, "loss": 0.0001, "step": 6403 }, { "epoch": 0.4127086421344332, "grad_norm": 0.0011114438458940314, "learning_rate": 6.529180093089868e-06, "loss": 0.0, "step": 6404 }, { "epoch": 0.4127730875813624, "grad_norm": 1.4026249680627463, "learning_rate": 6.528464017185823e-06, "loss": 0.0068, "step": 6405 }, { "epoch": 0.41283753302829157, "grad_norm": 0.001006156064450549, "learning_rate": 6.527747941281777e-06, "loss": 0.0, "step": 6406 }, { "epoch": 0.4129019784752207, "grad_norm": 0.7100981903682718, "learning_rate": 6.527031865377731e-06, "loss": 0.0024, "step": 6407 }, { "epoch": 0.4129664239221499, "grad_norm": 0.00165587619774657, "learning_rate": 6.526315789473685e-06, "loss": 0.0, "step": 6408 }, { "epoch": 0.41303086936907907, "grad_norm": 0.27788876991628714, "learning_rate": 6.525599713569639e-06, "loss": 0.0012, "step": 6409 }, { "epoch": 0.41309531481600825, "grad_norm": 0.0005789179176985119, "learning_rate": 6.524883637665593e-06, "loss": 0.0, "step": 6410 }, { "epoch": 0.41315976026293744, "grad_norm": 0.005029905999029246, "learning_rate": 6.5241675617615475e-06, "loss": 0.0001, "step": 6411 }, { "epoch": 0.4132242057098666, "grad_norm": 0.0034671998073162475, "learning_rate": 6.523451485857502e-06, "loss": 0.0, "step": 6412 }, { "epoch": 0.41328865115679575, "grad_norm": 0.005740657103420485, "learning_rate": 6.522735409953455e-06, "loss": 0.0, "step": 6413 }, { "epoch": 0.41335309660372493, "grad_norm": 0.00789786293433695, "learning_rate": 6.5220193340494096e-06, "loss": 0.0, "step": 6414 }, { "epoch": 0.4134175420506541, "grad_norm": 0.04233837478909208, "learning_rate": 6.521303258145364e-06, "loss": 0.0, "step": 6415 }, { "epoch": 0.4134819874975833, "grad_norm": 0.031302123632725096, "learning_rate": 6.520587182241318e-06, "loss": 0.0001, "step": 6416 }, { "epoch": 0.4135464329445125, "grad_norm": 0.011742804966906406, "learning_rate": 6.5198711063372725e-06, "loss": 0.0, "step": 6417 }, { "epoch": 0.41361087839144167, "grad_norm": 0.000555710875784751, "learning_rate": 6.519155030433226e-06, "loss": 0.0, "step": 6418 }, { "epoch": 0.4136753238383708, "grad_norm": 0.1739312573212879, "learning_rate": 6.51843895452918e-06, "loss": 0.0005, "step": 6419 }, { "epoch": 0.4137397692853, "grad_norm": 0.004575221895012938, "learning_rate": 6.5177228786251345e-06, "loss": 0.0, "step": 6420 }, { "epoch": 0.41380421473222917, "grad_norm": 0.00011079464582073852, "learning_rate": 6.517006802721089e-06, "loss": 0.0, "step": 6421 }, { "epoch": 0.41386866017915835, "grad_norm": 0.005995697259705144, "learning_rate": 6.516290726817043e-06, "loss": 0.0, "step": 6422 }, { "epoch": 0.41393310562608754, "grad_norm": 0.0034095905482489468, "learning_rate": 6.5155746509129966e-06, "loss": 0.0, "step": 6423 }, { "epoch": 0.41399755107301667, "grad_norm": 0.00577780198281472, "learning_rate": 6.514858575008951e-06, "loss": 0.0, "step": 6424 }, { "epoch": 0.41406199651994585, "grad_norm": 0.46146711537749807, "learning_rate": 6.514142499104905e-06, "loss": 0.0012, "step": 6425 }, { "epoch": 0.41412644196687504, "grad_norm": 0.0009439974016115886, "learning_rate": 6.5134264232008595e-06, "loss": 0.0, "step": 6426 }, { "epoch": 0.4141908874138042, "grad_norm": 0.03680339894602934, "learning_rate": 6.512710347296815e-06, "loss": 0.0, "step": 6427 }, { "epoch": 0.4142553328607334, "grad_norm": 0.0009701746796509103, "learning_rate": 6.511994271392769e-06, "loss": 0.0, "step": 6428 }, { "epoch": 0.4143197783076626, "grad_norm": 0.0010483196330762464, "learning_rate": 6.511278195488722e-06, "loss": 0.0, "step": 6429 }, { "epoch": 0.4143842237545917, "grad_norm": 0.0006480988198016862, "learning_rate": 6.510562119584677e-06, "loss": 0.0, "step": 6430 }, { "epoch": 0.4144486692015209, "grad_norm": 0.07760918026875319, "learning_rate": 6.509846043680631e-06, "loss": 0.0002, "step": 6431 }, { "epoch": 0.4145131146484501, "grad_norm": 0.12619666898186474, "learning_rate": 6.509129967776585e-06, "loss": 0.0003, "step": 6432 }, { "epoch": 0.41457756009537927, "grad_norm": 0.06872515229381396, "learning_rate": 6.5084138918725396e-06, "loss": 0.0001, "step": 6433 }, { "epoch": 0.41464200554230846, "grad_norm": 0.005405009744319238, "learning_rate": 6.507697815968493e-06, "loss": 0.0, "step": 6434 }, { "epoch": 0.4147064509892376, "grad_norm": 0.0006958994388959971, "learning_rate": 6.506981740064447e-06, "loss": 0.0, "step": 6435 }, { "epoch": 0.41477089643616677, "grad_norm": 0.00012114286637400527, "learning_rate": 6.506265664160402e-06, "loss": 0.0, "step": 6436 }, { "epoch": 0.41483534188309595, "grad_norm": 0.016547558757412303, "learning_rate": 6.505549588256356e-06, "loss": 0.0, "step": 6437 }, { "epoch": 0.41489978733002514, "grad_norm": 0.013458047052151412, "learning_rate": 6.50483351235231e-06, "loss": 0.0, "step": 6438 }, { "epoch": 0.4149642327769543, "grad_norm": 0.0021026141925494257, "learning_rate": 6.504117436448264e-06, "loss": 0.0, "step": 6439 }, { "epoch": 0.4150286782238835, "grad_norm": 0.00038938549783562923, "learning_rate": 6.503401360544218e-06, "loss": 0.0, "step": 6440 }, { "epoch": 0.41509312367081264, "grad_norm": 0.0020553606478648863, "learning_rate": 6.502685284640172e-06, "loss": 0.0, "step": 6441 }, { "epoch": 0.4151575691177418, "grad_norm": 0.0011236673450610109, "learning_rate": 6.5019692087361265e-06, "loss": 0.0, "step": 6442 }, { "epoch": 0.415222014564671, "grad_norm": 0.0008312099867464999, "learning_rate": 6.501253132832081e-06, "loss": 0.0, "step": 6443 }, { "epoch": 0.4152864600116002, "grad_norm": 0.016113894443693048, "learning_rate": 6.500537056928034e-06, "loss": 0.0, "step": 6444 }, { "epoch": 0.4153509054585294, "grad_norm": 0.001129702907296028, "learning_rate": 6.499820981023989e-06, "loss": 0.0, "step": 6445 }, { "epoch": 0.4154153509054585, "grad_norm": 0.0036121507193785907, "learning_rate": 6.499104905119943e-06, "loss": 0.0, "step": 6446 }, { "epoch": 0.4154797963523877, "grad_norm": 0.4637558492255657, "learning_rate": 6.498388829215897e-06, "loss": 0.0018, "step": 6447 }, { "epoch": 0.41554424179931687, "grad_norm": 0.002091072027737738, "learning_rate": 6.4976727533118515e-06, "loss": 0.0, "step": 6448 }, { "epoch": 0.41560868724624606, "grad_norm": 0.005425796910282488, "learning_rate": 6.496956677407805e-06, "loss": 0.0, "step": 6449 }, { "epoch": 0.41567313269317524, "grad_norm": 0.0006827120071557798, "learning_rate": 6.49624060150376e-06, "loss": 0.0, "step": 6450 }, { "epoch": 0.4157375781401044, "grad_norm": 0.0009370663355223362, "learning_rate": 6.495524525599714e-06, "loss": 0.0, "step": 6451 }, { "epoch": 0.41580202358703355, "grad_norm": 0.005268021478741142, "learning_rate": 6.494808449695669e-06, "loss": 0.0, "step": 6452 }, { "epoch": 0.41586646903396274, "grad_norm": 0.0018895030845587368, "learning_rate": 6.494092373791623e-06, "loss": 0.0, "step": 6453 }, { "epoch": 0.4159309144808919, "grad_norm": 0.005175601673834675, "learning_rate": 6.493376297887577e-06, "loss": 0.0, "step": 6454 }, { "epoch": 0.4159953599278211, "grad_norm": 0.4407284520497393, "learning_rate": 6.492660221983531e-06, "loss": 0.0032, "step": 6455 }, { "epoch": 0.4160598053747503, "grad_norm": 0.0007340834875259358, "learning_rate": 6.491944146079485e-06, "loss": 0.0, "step": 6456 }, { "epoch": 0.4161242508216795, "grad_norm": 0.010261719996274735, "learning_rate": 6.491228070175439e-06, "loss": 0.0, "step": 6457 }, { "epoch": 0.4161886962686086, "grad_norm": 0.0008106852255236457, "learning_rate": 6.490511994271394e-06, "loss": 0.0, "step": 6458 }, { "epoch": 0.4162531417155378, "grad_norm": 0.010090045530888448, "learning_rate": 6.489795918367348e-06, "loss": 0.0, "step": 6459 }, { "epoch": 0.416317587162467, "grad_norm": 0.008465445410221185, "learning_rate": 6.489079842463301e-06, "loss": 0.0, "step": 6460 }, { "epoch": 0.41638203260939616, "grad_norm": 0.02899654347956042, "learning_rate": 6.488363766559256e-06, "loss": 0.0001, "step": 6461 }, { "epoch": 0.41644647805632534, "grad_norm": 0.001056778135030049, "learning_rate": 6.48764769065521e-06, "loss": 0.0, "step": 6462 }, { "epoch": 0.41651092350325447, "grad_norm": 0.3688163746079339, "learning_rate": 6.486931614751164e-06, "loss": 0.0006, "step": 6463 }, { "epoch": 0.41657536895018366, "grad_norm": 0.005144527660957336, "learning_rate": 6.486215538847119e-06, "loss": 0.0, "step": 6464 }, { "epoch": 0.41663981439711284, "grad_norm": 0.017784187385494993, "learning_rate": 6.485499462943072e-06, "loss": 0.0002, "step": 6465 }, { "epoch": 0.416704259844042, "grad_norm": 0.06417901822958386, "learning_rate": 6.484783387039026e-06, "loss": 0.0001, "step": 6466 }, { "epoch": 0.4167687052909712, "grad_norm": 0.0006121339328757751, "learning_rate": 6.484067311134981e-06, "loss": 0.0, "step": 6467 }, { "epoch": 0.4168331507379004, "grad_norm": 0.18229139305275863, "learning_rate": 6.483351235230935e-06, "loss": 0.0004, "step": 6468 }, { "epoch": 0.4168975961848295, "grad_norm": 0.041040044281895095, "learning_rate": 6.482635159326889e-06, "loss": 0.0001, "step": 6469 }, { "epoch": 0.4169620416317587, "grad_norm": 0.021999025416732904, "learning_rate": 6.481919083422843e-06, "loss": 0.0001, "step": 6470 }, { "epoch": 0.4170264870786879, "grad_norm": 0.002544240789084966, "learning_rate": 6.481203007518797e-06, "loss": 0.0, "step": 6471 }, { "epoch": 0.4170909325256171, "grad_norm": 0.0026852858379955535, "learning_rate": 6.480486931614751e-06, "loss": 0.0, "step": 6472 }, { "epoch": 0.41715537797254626, "grad_norm": 0.43469944439074326, "learning_rate": 6.479770855710706e-06, "loss": 0.0012, "step": 6473 }, { "epoch": 0.4172198234194754, "grad_norm": 0.347051651967674, "learning_rate": 6.479054779806661e-06, "loss": 0.0006, "step": 6474 }, { "epoch": 0.4172842688664046, "grad_norm": 0.25698550840535966, "learning_rate": 6.478338703902615e-06, "loss": 0.0005, "step": 6475 }, { "epoch": 0.41734871431333376, "grad_norm": 0.2126062926753528, "learning_rate": 6.4776226279985685e-06, "loss": 0.0019, "step": 6476 }, { "epoch": 0.41741315976026294, "grad_norm": 0.28151246910276284, "learning_rate": 6.476906552094523e-06, "loss": 0.0012, "step": 6477 }, { "epoch": 0.4174776052071921, "grad_norm": 0.0017983784302793271, "learning_rate": 6.476190476190477e-06, "loss": 0.0, "step": 6478 }, { "epoch": 0.4175420506541213, "grad_norm": 0.029167512556955478, "learning_rate": 6.475474400286431e-06, "loss": 0.0002, "step": 6479 }, { "epoch": 0.41760649610105044, "grad_norm": 6.0273887407271015e-05, "learning_rate": 6.474758324382386e-06, "loss": 0.0, "step": 6480 }, { "epoch": 0.4176709415479796, "grad_norm": 0.003980392115880526, "learning_rate": 6.474042248478339e-06, "loss": 0.0, "step": 6481 }, { "epoch": 0.4177353869949088, "grad_norm": 0.0011862001687265543, "learning_rate": 6.473326172574293e-06, "loss": 0.0, "step": 6482 }, { "epoch": 0.417799832441838, "grad_norm": 0.28784543359479664, "learning_rate": 6.472610096670248e-06, "loss": 0.0004, "step": 6483 }, { "epoch": 0.4178642778887672, "grad_norm": 0.002859249041893461, "learning_rate": 6.471894020766202e-06, "loss": 0.0, "step": 6484 }, { "epoch": 0.4179287233356963, "grad_norm": 0.0022861943502021026, "learning_rate": 6.471177944862156e-06, "loss": 0.0, "step": 6485 }, { "epoch": 0.4179931687826255, "grad_norm": 0.0020342299031710404, "learning_rate": 6.47046186895811e-06, "loss": 0.0, "step": 6486 }, { "epoch": 0.4180576142295547, "grad_norm": 8.07128737944767e-05, "learning_rate": 6.469745793054064e-06, "loss": 0.0, "step": 6487 }, { "epoch": 0.41812205967648386, "grad_norm": 0.1435686221463562, "learning_rate": 6.469029717150018e-06, "loss": 0.0017, "step": 6488 }, { "epoch": 0.41818650512341304, "grad_norm": 9.83520528507581e-05, "learning_rate": 6.468313641245973e-06, "loss": 0.0, "step": 6489 }, { "epoch": 0.41825095057034223, "grad_norm": 5.192385256500709e-05, "learning_rate": 6.467597565341926e-06, "loss": 0.0, "step": 6490 }, { "epoch": 0.41831539601727136, "grad_norm": 0.005567069063858515, "learning_rate": 6.46688148943788e-06, "loss": 0.0, "step": 6491 }, { "epoch": 0.41837984146420054, "grad_norm": 0.00021615406446108586, "learning_rate": 6.466165413533835e-06, "loss": 0.0, "step": 6492 }, { "epoch": 0.4184442869111297, "grad_norm": 0.0021729118234671005, "learning_rate": 6.465449337629789e-06, "loss": 0.0, "step": 6493 }, { "epoch": 0.4185087323580589, "grad_norm": 0.006202733531252822, "learning_rate": 6.464733261725743e-06, "loss": 0.0, "step": 6494 }, { "epoch": 0.4185731778049881, "grad_norm": 0.0008673489649001512, "learning_rate": 6.464017185821697e-06, "loss": 0.0, "step": 6495 }, { "epoch": 0.4186376232519173, "grad_norm": 0.01283363549355941, "learning_rate": 6.463301109917651e-06, "loss": 0.0, "step": 6496 }, { "epoch": 0.4187020686988464, "grad_norm": 0.0971751521920101, "learning_rate": 6.462585034013606e-06, "loss": 0.0001, "step": 6497 }, { "epoch": 0.4187665141457756, "grad_norm": 0.037483021995562936, "learning_rate": 6.4618689581095605e-06, "loss": 0.0002, "step": 6498 }, { "epoch": 0.4188309595927048, "grad_norm": 0.002619854872960852, "learning_rate": 6.461152882205515e-06, "loss": 0.0, "step": 6499 }, { "epoch": 0.41889540503963396, "grad_norm": 0.4253153570613154, "learning_rate": 6.460436806301469e-06, "loss": 0.0033, "step": 6500 }, { "epoch": 0.41895985048656315, "grad_norm": 0.30405405417039943, "learning_rate": 6.459720730397423e-06, "loss": 0.0014, "step": 6501 }, { "epoch": 0.4190242959334923, "grad_norm": 0.061062530101131295, "learning_rate": 6.459004654493377e-06, "loss": 0.0, "step": 6502 }, { "epoch": 0.41908874138042146, "grad_norm": 0.0022728235938479116, "learning_rate": 6.458288578589331e-06, "loss": 0.0, "step": 6503 }, { "epoch": 0.41915318682735064, "grad_norm": 0.018741473727967656, "learning_rate": 6.4575725026852854e-06, "loss": 0.0015, "step": 6504 }, { "epoch": 0.41921763227427983, "grad_norm": 0.00014293611391536147, "learning_rate": 6.45685642678124e-06, "loss": 0.0, "step": 6505 }, { "epoch": 0.419282077721209, "grad_norm": 0.1856076392009028, "learning_rate": 6.456140350877193e-06, "loss": 0.0007, "step": 6506 }, { "epoch": 0.4193465231681382, "grad_norm": 0.0038470830664775165, "learning_rate": 6.4554242749731475e-06, "loss": 0.0, "step": 6507 }, { "epoch": 0.4194109686150673, "grad_norm": 0.0002562884533311774, "learning_rate": 6.454708199069102e-06, "loss": 0.0, "step": 6508 }, { "epoch": 0.4194754140619965, "grad_norm": 0.1724127247382639, "learning_rate": 6.453992123165056e-06, "loss": 0.0002, "step": 6509 }, { "epoch": 0.4195398595089257, "grad_norm": 0.00018884989328603747, "learning_rate": 6.45327604726101e-06, "loss": 0.0, "step": 6510 }, { "epoch": 0.4196043049558549, "grad_norm": 0.005897050359999245, "learning_rate": 6.452559971356964e-06, "loss": 0.0, "step": 6511 }, { "epoch": 0.41966875040278406, "grad_norm": 0.08656624785462537, "learning_rate": 6.451843895452918e-06, "loss": 0.001, "step": 6512 }, { "epoch": 0.4197331958497132, "grad_norm": 0.0008088807705909768, "learning_rate": 6.4511278195488724e-06, "loss": 0.0, "step": 6513 }, { "epoch": 0.4197976412966424, "grad_norm": 0.04547051038808133, "learning_rate": 6.450411743644827e-06, "loss": 0.0, "step": 6514 }, { "epoch": 0.41986208674357156, "grad_norm": 6.935269685480056e-05, "learning_rate": 6.449695667740781e-06, "loss": 0.0, "step": 6515 }, { "epoch": 0.41992653219050075, "grad_norm": 0.013221494184816038, "learning_rate": 6.4489795918367345e-06, "loss": 0.0, "step": 6516 }, { "epoch": 0.41999097763742993, "grad_norm": 0.004337338260439512, "learning_rate": 6.448263515932689e-06, "loss": 0.0, "step": 6517 }, { "epoch": 0.4200554230843591, "grad_norm": 0.0007477333484548366, "learning_rate": 6.447547440028643e-06, "loss": 0.0, "step": 6518 }, { "epoch": 0.42011986853128824, "grad_norm": 0.22266532645213608, "learning_rate": 6.446831364124597e-06, "loss": 0.0007, "step": 6519 }, { "epoch": 0.4201843139782174, "grad_norm": 0.001061479785357458, "learning_rate": 6.4461152882205525e-06, "loss": 0.0, "step": 6520 }, { "epoch": 0.4202487594251466, "grad_norm": 0.01430381819889332, "learning_rate": 6.445399212316507e-06, "loss": 0.0001, "step": 6521 }, { "epoch": 0.4203132048720758, "grad_norm": 0.2277960656102833, "learning_rate": 6.44468313641246e-06, "loss": 0.0082, "step": 6522 }, { "epoch": 0.420377650319005, "grad_norm": 0.009277152777889666, "learning_rate": 6.443967060508415e-06, "loss": 0.0001, "step": 6523 }, { "epoch": 0.4204420957659341, "grad_norm": 0.014832602078491002, "learning_rate": 6.443250984604369e-06, "loss": 0.0, "step": 6524 }, { "epoch": 0.4205065412128633, "grad_norm": 0.000828742706597286, "learning_rate": 6.442534908700323e-06, "loss": 0.0, "step": 6525 }, { "epoch": 0.4205709866597925, "grad_norm": 0.00023983841067913176, "learning_rate": 6.4418188327962775e-06, "loss": 0.0, "step": 6526 }, { "epoch": 0.42063543210672166, "grad_norm": 0.0060839587052440545, "learning_rate": 6.441102756892231e-06, "loss": 0.0001, "step": 6527 }, { "epoch": 0.42069987755365085, "grad_norm": 0.12049660035438453, "learning_rate": 6.440386680988185e-06, "loss": 0.0004, "step": 6528 }, { "epoch": 0.42076432300058003, "grad_norm": 0.0002864290770283571, "learning_rate": 6.4396706050841395e-06, "loss": 0.0, "step": 6529 }, { "epoch": 0.42082876844750916, "grad_norm": 0.008323643209694061, "learning_rate": 6.438954529180094e-06, "loss": 0.0, "step": 6530 }, { "epoch": 0.42089321389443834, "grad_norm": 0.0008086087638468122, "learning_rate": 6.438238453276048e-06, "loss": 0.0, "step": 6531 }, { "epoch": 0.42095765934136753, "grad_norm": 0.005185411844307165, "learning_rate": 6.4375223773720016e-06, "loss": 0.0, "step": 6532 }, { "epoch": 0.4210221047882967, "grad_norm": 0.0006719877031553268, "learning_rate": 6.436806301467956e-06, "loss": 0.0, "step": 6533 }, { "epoch": 0.4210865502352259, "grad_norm": 2.6934940144879813e-05, "learning_rate": 6.43609022556391e-06, "loss": 0.0, "step": 6534 }, { "epoch": 0.4211509956821551, "grad_norm": 0.00015571736547918388, "learning_rate": 6.4353741496598645e-06, "loss": 0.0, "step": 6535 }, { "epoch": 0.4212154411290842, "grad_norm": 0.00315484098134091, "learning_rate": 6.434658073755819e-06, "loss": 0.0, "step": 6536 }, { "epoch": 0.4212798865760134, "grad_norm": 0.005351614273942891, "learning_rate": 6.433941997851772e-06, "loss": 0.0001, "step": 6537 }, { "epoch": 0.4213443320229426, "grad_norm": 0.010094460236600215, "learning_rate": 6.4332259219477265e-06, "loss": 0.0, "step": 6538 }, { "epoch": 0.42140877746987176, "grad_norm": 0.010835629956553901, "learning_rate": 6.432509846043681e-06, "loss": 0.0002, "step": 6539 }, { "epoch": 0.42147322291680095, "grad_norm": 0.00319520008482836, "learning_rate": 6.431793770139635e-06, "loss": 0.0, "step": 6540 }, { "epoch": 0.4215376683637301, "grad_norm": 0.010853856846134343, "learning_rate": 6.431077694235589e-06, "loss": 0.0, "step": 6541 }, { "epoch": 0.42160211381065926, "grad_norm": 0.0114451280760472, "learning_rate": 6.430361618331543e-06, "loss": 0.0, "step": 6542 }, { "epoch": 0.42166655925758845, "grad_norm": 0.02084433664255218, "learning_rate": 6.429645542427498e-06, "loss": 0.0001, "step": 6543 }, { "epoch": 0.42173100470451763, "grad_norm": 0.0003189129349114517, "learning_rate": 6.428929466523452e-06, "loss": 0.0, "step": 6544 }, { "epoch": 0.4217954501514468, "grad_norm": 0.0035933766135850926, "learning_rate": 6.428213390619407e-06, "loss": 0.0, "step": 6545 }, { "epoch": 0.421859895598376, "grad_norm": 0.024217145235977393, "learning_rate": 6.427497314715361e-06, "loss": 0.0001, "step": 6546 }, { "epoch": 0.42192434104530513, "grad_norm": 0.007078477096816555, "learning_rate": 6.426781238811315e-06, "loss": 0.0, "step": 6547 }, { "epoch": 0.4219887864922343, "grad_norm": 0.16918780173284304, "learning_rate": 6.426065162907269e-06, "loss": 0.0017, "step": 6548 }, { "epoch": 0.4220532319391635, "grad_norm": 0.00042276683359260573, "learning_rate": 6.425349087003223e-06, "loss": 0.0, "step": 6549 }, { "epoch": 0.4221176773860927, "grad_norm": 0.005739171150647594, "learning_rate": 6.424633011099177e-06, "loss": 0.0, "step": 6550 }, { "epoch": 0.42218212283302187, "grad_norm": 0.00012378405994403383, "learning_rate": 6.4239169351951316e-06, "loss": 0.0, "step": 6551 }, { "epoch": 0.422246568279951, "grad_norm": 0.00235690386382886, "learning_rate": 6.423200859291086e-06, "loss": 0.0, "step": 6552 }, { "epoch": 0.4223110137268802, "grad_norm": 7.031785182137648e-05, "learning_rate": 6.422484783387039e-06, "loss": 0.0, "step": 6553 }, { "epoch": 0.42237545917380936, "grad_norm": 0.0026047197359357352, "learning_rate": 6.421768707482994e-06, "loss": 0.0, "step": 6554 }, { "epoch": 0.42243990462073855, "grad_norm": 0.12465103807190193, "learning_rate": 6.421052631578948e-06, "loss": 0.0001, "step": 6555 }, { "epoch": 0.42250435006766773, "grad_norm": 0.0011322028151738962, "learning_rate": 6.420336555674902e-06, "loss": 0.0, "step": 6556 }, { "epoch": 0.4225687955145969, "grad_norm": 0.005403863207130577, "learning_rate": 6.4196204797708565e-06, "loss": 0.0001, "step": 6557 }, { "epoch": 0.42263324096152605, "grad_norm": 0.09145143117725939, "learning_rate": 6.41890440386681e-06, "loss": 0.0008, "step": 6558 }, { "epoch": 0.42269768640845523, "grad_norm": 0.0007512461604921746, "learning_rate": 6.418188327962764e-06, "loss": 0.0, "step": 6559 }, { "epoch": 0.4227621318553844, "grad_norm": 0.001425860074836207, "learning_rate": 6.4174722520587185e-06, "loss": 0.0, "step": 6560 }, { "epoch": 0.4228265773023136, "grad_norm": 1.1363235783018837, "learning_rate": 6.416756176154673e-06, "loss": 0.0049, "step": 6561 }, { "epoch": 0.4228910227492428, "grad_norm": 0.001521368662538063, "learning_rate": 6.416040100250627e-06, "loss": 0.0, "step": 6562 }, { "epoch": 0.4229554681961719, "grad_norm": 0.0006320624923546955, "learning_rate": 6.415324024346581e-06, "loss": 0.0, "step": 6563 }, { "epoch": 0.4230199136431011, "grad_norm": 0.0170555938090754, "learning_rate": 6.414607948442535e-06, "loss": 0.0, "step": 6564 }, { "epoch": 0.4230843590900303, "grad_norm": 0.007227821830375058, "learning_rate": 6.413891872538489e-06, "loss": 0.0, "step": 6565 }, { "epoch": 0.42314880453695947, "grad_norm": 0.3460220977460297, "learning_rate": 6.4131757966344435e-06, "loss": 0.0011, "step": 6566 }, { "epoch": 0.42321324998388865, "grad_norm": 0.0028084542812121535, "learning_rate": 6.412459720730399e-06, "loss": 0.0, "step": 6567 }, { "epoch": 0.42327769543081784, "grad_norm": 0.049689493945287815, "learning_rate": 6.411743644826353e-06, "loss": 0.0002, "step": 6568 }, { "epoch": 0.42334214087774696, "grad_norm": 0.018442280869811974, "learning_rate": 6.411027568922306e-06, "loss": 0.0, "step": 6569 }, { "epoch": 0.42340658632467615, "grad_norm": 0.016176521402883174, "learning_rate": 6.410311493018261e-06, "loss": 0.0001, "step": 6570 }, { "epoch": 0.42347103177160533, "grad_norm": 0.0045036594846243954, "learning_rate": 6.409595417114215e-06, "loss": 0.0001, "step": 6571 }, { "epoch": 0.4235354772185345, "grad_norm": 0.000763490671469306, "learning_rate": 6.408879341210169e-06, "loss": 0.0, "step": 6572 }, { "epoch": 0.4235999226654637, "grad_norm": 0.01358307692632028, "learning_rate": 6.408163265306124e-06, "loss": 0.0, "step": 6573 }, { "epoch": 0.4236643681123929, "grad_norm": 0.07061323450311378, "learning_rate": 6.407447189402077e-06, "loss": 0.0001, "step": 6574 }, { "epoch": 0.423728813559322, "grad_norm": 0.0009675888097842737, "learning_rate": 6.406731113498031e-06, "loss": 0.0, "step": 6575 }, { "epoch": 0.4237932590062512, "grad_norm": 0.05086051961970249, "learning_rate": 6.406015037593986e-06, "loss": 0.0002, "step": 6576 }, { "epoch": 0.4238577044531804, "grad_norm": 0.016505252166751583, "learning_rate": 6.40529896168994e-06, "loss": 0.0, "step": 6577 }, { "epoch": 0.42392214990010957, "grad_norm": 0.0004049548514622685, "learning_rate": 6.404582885785894e-06, "loss": 0.0015, "step": 6578 }, { "epoch": 0.42398659534703875, "grad_norm": 0.623974793736584, "learning_rate": 6.403866809881848e-06, "loss": 0.0023, "step": 6579 }, { "epoch": 0.4240510407939679, "grad_norm": 4.7767857672534735, "learning_rate": 6.403150733977802e-06, "loss": 0.0369, "step": 6580 }, { "epoch": 0.42411548624089707, "grad_norm": 0.0056384048468675904, "learning_rate": 6.402434658073756e-06, "loss": 0.0, "step": 6581 }, { "epoch": 0.42417993168782625, "grad_norm": 0.041177984595075684, "learning_rate": 6.401718582169711e-06, "loss": 0.0, "step": 6582 }, { "epoch": 0.42424437713475543, "grad_norm": 0.0028160901279018036, "learning_rate": 6.401002506265664e-06, "loss": 0.0, "step": 6583 }, { "epoch": 0.4243088225816846, "grad_norm": 0.03627116547304098, "learning_rate": 6.400286430361618e-06, "loss": 0.0003, "step": 6584 }, { "epoch": 0.4243732680286138, "grad_norm": 0.12108684335516592, "learning_rate": 6.399570354457573e-06, "loss": 0.0001, "step": 6585 }, { "epoch": 0.42443771347554293, "grad_norm": 0.05032057374908024, "learning_rate": 6.398854278553527e-06, "loss": 0.0004, "step": 6586 }, { "epoch": 0.4245021589224721, "grad_norm": 0.23586408913024615, "learning_rate": 6.398138202649481e-06, "loss": 0.0008, "step": 6587 }, { "epoch": 0.4245666043694013, "grad_norm": 0.042724430901290876, "learning_rate": 6.397422126745435e-06, "loss": 0.0, "step": 6588 }, { "epoch": 0.4246310498163305, "grad_norm": 0.007525076075820635, "learning_rate": 6.396706050841389e-06, "loss": 0.0, "step": 6589 }, { "epoch": 0.42469549526325967, "grad_norm": 0.03318998658871642, "learning_rate": 6.395989974937344e-06, "loss": 0.0, "step": 6590 }, { "epoch": 0.4247599407101888, "grad_norm": 0.032235455008593907, "learning_rate": 6.395273899033298e-06, "loss": 0.0002, "step": 6591 }, { "epoch": 0.424824386157118, "grad_norm": 0.0873344546653782, "learning_rate": 6.394557823129253e-06, "loss": 0.0016, "step": 6592 }, { "epoch": 0.42488883160404717, "grad_norm": 0.012324949783537603, "learning_rate": 6.393841747225207e-06, "loss": 0.0, "step": 6593 }, { "epoch": 0.42495327705097635, "grad_norm": 0.029404377397181828, "learning_rate": 6.393125671321161e-06, "loss": 0.0003, "step": 6594 }, { "epoch": 0.42501772249790554, "grad_norm": 0.004150097631532944, "learning_rate": 6.392409595417115e-06, "loss": 0.0, "step": 6595 }, { "epoch": 0.4250821679448347, "grad_norm": 0.426125046406366, "learning_rate": 6.391693519513069e-06, "loss": 0.0014, "step": 6596 }, { "epoch": 0.42514661339176385, "grad_norm": 0.007814042117983378, "learning_rate": 6.390977443609023e-06, "loss": 0.0001, "step": 6597 }, { "epoch": 0.42521105883869303, "grad_norm": 0.016080993757632295, "learning_rate": 6.390261367704978e-06, "loss": 0.0, "step": 6598 }, { "epoch": 0.4252755042856222, "grad_norm": 0.001690795443047797, "learning_rate": 6.389545291800931e-06, "loss": 0.0, "step": 6599 }, { "epoch": 0.4253399497325514, "grad_norm": 0.16194163111905666, "learning_rate": 6.388829215896885e-06, "loss": 0.0003, "step": 6600 }, { "epoch": 0.4254043951794806, "grad_norm": 0.011428128871439189, "learning_rate": 6.38811313999284e-06, "loss": 0.0, "step": 6601 }, { "epoch": 0.4254688406264097, "grad_norm": 0.08761187696328585, "learning_rate": 6.387397064088794e-06, "loss": 0.0017, "step": 6602 }, { "epoch": 0.4255332860733389, "grad_norm": 0.2612283581799962, "learning_rate": 6.386680988184748e-06, "loss": 0.0003, "step": 6603 }, { "epoch": 0.4255977315202681, "grad_norm": 0.019915802669471502, "learning_rate": 6.385964912280702e-06, "loss": 0.0, "step": 6604 }, { "epoch": 0.42566217696719727, "grad_norm": 0.02653936073343681, "learning_rate": 6.385248836376656e-06, "loss": 0.0, "step": 6605 }, { "epoch": 0.42572662241412645, "grad_norm": 0.00020666814500324758, "learning_rate": 6.38453276047261e-06, "loss": 0.0, "step": 6606 }, { "epoch": 0.42579106786105564, "grad_norm": 0.02526154973886526, "learning_rate": 6.383816684568565e-06, "loss": 0.0, "step": 6607 }, { "epoch": 0.42585551330798477, "grad_norm": 0.007133051405255623, "learning_rate": 6.383100608664519e-06, "loss": 0.0, "step": 6608 }, { "epoch": 0.42591995875491395, "grad_norm": 0.0021597927325183465, "learning_rate": 6.382384532760472e-06, "loss": 0.0, "step": 6609 }, { "epoch": 0.42598440420184314, "grad_norm": 0.023160755933042062, "learning_rate": 6.381668456856427e-06, "loss": 0.0001, "step": 6610 }, { "epoch": 0.4260488496487723, "grad_norm": 0.07757443802956861, "learning_rate": 6.380952380952381e-06, "loss": 0.0001, "step": 6611 }, { "epoch": 0.4261132950957015, "grad_norm": 0.06177029019459271, "learning_rate": 6.380236305048335e-06, "loss": 0.0002, "step": 6612 }, { "epoch": 0.4261777405426307, "grad_norm": 0.22313402180370537, "learning_rate": 6.3795202291442905e-06, "loss": 0.0012, "step": 6613 }, { "epoch": 0.4262421859895598, "grad_norm": 0.000392622305714873, "learning_rate": 6.378804153240245e-06, "loss": 0.0, "step": 6614 }, { "epoch": 0.426306631436489, "grad_norm": 0.00409946774879405, "learning_rate": 6.378088077336198e-06, "loss": 0.0, "step": 6615 }, { "epoch": 0.4263710768834182, "grad_norm": 0.0188214650348023, "learning_rate": 6.3773720014321525e-06, "loss": 0.0001, "step": 6616 }, { "epoch": 0.42643552233034737, "grad_norm": 0.05915115781377536, "learning_rate": 6.376655925528107e-06, "loss": 0.0005, "step": 6617 }, { "epoch": 0.42649996777727656, "grad_norm": 0.004750804011270957, "learning_rate": 6.375939849624061e-06, "loss": 0.0, "step": 6618 }, { "epoch": 0.4265644132242057, "grad_norm": 0.0009950607837926515, "learning_rate": 6.375223773720015e-06, "loss": 0.0, "step": 6619 }, { "epoch": 0.42662885867113487, "grad_norm": 0.0003103790189822709, "learning_rate": 6.374507697815969e-06, "loss": 0.0, "step": 6620 }, { "epoch": 0.42669330411806405, "grad_norm": 0.006672502559690551, "learning_rate": 6.373791621911923e-06, "loss": 0.0, "step": 6621 }, { "epoch": 0.42675774956499324, "grad_norm": 0.0012132214678914587, "learning_rate": 6.3730755460078774e-06, "loss": 0.0, "step": 6622 }, { "epoch": 0.4268221950119224, "grad_norm": 0.5459795569131483, "learning_rate": 6.372359470103832e-06, "loss": 0.001, "step": 6623 }, { "epoch": 0.4268866404588516, "grad_norm": 0.1584840858127378, "learning_rate": 6.371643394199786e-06, "loss": 0.0018, "step": 6624 }, { "epoch": 0.42695108590578074, "grad_norm": 0.02230071107940539, "learning_rate": 6.3709273182957395e-06, "loss": 0.0, "step": 6625 }, { "epoch": 0.4270155313527099, "grad_norm": 0.2010288563453443, "learning_rate": 6.370211242391694e-06, "loss": 0.0002, "step": 6626 }, { "epoch": 0.4270799767996391, "grad_norm": 0.0005539057811006905, "learning_rate": 6.369495166487648e-06, "loss": 0.0, "step": 6627 }, { "epoch": 0.4271444222465683, "grad_norm": 8.601880093504294e-05, "learning_rate": 6.368779090583602e-06, "loss": 0.0, "step": 6628 }, { "epoch": 0.4272088676934975, "grad_norm": 0.00694744794728398, "learning_rate": 6.368063014679557e-06, "loss": 0.0, "step": 6629 }, { "epoch": 0.4272733131404266, "grad_norm": 0.00011373609568961924, "learning_rate": 6.36734693877551e-06, "loss": 0.0, "step": 6630 }, { "epoch": 0.4273377585873558, "grad_norm": 0.2741989161173644, "learning_rate": 6.3666308628714644e-06, "loss": 0.0024, "step": 6631 }, { "epoch": 0.42740220403428497, "grad_norm": 0.121421209699953, "learning_rate": 6.365914786967419e-06, "loss": 0.0003, "step": 6632 }, { "epoch": 0.42746664948121416, "grad_norm": 0.0009714433687004506, "learning_rate": 6.365198711063373e-06, "loss": 0.0, "step": 6633 }, { "epoch": 0.42753109492814334, "grad_norm": 0.10040541532491891, "learning_rate": 6.364482635159327e-06, "loss": 0.0007, "step": 6634 }, { "epoch": 0.4275955403750725, "grad_norm": 6.250609963284367e-05, "learning_rate": 6.363766559255281e-06, "loss": 0.0, "step": 6635 }, { "epoch": 0.42765998582200165, "grad_norm": 0.0007349660592156871, "learning_rate": 6.363050483351235e-06, "loss": 0.0, "step": 6636 }, { "epoch": 0.42772443126893084, "grad_norm": 0.00017060009315793165, "learning_rate": 6.36233440744719e-06, "loss": 0.0, "step": 6637 }, { "epoch": 0.42778887671586, "grad_norm": 0.002033436154090894, "learning_rate": 6.3616183315431445e-06, "loss": 0.0, "step": 6638 }, { "epoch": 0.4278533221627892, "grad_norm": 0.001637918147364348, "learning_rate": 6.360902255639099e-06, "loss": 0.0, "step": 6639 }, { "epoch": 0.4279177676097184, "grad_norm": 0.19539891238751805, "learning_rate": 6.360186179735053e-06, "loss": 0.0004, "step": 6640 }, { "epoch": 0.4279822130566475, "grad_norm": 0.0018548896809036603, "learning_rate": 6.359470103831007e-06, "loss": 0.0, "step": 6641 }, { "epoch": 0.4280466585035767, "grad_norm": 0.0013334698909709792, "learning_rate": 6.358754027926961e-06, "loss": 0.0, "step": 6642 }, { "epoch": 0.4281111039505059, "grad_norm": 0.0028419573034907998, "learning_rate": 6.358037952022915e-06, "loss": 0.0, "step": 6643 }, { "epoch": 0.4281755493974351, "grad_norm": 0.006881097440326114, "learning_rate": 6.3573218761188695e-06, "loss": 0.0, "step": 6644 }, { "epoch": 0.42823999484436426, "grad_norm": 0.001116479305550302, "learning_rate": 6.356605800214824e-06, "loss": 0.0, "step": 6645 }, { "epoch": 0.42830444029129344, "grad_norm": 0.00025453688824276247, "learning_rate": 6.355889724310777e-06, "loss": 0.0, "step": 6646 }, { "epoch": 0.42836888573822257, "grad_norm": 0.005216877980391289, "learning_rate": 6.3551736484067315e-06, "loss": 0.0, "step": 6647 }, { "epoch": 0.42843333118515176, "grad_norm": 0.001896177419504992, "learning_rate": 6.354457572502686e-06, "loss": 0.0, "step": 6648 }, { "epoch": 0.42849777663208094, "grad_norm": 0.0002611695027218609, "learning_rate": 6.35374149659864e-06, "loss": 0.0, "step": 6649 }, { "epoch": 0.4285622220790101, "grad_norm": 0.22977676603252645, "learning_rate": 6.353025420694594e-06, "loss": 0.0003, "step": 6650 }, { "epoch": 0.4286266675259393, "grad_norm": 0.0017587722789177331, "learning_rate": 6.352309344790548e-06, "loss": 0.0, "step": 6651 }, { "epoch": 0.4286911129728685, "grad_norm": 0.0020766966092716993, "learning_rate": 6.351593268886502e-06, "loss": 0.0, "step": 6652 }, { "epoch": 0.4287555584197976, "grad_norm": 0.40843597414743027, "learning_rate": 6.3508771929824565e-06, "loss": 0.0014, "step": 6653 }, { "epoch": 0.4288200038667268, "grad_norm": 0.013422847530458746, "learning_rate": 6.350161117078411e-06, "loss": 0.0, "step": 6654 }, { "epoch": 0.428884449313656, "grad_norm": 0.7281807546593764, "learning_rate": 6.349445041174365e-06, "loss": 0.005, "step": 6655 }, { "epoch": 0.4289488947605852, "grad_norm": 0.0007956452725589806, "learning_rate": 6.3487289652703185e-06, "loss": 0.0, "step": 6656 }, { "epoch": 0.42901334020751436, "grad_norm": 0.008950084640330636, "learning_rate": 6.348012889366273e-06, "loss": 0.0, "step": 6657 }, { "epoch": 0.4290777856544435, "grad_norm": 0.17089821406758554, "learning_rate": 6.347296813462227e-06, "loss": 0.0018, "step": 6658 }, { "epoch": 0.4291422311013727, "grad_norm": 0.0030694031207953013, "learning_rate": 6.346580737558181e-06, "loss": 0.0, "step": 6659 }, { "epoch": 0.42920667654830186, "grad_norm": 0.02444291912822385, "learning_rate": 6.3458646616541366e-06, "loss": 0.0, "step": 6660 }, { "epoch": 0.42927112199523104, "grad_norm": 0.002884947247548892, "learning_rate": 6.345148585750091e-06, "loss": 0.0, "step": 6661 }, { "epoch": 0.4293355674421602, "grad_norm": 0.005064100957951929, "learning_rate": 6.344432509846044e-06, "loss": 0.0, "step": 6662 }, { "epoch": 0.4294000128890894, "grad_norm": 0.023081917055650943, "learning_rate": 6.343716433941999e-06, "loss": 0.0002, "step": 6663 }, { "epoch": 0.42946445833601854, "grad_norm": 0.0009968242300762613, "learning_rate": 6.343000358037953e-06, "loss": 0.0, "step": 6664 }, { "epoch": 0.4295289037829477, "grad_norm": 0.002290510605043889, "learning_rate": 6.342284282133907e-06, "loss": 0.0, "step": 6665 }, { "epoch": 0.4295933492298769, "grad_norm": 0.00029481900990017935, "learning_rate": 6.3415682062298615e-06, "loss": 0.0, "step": 6666 }, { "epoch": 0.4296577946768061, "grad_norm": 0.002503810212365343, "learning_rate": 6.340852130325815e-06, "loss": 0.0, "step": 6667 }, { "epoch": 0.4297222401237353, "grad_norm": 0.08980436602116373, "learning_rate": 6.340136054421769e-06, "loss": 0.0002, "step": 6668 }, { "epoch": 0.4297866855706644, "grad_norm": 0.011429338482220037, "learning_rate": 6.3394199785177236e-06, "loss": 0.0, "step": 6669 }, { "epoch": 0.4298511310175936, "grad_norm": 0.009484022902911424, "learning_rate": 6.338703902613678e-06, "loss": 0.0, "step": 6670 }, { "epoch": 0.4299155764645228, "grad_norm": 0.02464525902804644, "learning_rate": 6.337987826709632e-06, "loss": 0.0002, "step": 6671 }, { "epoch": 0.42998002191145196, "grad_norm": 0.0070729571841776796, "learning_rate": 6.337271750805586e-06, "loss": 0.0, "step": 6672 }, { "epoch": 0.43004446735838114, "grad_norm": 0.0005698593327859449, "learning_rate": 6.33655567490154e-06, "loss": 0.0, "step": 6673 }, { "epoch": 0.43010891280531033, "grad_norm": 0.007270899548970992, "learning_rate": 6.335839598997494e-06, "loss": 0.0, "step": 6674 }, { "epoch": 0.43017335825223946, "grad_norm": 0.0009913103341804657, "learning_rate": 6.3351235230934485e-06, "loss": 0.0, "step": 6675 }, { "epoch": 0.43023780369916864, "grad_norm": 0.002595574146712181, "learning_rate": 6.334407447189402e-06, "loss": 0.0, "step": 6676 }, { "epoch": 0.4303022491460978, "grad_norm": 0.14062348338474917, "learning_rate": 6.333691371285356e-06, "loss": 0.001, "step": 6677 }, { "epoch": 0.430366694593027, "grad_norm": 0.000567302968704546, "learning_rate": 6.3329752953813105e-06, "loss": 0.0, "step": 6678 }, { "epoch": 0.4304311400399562, "grad_norm": 0.0054263714955121115, "learning_rate": 6.332259219477265e-06, "loss": 0.0, "step": 6679 }, { "epoch": 0.4304955854868854, "grad_norm": 0.016467133939146297, "learning_rate": 6.331543143573219e-06, "loss": 0.0001, "step": 6680 }, { "epoch": 0.4305600309338145, "grad_norm": 0.0013694414101803056, "learning_rate": 6.330827067669173e-06, "loss": 0.0, "step": 6681 }, { "epoch": 0.4306244763807437, "grad_norm": 0.0037395349074488956, "learning_rate": 6.330110991765127e-06, "loss": 0.0, "step": 6682 }, { "epoch": 0.4306889218276729, "grad_norm": 0.29080137264148787, "learning_rate": 6.329394915861082e-06, "loss": 0.0009, "step": 6683 }, { "epoch": 0.43075336727460206, "grad_norm": 0.002323678893051278, "learning_rate": 6.328678839957036e-06, "loss": 0.0, "step": 6684 }, { "epoch": 0.43081781272153125, "grad_norm": 0.08255165084440778, "learning_rate": 6.327962764052991e-06, "loss": 0.0001, "step": 6685 }, { "epoch": 0.4308822581684604, "grad_norm": 0.1910078417668657, "learning_rate": 6.327246688148945e-06, "loss": 0.0004, "step": 6686 }, { "epoch": 0.43094670361538956, "grad_norm": 0.0015552556160902183, "learning_rate": 6.326530612244899e-06, "loss": 0.0, "step": 6687 }, { "epoch": 0.43101114906231874, "grad_norm": 0.000931274017975527, "learning_rate": 6.325814536340853e-06, "loss": 0.0, "step": 6688 }, { "epoch": 0.43107559450924793, "grad_norm": 0.026236450627385367, "learning_rate": 6.325098460436807e-06, "loss": 0.0002, "step": 6689 }, { "epoch": 0.4311400399561771, "grad_norm": 0.007944311984236513, "learning_rate": 6.324382384532761e-06, "loss": 0.0, "step": 6690 }, { "epoch": 0.4312044854031063, "grad_norm": 0.0017483911802880752, "learning_rate": 6.323666308628716e-06, "loss": 0.0, "step": 6691 }, { "epoch": 0.4312689308500354, "grad_norm": 0.8183681514879361, "learning_rate": 6.322950232724669e-06, "loss": 0.0008, "step": 6692 }, { "epoch": 0.4313333762969646, "grad_norm": 0.013792580667864334, "learning_rate": 6.322234156820623e-06, "loss": 0.0, "step": 6693 }, { "epoch": 0.4313978217438938, "grad_norm": 0.015097850842103971, "learning_rate": 6.321518080916578e-06, "loss": 0.0, "step": 6694 }, { "epoch": 0.431462267190823, "grad_norm": 0.006704856775930077, "learning_rate": 6.320802005012532e-06, "loss": 0.0, "step": 6695 }, { "epoch": 0.43152671263775216, "grad_norm": 0.0008574753945796366, "learning_rate": 6.320085929108486e-06, "loss": 0.0, "step": 6696 }, { "epoch": 0.4315911580846813, "grad_norm": 0.18016405765260005, "learning_rate": 6.31936985320444e-06, "loss": 0.0019, "step": 6697 }, { "epoch": 0.4316556035316105, "grad_norm": 4.158763383683872, "learning_rate": 6.318653777300394e-06, "loss": 0.0134, "step": 6698 }, { "epoch": 0.43172004897853966, "grad_norm": 0.04242575207314043, "learning_rate": 6.317937701396348e-06, "loss": 0.0001, "step": 6699 }, { "epoch": 0.43178449442546885, "grad_norm": 0.0038851543432019396, "learning_rate": 6.317221625492303e-06, "loss": 0.0, "step": 6700 }, { "epoch": 0.43184893987239803, "grad_norm": 0.15580466106012691, "learning_rate": 6.316505549588257e-06, "loss": 0.0009, "step": 6701 }, { "epoch": 0.4319133853193272, "grad_norm": 0.005339445096540471, "learning_rate": 6.31578947368421e-06, "loss": 0.0, "step": 6702 }, { "epoch": 0.43197783076625634, "grad_norm": 0.0005897817405355401, "learning_rate": 6.315073397780165e-06, "loss": 0.0, "step": 6703 }, { "epoch": 0.4320422762131855, "grad_norm": 0.00029861629602622337, "learning_rate": 6.314357321876119e-06, "loss": 0.0, "step": 6704 }, { "epoch": 0.4321067216601147, "grad_norm": 0.0011596472343831097, "learning_rate": 6.313641245972073e-06, "loss": 0.0, "step": 6705 }, { "epoch": 0.4321711671070439, "grad_norm": 0.21832649072057134, "learning_rate": 6.3129251700680275e-06, "loss": 0.0002, "step": 6706 }, { "epoch": 0.4322356125539731, "grad_norm": 8.891694569460509e-05, "learning_rate": 6.312209094163983e-06, "loss": 0.0, "step": 6707 }, { "epoch": 0.4323000580009022, "grad_norm": 0.0010427760358763495, "learning_rate": 6.311493018259936e-06, "loss": 0.0, "step": 6708 }, { "epoch": 0.4323645034478314, "grad_norm": 0.044251419583747786, "learning_rate": 6.31077694235589e-06, "loss": 0.0, "step": 6709 }, { "epoch": 0.4324289488947606, "grad_norm": 0.000149714890507064, "learning_rate": 6.310060866451845e-06, "loss": 0.0, "step": 6710 }, { "epoch": 0.43249339434168976, "grad_norm": 0.20032516548059928, "learning_rate": 6.309344790547799e-06, "loss": 0.0002, "step": 6711 }, { "epoch": 0.43255783978861895, "grad_norm": 0.002627819770958189, "learning_rate": 6.308628714643753e-06, "loss": 0.0, "step": 6712 }, { "epoch": 0.43262228523554813, "grad_norm": 0.01592602803687255, "learning_rate": 6.307912638739707e-06, "loss": 0.0, "step": 6713 }, { "epoch": 0.43268673068247726, "grad_norm": 0.004949941235307722, "learning_rate": 6.307196562835661e-06, "loss": 0.0, "step": 6714 }, { "epoch": 0.43275117612940645, "grad_norm": 0.4457405776687715, "learning_rate": 6.306480486931615e-06, "loss": 0.0016, "step": 6715 }, { "epoch": 0.43281562157633563, "grad_norm": 0.0001713559314944813, "learning_rate": 6.30576441102757e-06, "loss": 0.0, "step": 6716 }, { "epoch": 0.4328800670232648, "grad_norm": 0.52003317085252, "learning_rate": 6.305048335123524e-06, "loss": 0.0034, "step": 6717 }, { "epoch": 0.432944512470194, "grad_norm": 0.24328987666119062, "learning_rate": 6.304332259219477e-06, "loss": 0.0004, "step": 6718 }, { "epoch": 0.4330089579171232, "grad_norm": 6.05488712972811e-05, "learning_rate": 6.303616183315432e-06, "loss": 0.0, "step": 6719 }, { "epoch": 0.4330734033640523, "grad_norm": 0.9154862295455783, "learning_rate": 6.302900107411386e-06, "loss": 0.0044, "step": 6720 }, { "epoch": 0.4331378488109815, "grad_norm": 0.00011452810699505064, "learning_rate": 6.30218403150734e-06, "loss": 0.0, "step": 6721 }, { "epoch": 0.4332022942579107, "grad_norm": 0.2684594141416232, "learning_rate": 6.301467955603295e-06, "loss": 0.0047, "step": 6722 }, { "epoch": 0.43326673970483986, "grad_norm": 0.02207255766112272, "learning_rate": 6.300751879699248e-06, "loss": 0.0, "step": 6723 }, { "epoch": 0.43333118515176905, "grad_norm": 0.0029896027307358413, "learning_rate": 6.300035803795202e-06, "loss": 0.0, "step": 6724 }, { "epoch": 0.4333956305986982, "grad_norm": 0.16950898046179497, "learning_rate": 6.299319727891157e-06, "loss": 0.0003, "step": 6725 }, { "epoch": 0.43346007604562736, "grad_norm": 0.29806840851315397, "learning_rate": 6.298603651987111e-06, "loss": 0.0017, "step": 6726 }, { "epoch": 0.43352452149255655, "grad_norm": 0.016236714793457938, "learning_rate": 6.297887576083065e-06, "loss": 0.0, "step": 6727 }, { "epoch": 0.43358896693948573, "grad_norm": 0.009315682470924137, "learning_rate": 6.297171500179019e-06, "loss": 0.0, "step": 6728 }, { "epoch": 0.4336534123864149, "grad_norm": 0.03930989690288691, "learning_rate": 6.296455424274973e-06, "loss": 0.0001, "step": 6729 }, { "epoch": 0.4337178578333441, "grad_norm": 0.17229336517397006, "learning_rate": 6.295739348370928e-06, "loss": 0.0018, "step": 6730 }, { "epoch": 0.43378230328027323, "grad_norm": 0.010794653699986811, "learning_rate": 6.2950232724668825e-06, "loss": 0.0, "step": 6731 }, { "epoch": 0.4338467487272024, "grad_norm": 0.09056349817328786, "learning_rate": 6.294307196562837e-06, "loss": 0.0001, "step": 6732 }, { "epoch": 0.4339111941741316, "grad_norm": 0.0032560131138640283, "learning_rate": 6.293591120658791e-06, "loss": 0.0, "step": 6733 }, { "epoch": 0.4339756396210608, "grad_norm": 0.2929552456604397, "learning_rate": 6.2928750447547445e-06, "loss": 0.0024, "step": 6734 }, { "epoch": 0.43404008506798997, "grad_norm": 0.004358307485739401, "learning_rate": 6.292158968850699e-06, "loss": 0.0, "step": 6735 }, { "epoch": 0.4341045305149191, "grad_norm": 0.7401282000325132, "learning_rate": 6.291442892946653e-06, "loss": 0.0031, "step": 6736 }, { "epoch": 0.4341689759618483, "grad_norm": 0.024909603939854592, "learning_rate": 6.290726817042607e-06, "loss": 0.0001, "step": 6737 }, { "epoch": 0.43423342140877746, "grad_norm": 0.005705359287954765, "learning_rate": 6.290010741138562e-06, "loss": 0.0, "step": 6738 }, { "epoch": 0.43429786685570665, "grad_norm": 0.1156991258673197, "learning_rate": 6.289294665234515e-06, "loss": 0.0001, "step": 6739 }, { "epoch": 0.43436231230263583, "grad_norm": 0.008699033419681219, "learning_rate": 6.2885785893304694e-06, "loss": 0.0, "step": 6740 }, { "epoch": 0.434426757749565, "grad_norm": 0.6171667602531093, "learning_rate": 6.287862513426424e-06, "loss": 0.0039, "step": 6741 }, { "epoch": 0.43449120319649415, "grad_norm": 0.16432638744027941, "learning_rate": 6.287146437522378e-06, "loss": 0.0004, "step": 6742 }, { "epoch": 0.43455564864342333, "grad_norm": 0.02447242080053091, "learning_rate": 6.286430361618332e-06, "loss": 0.0, "step": 6743 }, { "epoch": 0.4346200940903525, "grad_norm": 0.0921131984266586, "learning_rate": 6.285714285714286e-06, "loss": 0.0001, "step": 6744 }, { "epoch": 0.4346845395372817, "grad_norm": 0.022735265434989764, "learning_rate": 6.28499820981024e-06, "loss": 0.0, "step": 6745 }, { "epoch": 0.4347489849842109, "grad_norm": 0.0013172423891951862, "learning_rate": 6.284282133906194e-06, "loss": 0.0, "step": 6746 }, { "epoch": 0.43481343043114, "grad_norm": 0.007643909237297049, "learning_rate": 6.283566058002149e-06, "loss": 0.0, "step": 6747 }, { "epoch": 0.4348778758780692, "grad_norm": 0.04607741987970149, "learning_rate": 6.282849982098103e-06, "loss": 0.0001, "step": 6748 }, { "epoch": 0.4349423213249984, "grad_norm": 0.06911486318143999, "learning_rate": 6.2821339061940564e-06, "loss": 0.0, "step": 6749 }, { "epoch": 0.43500676677192757, "grad_norm": 0.02393797398342579, "learning_rate": 6.281417830290011e-06, "loss": 0.0, "step": 6750 }, { "epoch": 0.43507121221885675, "grad_norm": 0.26460427623124094, "learning_rate": 6.280701754385965e-06, "loss": 0.0016, "step": 6751 }, { "epoch": 0.43513565766578594, "grad_norm": 0.00026218383385574675, "learning_rate": 6.279985678481919e-06, "loss": 0.0, "step": 6752 }, { "epoch": 0.43520010311271506, "grad_norm": 0.0026633181035528104, "learning_rate": 6.279269602577874e-06, "loss": 0.0, "step": 6753 }, { "epoch": 0.43526454855964425, "grad_norm": 0.11784700149040153, "learning_rate": 6.278553526673829e-06, "loss": 0.0006, "step": 6754 }, { "epoch": 0.43532899400657343, "grad_norm": 0.0017846129932184572, "learning_rate": 6.277837450769782e-06, "loss": 0.0, "step": 6755 }, { "epoch": 0.4353934394535026, "grad_norm": 0.07136146578848694, "learning_rate": 6.2771213748657365e-06, "loss": 0.0001, "step": 6756 }, { "epoch": 0.4354578849004318, "grad_norm": 0.17367550761601813, "learning_rate": 6.276405298961691e-06, "loss": 0.0003, "step": 6757 }, { "epoch": 0.435522330347361, "grad_norm": 0.06442347118931648, "learning_rate": 6.275689223057645e-06, "loss": 0.0009, "step": 6758 }, { "epoch": 0.4355867757942901, "grad_norm": 1.113797680383268, "learning_rate": 6.2749731471535994e-06, "loss": 0.0098, "step": 6759 }, { "epoch": 0.4356512212412193, "grad_norm": 0.005809118892805713, "learning_rate": 6.274257071249553e-06, "loss": 0.0, "step": 6760 }, { "epoch": 0.4357156666881485, "grad_norm": 1.4354561913321795, "learning_rate": 6.273540995345507e-06, "loss": 0.0023, "step": 6761 }, { "epoch": 0.43578011213507767, "grad_norm": 0.052269633952662035, "learning_rate": 6.2728249194414615e-06, "loss": 0.0005, "step": 6762 }, { "epoch": 0.43584455758200685, "grad_norm": 0.012401707536231203, "learning_rate": 6.272108843537416e-06, "loss": 0.0001, "step": 6763 }, { "epoch": 0.435909003028936, "grad_norm": 0.0037154901687754456, "learning_rate": 6.27139276763337e-06, "loss": 0.0, "step": 6764 }, { "epoch": 0.43597344847586517, "grad_norm": 0.0018004172015471341, "learning_rate": 6.2706766917293235e-06, "loss": 0.0, "step": 6765 }, { "epoch": 0.43603789392279435, "grad_norm": 0.03574004342730524, "learning_rate": 6.269960615825278e-06, "loss": 0.0001, "step": 6766 }, { "epoch": 0.43610233936972354, "grad_norm": 0.010076841435620492, "learning_rate": 6.269244539921232e-06, "loss": 0.0, "step": 6767 }, { "epoch": 0.4361667848166527, "grad_norm": 0.15066047090537446, "learning_rate": 6.268528464017186e-06, "loss": 0.0016, "step": 6768 }, { "epoch": 0.4362312302635819, "grad_norm": 0.15723401401466586, "learning_rate": 6.267812388113141e-06, "loss": 0.0007, "step": 6769 }, { "epoch": 0.43629567571051103, "grad_norm": 0.29885035164145274, "learning_rate": 6.267096312209094e-06, "loss": 0.0011, "step": 6770 }, { "epoch": 0.4363601211574402, "grad_norm": 0.06953103863759008, "learning_rate": 6.2663802363050485e-06, "loss": 0.0, "step": 6771 }, { "epoch": 0.4364245666043694, "grad_norm": 0.0003913824342631887, "learning_rate": 6.265664160401003e-06, "loss": 0.0, "step": 6772 }, { "epoch": 0.4364890120512986, "grad_norm": 5.8933252323782934e-05, "learning_rate": 6.264948084496957e-06, "loss": 0.0, "step": 6773 }, { "epoch": 0.43655345749822777, "grad_norm": 0.003684676938295499, "learning_rate": 6.2642320085929105e-06, "loss": 0.0, "step": 6774 }, { "epoch": 0.4366179029451569, "grad_norm": 0.00030307847703955963, "learning_rate": 6.263515932688865e-06, "loss": 0.0, "step": 6775 }, { "epoch": 0.4366823483920861, "grad_norm": 0.007013575093070469, "learning_rate": 6.262799856784819e-06, "loss": 0.0, "step": 6776 }, { "epoch": 0.43674679383901527, "grad_norm": 0.00314157343057512, "learning_rate": 6.262083780880774e-06, "loss": 0.0, "step": 6777 }, { "epoch": 0.43681123928594445, "grad_norm": 0.0032058113583623357, "learning_rate": 6.2613677049767286e-06, "loss": 0.0, "step": 6778 }, { "epoch": 0.43687568473287364, "grad_norm": 0.14535620811397174, "learning_rate": 6.260651629072683e-06, "loss": 0.0004, "step": 6779 }, { "epoch": 0.4369401301798028, "grad_norm": 0.0010641447845441464, "learning_rate": 6.259935553168637e-06, "loss": 0.0, "step": 6780 }, { "epoch": 0.43700457562673195, "grad_norm": 0.030892537690979292, "learning_rate": 6.259219477264591e-06, "loss": 0.0002, "step": 6781 }, { "epoch": 0.43706902107366113, "grad_norm": 0.5230201366789412, "learning_rate": 6.258503401360545e-06, "loss": 0.0012, "step": 6782 }, { "epoch": 0.4371334665205903, "grad_norm": 0.18978095448541774, "learning_rate": 6.257787325456499e-06, "loss": 0.0007, "step": 6783 }, { "epoch": 0.4371979119675195, "grad_norm": 0.00987531260439076, "learning_rate": 6.2570712495524535e-06, "loss": 0.0, "step": 6784 }, { "epoch": 0.4372623574144487, "grad_norm": 0.25283649745634335, "learning_rate": 6.256355173648408e-06, "loss": 0.0003, "step": 6785 }, { "epoch": 0.4373268028613778, "grad_norm": 0.0061907236244896895, "learning_rate": 6.255639097744361e-06, "loss": 0.0, "step": 6786 }, { "epoch": 0.437391248308307, "grad_norm": 0.2543641582962293, "learning_rate": 6.2549230218403156e-06, "loss": 0.0018, "step": 6787 }, { "epoch": 0.4374556937552362, "grad_norm": 0.008615261739393556, "learning_rate": 6.25420694593627e-06, "loss": 0.0, "step": 6788 }, { "epoch": 0.43752013920216537, "grad_norm": 0.01651753520272118, "learning_rate": 6.253490870032224e-06, "loss": 0.0002, "step": 6789 }, { "epoch": 0.43758458464909455, "grad_norm": 0.21448941922489909, "learning_rate": 6.252774794128178e-06, "loss": 0.0007, "step": 6790 }, { "epoch": 0.43764903009602374, "grad_norm": 3.2058399888517126e-05, "learning_rate": 6.252058718224132e-06, "loss": 0.0, "step": 6791 }, { "epoch": 0.43771347554295287, "grad_norm": 0.7354880075909804, "learning_rate": 6.251342642320086e-06, "loss": 0.0028, "step": 6792 }, { "epoch": 0.43777792098988205, "grad_norm": 0.0030102187090905903, "learning_rate": 6.2506265664160405e-06, "loss": 0.0, "step": 6793 }, { "epoch": 0.43784236643681124, "grad_norm": 0.01540114091165498, "learning_rate": 6.249910490511995e-06, "loss": 0.0001, "step": 6794 }, { "epoch": 0.4379068118837404, "grad_norm": 0.08756405771852145, "learning_rate": 6.249194414607948e-06, "loss": 0.0001, "step": 6795 }, { "epoch": 0.4379712573306696, "grad_norm": 0.004207340449658778, "learning_rate": 6.2484783387039025e-06, "loss": 0.0, "step": 6796 }, { "epoch": 0.4380357027775988, "grad_norm": 0.03770904945438654, "learning_rate": 6.247762262799857e-06, "loss": 0.0001, "step": 6797 }, { "epoch": 0.4381001482245279, "grad_norm": 0.0003550056645818315, "learning_rate": 6.247046186895811e-06, "loss": 0.0, "step": 6798 }, { "epoch": 0.4381645936714571, "grad_norm": 0.14162342297415867, "learning_rate": 6.2463301109917654e-06, "loss": 0.0019, "step": 6799 }, { "epoch": 0.4382290391183863, "grad_norm": 0.00033271973675798647, "learning_rate": 6.245614035087721e-06, "loss": 0.0, "step": 6800 }, { "epoch": 0.43829348456531547, "grad_norm": 1.0295957533116706, "learning_rate": 6.244897959183675e-06, "loss": 0.007, "step": 6801 }, { "epoch": 0.43835793001224466, "grad_norm": 0.011322654464795303, "learning_rate": 6.244181883279628e-06, "loss": 0.0001, "step": 6802 }, { "epoch": 0.4384223754591738, "grad_norm": 0.005792339884773891, "learning_rate": 6.243465807375583e-06, "loss": 0.0, "step": 6803 }, { "epoch": 0.43848682090610297, "grad_norm": 0.7715247421327011, "learning_rate": 6.242749731471537e-06, "loss": 0.0032, "step": 6804 }, { "epoch": 0.43855126635303215, "grad_norm": 0.029430314989175126, "learning_rate": 6.242033655567491e-06, "loss": 0.0001, "step": 6805 }, { "epoch": 0.43861571179996134, "grad_norm": 0.0044577108409088175, "learning_rate": 6.241317579663445e-06, "loss": 0.0, "step": 6806 }, { "epoch": 0.4386801572468905, "grad_norm": 0.0011134538704909022, "learning_rate": 6.240601503759399e-06, "loss": 0.0, "step": 6807 }, { "epoch": 0.4387446026938197, "grad_norm": 0.00017366062586490197, "learning_rate": 6.239885427855353e-06, "loss": 0.0, "step": 6808 }, { "epoch": 0.43880904814074884, "grad_norm": 0.006931424599151439, "learning_rate": 6.239169351951308e-06, "loss": 0.0, "step": 6809 }, { "epoch": 0.438873493587678, "grad_norm": 0.06903922706188115, "learning_rate": 6.238453276047262e-06, "loss": 0.0001, "step": 6810 }, { "epoch": 0.4389379390346072, "grad_norm": 0.0007704492971362574, "learning_rate": 6.237737200143215e-06, "loss": 0.0, "step": 6811 }, { "epoch": 0.4390023844815364, "grad_norm": 0.4028598105360153, "learning_rate": 6.23702112423917e-06, "loss": 0.0006, "step": 6812 }, { "epoch": 0.4390668299284656, "grad_norm": 0.008059346311853226, "learning_rate": 6.236305048335124e-06, "loss": 0.0, "step": 6813 }, { "epoch": 0.4391312753753947, "grad_norm": 0.2791621410064719, "learning_rate": 6.235588972431078e-06, "loss": 0.0007, "step": 6814 }, { "epoch": 0.4391957208223239, "grad_norm": 0.01712343117021725, "learning_rate": 6.2348728965270325e-06, "loss": 0.0, "step": 6815 }, { "epoch": 0.43926016626925307, "grad_norm": 0.00298623967276542, "learning_rate": 6.234156820622986e-06, "loss": 0.0, "step": 6816 }, { "epoch": 0.43932461171618226, "grad_norm": 0.0034664114644248696, "learning_rate": 6.23344074471894e-06, "loss": 0.0, "step": 6817 }, { "epoch": 0.43938905716311144, "grad_norm": 0.03522052287221309, "learning_rate": 6.232724668814895e-06, "loss": 0.0003, "step": 6818 }, { "epoch": 0.4394535026100406, "grad_norm": 0.05915418664026023, "learning_rate": 6.232008592910849e-06, "loss": 0.0001, "step": 6819 }, { "epoch": 0.43951794805696975, "grad_norm": 0.00684155053848704, "learning_rate": 6.231292517006803e-06, "loss": 0.0, "step": 6820 }, { "epoch": 0.43958239350389894, "grad_norm": 0.034032131900648584, "learning_rate": 6.230576441102757e-06, "loss": 0.0, "step": 6821 }, { "epoch": 0.4396468389508281, "grad_norm": 0.006321578377653734, "learning_rate": 6.229860365198711e-06, "loss": 0.0, "step": 6822 }, { "epoch": 0.4397112843977573, "grad_norm": 0.0879231922423166, "learning_rate": 6.229144289294665e-06, "loss": 0.0002, "step": 6823 }, { "epoch": 0.4397757298446865, "grad_norm": 0.05236788173857305, "learning_rate": 6.22842821339062e-06, "loss": 0.0001, "step": 6824 }, { "epoch": 0.4398401752916156, "grad_norm": 0.002943995240670316, "learning_rate": 6.227712137486575e-06, "loss": 0.0, "step": 6825 }, { "epoch": 0.4399046207385448, "grad_norm": 0.022349262684986156, "learning_rate": 6.226996061582529e-06, "loss": 0.0001, "step": 6826 }, { "epoch": 0.439969066185474, "grad_norm": 0.007797418739776279, "learning_rate": 6.226279985678482e-06, "loss": 0.0, "step": 6827 }, { "epoch": 0.4400335116324032, "grad_norm": 0.749392223462473, "learning_rate": 6.225563909774437e-06, "loss": 0.0043, "step": 6828 }, { "epoch": 0.44009795707933236, "grad_norm": 0.014145049204418786, "learning_rate": 6.224847833870391e-06, "loss": 0.0001, "step": 6829 }, { "epoch": 0.44016240252626154, "grad_norm": 0.22435268993820356, "learning_rate": 6.224131757966345e-06, "loss": 0.0003, "step": 6830 }, { "epoch": 0.44022684797319067, "grad_norm": 0.019355140889198114, "learning_rate": 6.2234156820623e-06, "loss": 0.0001, "step": 6831 }, { "epoch": 0.44029129342011986, "grad_norm": 0.15658782559037665, "learning_rate": 6.222699606158253e-06, "loss": 0.0001, "step": 6832 }, { "epoch": 0.44035573886704904, "grad_norm": 0.010001780505805401, "learning_rate": 6.221983530254207e-06, "loss": 0.0, "step": 6833 }, { "epoch": 0.4404201843139782, "grad_norm": 0.03181574062344294, "learning_rate": 6.221267454350162e-06, "loss": 0.0, "step": 6834 }, { "epoch": 0.4404846297609074, "grad_norm": 0.5931863116782081, "learning_rate": 6.220551378446116e-06, "loss": 0.0014, "step": 6835 }, { "epoch": 0.4405490752078366, "grad_norm": 0.4585510156011477, "learning_rate": 6.21983530254207e-06, "loss": 0.0013, "step": 6836 }, { "epoch": 0.4406135206547657, "grad_norm": 0.013945956708650028, "learning_rate": 6.219119226638024e-06, "loss": 0.0, "step": 6837 }, { "epoch": 0.4406779661016949, "grad_norm": 0.023095446256535816, "learning_rate": 6.218403150733978e-06, "loss": 0.0, "step": 6838 }, { "epoch": 0.4407424115486241, "grad_norm": 0.005353204918259331, "learning_rate": 6.217687074829932e-06, "loss": 0.0, "step": 6839 }, { "epoch": 0.4408068569955533, "grad_norm": 0.03502575011971063, "learning_rate": 6.216970998925887e-06, "loss": 0.0002, "step": 6840 }, { "epoch": 0.44087130244248246, "grad_norm": 0.12758947956133784, "learning_rate": 6.216254923021841e-06, "loss": 0.0005, "step": 6841 }, { "epoch": 0.4409357478894116, "grad_norm": 0.006109814671694446, "learning_rate": 6.215538847117794e-06, "loss": 0.0016, "step": 6842 }, { "epoch": 0.4410001933363408, "grad_norm": 0.0051795290955148395, "learning_rate": 6.214822771213749e-06, "loss": 0.0, "step": 6843 }, { "epoch": 0.44106463878326996, "grad_norm": 0.0002441407996229203, "learning_rate": 6.214106695309703e-06, "loss": 0.0, "step": 6844 }, { "epoch": 0.44112908423019914, "grad_norm": 0.1309906813181773, "learning_rate": 6.213390619405657e-06, "loss": 0.001, "step": 6845 }, { "epoch": 0.4411935296771283, "grad_norm": 0.0005050727517481047, "learning_rate": 6.2126745435016116e-06, "loss": 0.0, "step": 6846 }, { "epoch": 0.4412579751240575, "grad_norm": 0.005029458636800389, "learning_rate": 6.211958467597567e-06, "loss": 0.0, "step": 6847 }, { "epoch": 0.44132242057098664, "grad_norm": 0.015254279423934458, "learning_rate": 6.21124239169352e-06, "loss": 0.0001, "step": 6848 }, { "epoch": 0.4413868660179158, "grad_norm": 0.00022741485742966934, "learning_rate": 6.2105263157894745e-06, "loss": 0.0, "step": 6849 }, { "epoch": 0.441451311464845, "grad_norm": 0.22921487814058641, "learning_rate": 6.209810239885429e-06, "loss": 0.0005, "step": 6850 }, { "epoch": 0.4415157569117742, "grad_norm": 0.0032367808859397958, "learning_rate": 6.209094163981383e-06, "loss": 0.0, "step": 6851 }, { "epoch": 0.4415802023587034, "grad_norm": 0.7078035888784612, "learning_rate": 6.208378088077337e-06, "loss": 0.0009, "step": 6852 }, { "epoch": 0.4416446478056325, "grad_norm": 0.0015278402776132136, "learning_rate": 6.207662012173291e-06, "loss": 0.0, "step": 6853 }, { "epoch": 0.4417090932525617, "grad_norm": 0.005985628339181594, "learning_rate": 6.206945936269245e-06, "loss": 0.0001, "step": 6854 }, { "epoch": 0.4417735386994909, "grad_norm": 0.08176947446269071, "learning_rate": 6.206229860365199e-06, "loss": 0.0001, "step": 6855 }, { "epoch": 0.44183798414642006, "grad_norm": 3.5829103841858606e-05, "learning_rate": 6.205513784461154e-06, "loss": 0.0, "step": 6856 }, { "epoch": 0.44190242959334924, "grad_norm": 0.006482419460032692, "learning_rate": 6.204797708557108e-06, "loss": 0.0, "step": 6857 }, { "epoch": 0.44196687504027843, "grad_norm": 0.0007300640455878939, "learning_rate": 6.2040816326530614e-06, "loss": 0.0, "step": 6858 }, { "epoch": 0.44203132048720756, "grad_norm": 0.005625053429722351, "learning_rate": 6.203365556749016e-06, "loss": 0.0, "step": 6859 }, { "epoch": 0.44209576593413674, "grad_norm": 0.0861411175000196, "learning_rate": 6.20264948084497e-06, "loss": 0.0003, "step": 6860 }, { "epoch": 0.4421602113810659, "grad_norm": 0.004054258970734921, "learning_rate": 6.201933404940924e-06, "loss": 0.0, "step": 6861 }, { "epoch": 0.4422246568279951, "grad_norm": 0.009220336750607218, "learning_rate": 6.201217329036879e-06, "loss": 0.0001, "step": 6862 }, { "epoch": 0.4422891022749243, "grad_norm": 0.011453056003512678, "learning_rate": 6.200501253132832e-06, "loss": 0.0001, "step": 6863 }, { "epoch": 0.4423535477218534, "grad_norm": 0.0011940250474132966, "learning_rate": 6.199785177228786e-06, "loss": 0.0, "step": 6864 }, { "epoch": 0.4424179931687826, "grad_norm": 6.666298560305661e-05, "learning_rate": 6.199069101324741e-06, "loss": 0.0, "step": 6865 }, { "epoch": 0.4424824386157118, "grad_norm": 0.02623876561333548, "learning_rate": 6.198353025420695e-06, "loss": 0.0001, "step": 6866 }, { "epoch": 0.442546884062641, "grad_norm": 0.04930259728432476, "learning_rate": 6.1976369495166484e-06, "loss": 0.0005, "step": 6867 }, { "epoch": 0.44261132950957016, "grad_norm": 0.006099219300208031, "learning_rate": 6.196920873612603e-06, "loss": 0.0, "step": 6868 }, { "epoch": 0.44267577495649935, "grad_norm": 0.0012266434546431698, "learning_rate": 6.196204797708557e-06, "loss": 0.0, "step": 6869 }, { "epoch": 0.4427402204034285, "grad_norm": 0.9596212412780556, "learning_rate": 6.195488721804512e-06, "loss": 0.0027, "step": 6870 }, { "epoch": 0.44280466585035766, "grad_norm": 0.004923342711700275, "learning_rate": 6.1947726459004665e-06, "loss": 0.0, "step": 6871 }, { "epoch": 0.44286911129728684, "grad_norm": 0.001109321347760395, "learning_rate": 6.194056569996421e-06, "loss": 0.0, "step": 6872 }, { "epoch": 0.44293355674421603, "grad_norm": 0.0483371247384216, "learning_rate": 6.193340494092375e-06, "loss": 0.0002, "step": 6873 }, { "epoch": 0.4429980021911452, "grad_norm": 0.005611801523459228, "learning_rate": 6.1926244181883285e-06, "loss": 0.0, "step": 6874 }, { "epoch": 0.4430624476380744, "grad_norm": 0.004807293352621903, "learning_rate": 6.191908342284283e-06, "loss": 0.0, "step": 6875 }, { "epoch": 0.4431268930850035, "grad_norm": 0.0006191547115299766, "learning_rate": 6.191192266380237e-06, "loss": 0.0, "step": 6876 }, { "epoch": 0.4431913385319327, "grad_norm": 0.020956838583349454, "learning_rate": 6.1904761904761914e-06, "loss": 0.0, "step": 6877 }, { "epoch": 0.4432557839788619, "grad_norm": 0.0281989780759363, "learning_rate": 6.189760114572146e-06, "loss": 0.0003, "step": 6878 }, { "epoch": 0.4433202294257911, "grad_norm": 0.0013422409540706092, "learning_rate": 6.189044038668099e-06, "loss": 0.0, "step": 6879 }, { "epoch": 0.44338467487272026, "grad_norm": 0.20020187181101248, "learning_rate": 6.1883279627640535e-06, "loss": 0.0001, "step": 6880 }, { "epoch": 0.4434491203196494, "grad_norm": 0.0016931356119931837, "learning_rate": 6.187611886860008e-06, "loss": 0.0, "step": 6881 }, { "epoch": 0.4435135657665786, "grad_norm": 0.03510104421217682, "learning_rate": 6.186895810955962e-06, "loss": 0.0001, "step": 6882 }, { "epoch": 0.44357801121350776, "grad_norm": 0.0005879157791024848, "learning_rate": 6.1861797350519155e-06, "loss": 0.0, "step": 6883 }, { "epoch": 0.44364245666043695, "grad_norm": 0.015057599267392894, "learning_rate": 6.18546365914787e-06, "loss": 0.0001, "step": 6884 }, { "epoch": 0.44370690210736613, "grad_norm": 0.004394785597182588, "learning_rate": 6.184747583243824e-06, "loss": 0.0, "step": 6885 }, { "epoch": 0.4437713475542953, "grad_norm": 0.20035424785335368, "learning_rate": 6.184031507339778e-06, "loss": 0.001, "step": 6886 }, { "epoch": 0.44383579300122444, "grad_norm": 0.002954961677625816, "learning_rate": 6.183315431435733e-06, "loss": 0.0, "step": 6887 }, { "epoch": 0.44390023844815363, "grad_norm": 0.14626720861118214, "learning_rate": 6.182599355531686e-06, "loss": 0.0003, "step": 6888 }, { "epoch": 0.4439646838950828, "grad_norm": 0.004259351645734143, "learning_rate": 6.1818832796276405e-06, "loss": 0.0, "step": 6889 }, { "epoch": 0.444029129342012, "grad_norm": 0.0006302645757763032, "learning_rate": 6.181167203723595e-06, "loss": 0.0, "step": 6890 }, { "epoch": 0.4440935747889412, "grad_norm": 0.0005572306616088863, "learning_rate": 6.180451127819549e-06, "loss": 0.0, "step": 6891 }, { "epoch": 0.4441580202358703, "grad_norm": 0.0032139911123859584, "learning_rate": 6.179735051915503e-06, "loss": 0.0, "step": 6892 }, { "epoch": 0.4442224656827995, "grad_norm": 0.1437218464876338, "learning_rate": 6.179018976011457e-06, "loss": 0.0004, "step": 6893 }, { "epoch": 0.4442869111297287, "grad_norm": 0.00045007094295306676, "learning_rate": 6.178302900107413e-06, "loss": 0.0, "step": 6894 }, { "epoch": 0.44435135657665786, "grad_norm": 0.0007318578584300964, "learning_rate": 6.177586824203366e-06, "loss": 0.0, "step": 6895 }, { "epoch": 0.44441580202358705, "grad_norm": 0.003502809933033344, "learning_rate": 6.1768707482993206e-06, "loss": 0.0, "step": 6896 }, { "epoch": 0.44448024747051623, "grad_norm": 0.06071923318971043, "learning_rate": 6.176154672395275e-06, "loss": 0.0001, "step": 6897 }, { "epoch": 0.44454469291744536, "grad_norm": 0.0004200751505464457, "learning_rate": 6.175438596491229e-06, "loss": 0.0, "step": 6898 }, { "epoch": 0.44460913836437455, "grad_norm": 0.03086781277013602, "learning_rate": 6.174722520587183e-06, "loss": 0.0001, "step": 6899 }, { "epoch": 0.44467358381130373, "grad_norm": 0.18499814256496003, "learning_rate": 6.174006444683137e-06, "loss": 0.0007, "step": 6900 }, { "epoch": 0.4447380292582329, "grad_norm": 0.0012860809106687132, "learning_rate": 6.173290368779091e-06, "loss": 0.0, "step": 6901 }, { "epoch": 0.4448024747051621, "grad_norm": 0.011116800170123724, "learning_rate": 6.1725742928750455e-06, "loss": 0.0, "step": 6902 }, { "epoch": 0.4448669201520912, "grad_norm": 0.0315108827698995, "learning_rate": 6.171858216971e-06, "loss": 0.0002, "step": 6903 }, { "epoch": 0.4449313655990204, "grad_norm": 0.00132297479172432, "learning_rate": 6.171142141066953e-06, "loss": 0.0, "step": 6904 }, { "epoch": 0.4449958110459496, "grad_norm": 0.06148439258298901, "learning_rate": 6.1704260651629076e-06, "loss": 0.0002, "step": 6905 }, { "epoch": 0.4450602564928788, "grad_norm": 0.00796708085715272, "learning_rate": 6.169709989258862e-06, "loss": 0.0, "step": 6906 }, { "epoch": 0.44512470193980797, "grad_norm": 0.21879294518851708, "learning_rate": 6.168993913354816e-06, "loss": 0.0004, "step": 6907 }, { "epoch": 0.44518914738673715, "grad_norm": 6.952659856934143e-05, "learning_rate": 6.1682778374507705e-06, "loss": 0.0, "step": 6908 }, { "epoch": 0.4452535928336663, "grad_norm": 0.0029485427620246254, "learning_rate": 6.167561761546724e-06, "loss": 0.0, "step": 6909 }, { "epoch": 0.44531803828059546, "grad_norm": 0.00095567100834392, "learning_rate": 6.166845685642678e-06, "loss": 0.0, "step": 6910 }, { "epoch": 0.44538248372752465, "grad_norm": 0.00023555212651686218, "learning_rate": 6.1661296097386325e-06, "loss": 0.0, "step": 6911 }, { "epoch": 0.44544692917445383, "grad_norm": 0.003228572952052681, "learning_rate": 6.165413533834587e-06, "loss": 0.0, "step": 6912 }, { "epoch": 0.445511374621383, "grad_norm": 0.008805185668932146, "learning_rate": 6.164697457930541e-06, "loss": 0.0, "step": 6913 }, { "epoch": 0.4455758200683122, "grad_norm": 0.00209328623167358, "learning_rate": 6.1639813820264945e-06, "loss": 0.0, "step": 6914 }, { "epoch": 0.44564026551524133, "grad_norm": 4.6677382605160086e-05, "learning_rate": 6.163265306122449e-06, "loss": 0.0, "step": 6915 }, { "epoch": 0.4457047109621705, "grad_norm": 6.16949104243802e-05, "learning_rate": 6.162549230218403e-06, "loss": 0.0, "step": 6916 }, { "epoch": 0.4457691564090997, "grad_norm": 0.034629646127696145, "learning_rate": 6.161833154314358e-06, "loss": 0.0001, "step": 6917 }, { "epoch": 0.4458336018560289, "grad_norm": 0.0003335354281628114, "learning_rate": 6.161117078410313e-06, "loss": 0.0, "step": 6918 }, { "epoch": 0.44589804730295807, "grad_norm": 0.0004405627311754201, "learning_rate": 6.160401002506267e-06, "loss": 0.0, "step": 6919 }, { "epoch": 0.4459624927498872, "grad_norm": 0.0006141990923472445, "learning_rate": 6.15968492660222e-06, "loss": 0.0, "step": 6920 }, { "epoch": 0.4460269381968164, "grad_norm": 0.004484797434344207, "learning_rate": 6.158968850698175e-06, "loss": 0.0001, "step": 6921 }, { "epoch": 0.44609138364374556, "grad_norm": 0.0006767052006918171, "learning_rate": 6.158252774794129e-06, "loss": 0.0, "step": 6922 }, { "epoch": 0.44615582909067475, "grad_norm": 0.0006632304789780592, "learning_rate": 6.157536698890083e-06, "loss": 0.0, "step": 6923 }, { "epoch": 0.44622027453760393, "grad_norm": 0.6850470870255448, "learning_rate": 6.1568206229860375e-06, "loss": 0.002, "step": 6924 }, { "epoch": 0.4462847199845331, "grad_norm": 0.01536181478069005, "learning_rate": 6.156104547081991e-06, "loss": 0.0, "step": 6925 }, { "epoch": 0.44634916543146225, "grad_norm": 0.000819613932033979, "learning_rate": 6.155388471177945e-06, "loss": 0.0, "step": 6926 }, { "epoch": 0.44641361087839143, "grad_norm": 0.0013558397768012507, "learning_rate": 6.1546723952739e-06, "loss": 0.0, "step": 6927 }, { "epoch": 0.4464780563253206, "grad_norm": 0.00011668090304934545, "learning_rate": 6.153956319369854e-06, "loss": 0.0, "step": 6928 }, { "epoch": 0.4465425017722498, "grad_norm": 0.02503661579297557, "learning_rate": 6.153240243465808e-06, "loss": 0.0002, "step": 6929 }, { "epoch": 0.446606947219179, "grad_norm": 0.003287645970191306, "learning_rate": 6.152524167561762e-06, "loss": 0.0, "step": 6930 }, { "epoch": 0.4466713926661081, "grad_norm": 0.003287645970191306, "learning_rate": 6.152524167561762e-06, "loss": 0.0059, "step": 6931 }, { "epoch": 0.4467358381130373, "grad_norm": 0.0013659329784108567, "learning_rate": 6.151808091657716e-06, "loss": 0.0, "step": 6932 }, { "epoch": 0.4468002835599665, "grad_norm": 0.02730988103668324, "learning_rate": 6.15109201575367e-06, "loss": 0.0002, "step": 6933 }, { "epoch": 0.44686472900689567, "grad_norm": 0.0002492699167945417, "learning_rate": 6.1503759398496245e-06, "loss": 0.0, "step": 6934 }, { "epoch": 0.44692917445382485, "grad_norm": 0.0004506669312678534, "learning_rate": 6.149659863945579e-06, "loss": 0.0, "step": 6935 }, { "epoch": 0.44699361990075404, "grad_norm": 0.8233311651435545, "learning_rate": 6.148943788041532e-06, "loss": 0.0042, "step": 6936 }, { "epoch": 0.44705806534768316, "grad_norm": 0.00037711894076924535, "learning_rate": 6.148227712137487e-06, "loss": 0.0, "step": 6937 }, { "epoch": 0.44712251079461235, "grad_norm": 0.022929007680163573, "learning_rate": 6.147511636233441e-06, "loss": 0.0001, "step": 6938 }, { "epoch": 0.44718695624154153, "grad_norm": 0.03777062891841532, "learning_rate": 6.146795560329395e-06, "loss": 0.0001, "step": 6939 }, { "epoch": 0.4472514016884707, "grad_norm": 0.1337892295669292, "learning_rate": 6.1460794844253495e-06, "loss": 0.0004, "step": 6940 }, { "epoch": 0.4473158471353999, "grad_norm": 0.024741520669856795, "learning_rate": 6.145363408521305e-06, "loss": 0.0001, "step": 6941 }, { "epoch": 0.44738029258232903, "grad_norm": 0.006022601529771838, "learning_rate": 6.144647332617258e-06, "loss": 0.0, "step": 6942 }, { "epoch": 0.4474447380292582, "grad_norm": 0.03185848800646121, "learning_rate": 6.143931256713212e-06, "loss": 0.0003, "step": 6943 }, { "epoch": 0.4475091834761874, "grad_norm": 0.04492633735220352, "learning_rate": 6.143215180809167e-06, "loss": 0.0005, "step": 6944 }, { "epoch": 0.4475736289231166, "grad_norm": 0.0009602092421418444, "learning_rate": 6.142499104905121e-06, "loss": 0.0, "step": 6945 }, { "epoch": 0.44763807437004577, "grad_norm": 0.00023928778128441458, "learning_rate": 6.141783029001075e-06, "loss": 0.0, "step": 6946 }, { "epoch": 0.44770251981697495, "grad_norm": 0.09243969869578006, "learning_rate": 6.141066953097029e-06, "loss": 0.0001, "step": 6947 }, { "epoch": 0.4477669652639041, "grad_norm": 0.020509047130477586, "learning_rate": 6.140350877192983e-06, "loss": 0.0002, "step": 6948 }, { "epoch": 0.44783141071083327, "grad_norm": 0.5742998130825934, "learning_rate": 6.139634801288937e-06, "loss": 0.0026, "step": 6949 }, { "epoch": 0.44789585615776245, "grad_norm": 0.0006601064763192048, "learning_rate": 6.138918725384892e-06, "loss": 0.0, "step": 6950 }, { "epoch": 0.44796030160469164, "grad_norm": 0.4013238219805938, "learning_rate": 6.138202649480846e-06, "loss": 0.0024, "step": 6951 }, { "epoch": 0.4480247470516208, "grad_norm": 0.19748266008165033, "learning_rate": 6.137486573576799e-06, "loss": 0.0003, "step": 6952 }, { "epoch": 0.44808919249855, "grad_norm": 0.005614501578779723, "learning_rate": 6.136770497672754e-06, "loss": 0.0, "step": 6953 }, { "epoch": 0.44815363794547913, "grad_norm": 0.0018398338943028784, "learning_rate": 6.136054421768708e-06, "loss": 0.0, "step": 6954 }, { "epoch": 0.4482180833924083, "grad_norm": 0.006431620033200353, "learning_rate": 6.135338345864662e-06, "loss": 0.0, "step": 6955 }, { "epoch": 0.4482825288393375, "grad_norm": 5.3012932827666375e-05, "learning_rate": 6.1346222699606166e-06, "loss": 0.0, "step": 6956 }, { "epoch": 0.4483469742862667, "grad_norm": 0.0054546841273027434, "learning_rate": 6.13390619405657e-06, "loss": 0.0, "step": 6957 }, { "epoch": 0.44841141973319587, "grad_norm": 0.008955214094707408, "learning_rate": 6.133190118152524e-06, "loss": 0.0, "step": 6958 }, { "epoch": 0.448475865180125, "grad_norm": 0.012356203690497503, "learning_rate": 6.132474042248479e-06, "loss": 0.0, "step": 6959 }, { "epoch": 0.4485403106270542, "grad_norm": 0.38077338788568676, "learning_rate": 6.131757966344433e-06, "loss": 0.0019, "step": 6960 }, { "epoch": 0.44860475607398337, "grad_norm": 0.3053363835218518, "learning_rate": 6.131041890440386e-06, "loss": 0.0017, "step": 6961 }, { "epoch": 0.44866920152091255, "grad_norm": 0.0024220880437371517, "learning_rate": 6.130325814536341e-06, "loss": 0.0, "step": 6962 }, { "epoch": 0.44873364696784174, "grad_norm": 0.0015772679082248853, "learning_rate": 6.129609738632295e-06, "loss": 0.0, "step": 6963 }, { "epoch": 0.4487980924147709, "grad_norm": 0.00033190575499703344, "learning_rate": 6.128893662728249e-06, "loss": 0.0, "step": 6964 }, { "epoch": 0.44886253786170005, "grad_norm": 0.00012355856685873574, "learning_rate": 6.128177586824204e-06, "loss": 0.0, "step": 6965 }, { "epoch": 0.44892698330862923, "grad_norm": 0.0008402414323257852, "learning_rate": 6.127461510920159e-06, "loss": 0.0, "step": 6966 }, { "epoch": 0.4489914287555584, "grad_norm": 0.3629500817741936, "learning_rate": 6.126745435016113e-06, "loss": 0.0032, "step": 6967 }, { "epoch": 0.4490558742024876, "grad_norm": 2.327572136077638, "learning_rate": 6.1260293591120665e-06, "loss": 0.009, "step": 6968 }, { "epoch": 0.4491203196494168, "grad_norm": 0.02112311101313786, "learning_rate": 6.125313283208021e-06, "loss": 0.0002, "step": 6969 }, { "epoch": 0.4491847650963459, "grad_norm": 0.014130007366206468, "learning_rate": 6.124597207303975e-06, "loss": 0.0, "step": 6970 }, { "epoch": 0.4492492105432751, "grad_norm": 0.0053104441258905654, "learning_rate": 6.123881131399929e-06, "loss": 0.0001, "step": 6971 }, { "epoch": 0.4493136559902043, "grad_norm": 0.10688876165916378, "learning_rate": 6.123165055495884e-06, "loss": 0.0002, "step": 6972 }, { "epoch": 0.44937810143713347, "grad_norm": 0.004312108841936607, "learning_rate": 6.122448979591837e-06, "loss": 0.0, "step": 6973 }, { "epoch": 0.44944254688406265, "grad_norm": 0.0020536256520046975, "learning_rate": 6.121732903687791e-06, "loss": 0.0, "step": 6974 }, { "epoch": 0.44950699233099184, "grad_norm": 0.4778005434065899, "learning_rate": 6.121016827783746e-06, "loss": 0.0004, "step": 6975 }, { "epoch": 0.44957143777792097, "grad_norm": 0.3517645890823827, "learning_rate": 6.1203007518797e-06, "loss": 0.0019, "step": 6976 }, { "epoch": 0.44963588322485015, "grad_norm": 0.00961769649574364, "learning_rate": 6.1195846759756534e-06, "loss": 0.0, "step": 6977 }, { "epoch": 0.44970032867177934, "grad_norm": 0.02834457866455974, "learning_rate": 6.118868600071608e-06, "loss": 0.0, "step": 6978 }, { "epoch": 0.4497647741187085, "grad_norm": 0.3496750604736784, "learning_rate": 6.118152524167562e-06, "loss": 0.0005, "step": 6979 }, { "epoch": 0.4498292195656377, "grad_norm": 0.5185237666966998, "learning_rate": 6.117436448263516e-06, "loss": 0.0009, "step": 6980 }, { "epoch": 0.44989366501256683, "grad_norm": 0.0019981095150621467, "learning_rate": 6.116720372359471e-06, "loss": 0.0, "step": 6981 }, { "epoch": 0.449958110459496, "grad_norm": 0.001970621272454851, "learning_rate": 6.116004296455424e-06, "loss": 0.0, "step": 6982 }, { "epoch": 0.4500225559064252, "grad_norm": 0.14547553760578696, "learning_rate": 6.115288220551378e-06, "loss": 0.0003, "step": 6983 }, { "epoch": 0.4500870013533544, "grad_norm": 0.04114063122287838, "learning_rate": 6.114572144647333e-06, "loss": 0.0, "step": 6984 }, { "epoch": 0.4501514468002836, "grad_norm": 0.007233528345344776, "learning_rate": 6.113856068743287e-06, "loss": 0.0, "step": 6985 }, { "epoch": 0.45021589224721276, "grad_norm": 0.0026296207033014393, "learning_rate": 6.113139992839241e-06, "loss": 0.0, "step": 6986 }, { "epoch": 0.4502803376941419, "grad_norm": 0.03319251543124218, "learning_rate": 6.112423916935195e-06, "loss": 0.0002, "step": 6987 }, { "epoch": 0.45034478314107107, "grad_norm": 0.0024029586808359893, "learning_rate": 6.111707841031151e-06, "loss": 0.0, "step": 6988 }, { "epoch": 0.45040922858800025, "grad_norm": 0.001096244893310349, "learning_rate": 6.110991765127104e-06, "loss": 0.0, "step": 6989 }, { "epoch": 0.45047367403492944, "grad_norm": 0.47845158051066183, "learning_rate": 6.1102756892230585e-06, "loss": 0.0026, "step": 6990 }, { "epoch": 0.4505381194818586, "grad_norm": 0.005456332068239849, "learning_rate": 6.109559613319013e-06, "loss": 0.0001, "step": 6991 }, { "epoch": 0.4506025649287878, "grad_norm": 0.023044872550530067, "learning_rate": 6.108843537414967e-06, "loss": 0.0, "step": 6992 }, { "epoch": 0.45066701037571694, "grad_norm": 9.10151264291464e-05, "learning_rate": 6.1081274615109205e-06, "loss": 0.0, "step": 6993 }, { "epoch": 0.4507314558226461, "grad_norm": 0.0011771712542816663, "learning_rate": 6.107411385606875e-06, "loss": 0.0, "step": 6994 }, { "epoch": 0.4507959012695753, "grad_norm": 0.004017593902929894, "learning_rate": 6.106695309702829e-06, "loss": 0.0, "step": 6995 }, { "epoch": 0.4508603467165045, "grad_norm": 0.16323827150517303, "learning_rate": 6.1059792337987834e-06, "loss": 0.0008, "step": 6996 }, { "epoch": 0.4509247921634337, "grad_norm": 0.027595080223723742, "learning_rate": 6.105263157894738e-06, "loss": 0.0, "step": 6997 }, { "epoch": 0.4509892376103628, "grad_norm": 0.0021043324610431602, "learning_rate": 6.104547081990691e-06, "loss": 0.0, "step": 6998 }, { "epoch": 0.451053683057292, "grad_norm": 0.03236437900145626, "learning_rate": 6.1038310060866455e-06, "loss": 0.0001, "step": 6999 }, { "epoch": 0.45111812850422117, "grad_norm": 0.4955685155462287, "learning_rate": 6.1031149301826e-06, "loss": 0.0036, "step": 7000 }, { "epoch": 0.45118257395115036, "grad_norm": 0.0005264324608317756, "learning_rate": 6.102398854278554e-06, "loss": 0.0, "step": 7001 }, { "epoch": 0.45124701939807954, "grad_norm": 0.03288913141526833, "learning_rate": 6.101682778374508e-06, "loss": 0.0001, "step": 7002 }, { "epoch": 0.4513114648450087, "grad_norm": 0.001246752781181037, "learning_rate": 6.100966702470462e-06, "loss": 0.0015, "step": 7003 }, { "epoch": 0.45137591029193785, "grad_norm": 0.0005902236479268267, "learning_rate": 6.100250626566416e-06, "loss": 0.0, "step": 7004 }, { "epoch": 0.45144035573886704, "grad_norm": 0.2308556251414973, "learning_rate": 6.09953455066237e-06, "loss": 0.0011, "step": 7005 }, { "epoch": 0.4515048011857962, "grad_norm": 0.0017562515855380186, "learning_rate": 6.098818474758325e-06, "loss": 0.0, "step": 7006 }, { "epoch": 0.4515692466327254, "grad_norm": 0.01532581818107362, "learning_rate": 6.098102398854279e-06, "loss": 0.0, "step": 7007 }, { "epoch": 0.4516336920796546, "grad_norm": 0.08927833375484105, "learning_rate": 6.0973863229502325e-06, "loss": 0.0003, "step": 7008 }, { "epoch": 0.4516981375265837, "grad_norm": 0.0284605009468016, "learning_rate": 6.096670247046187e-06, "loss": 0.0, "step": 7009 }, { "epoch": 0.4517625829735129, "grad_norm": 0.0016943289754871211, "learning_rate": 6.095954171142141e-06, "loss": 0.0, "step": 7010 }, { "epoch": 0.4518270284204421, "grad_norm": 0.02218787044607103, "learning_rate": 6.095238095238096e-06, "loss": 0.0, "step": 7011 }, { "epoch": 0.4518914738673713, "grad_norm": 0.014370586866855587, "learning_rate": 6.0945220193340505e-06, "loss": 0.0001, "step": 7012 }, { "epoch": 0.45195591931430046, "grad_norm": 0.0029353212413927815, "learning_rate": 6.093805943430005e-06, "loss": 0.0, "step": 7013 }, { "epoch": 0.45202036476122964, "grad_norm": 0.009811081740184684, "learning_rate": 6.093089867525958e-06, "loss": 0.0, "step": 7014 }, { "epoch": 0.45208481020815877, "grad_norm": 0.05393490263073934, "learning_rate": 6.0923737916219126e-06, "loss": 0.0002, "step": 7015 }, { "epoch": 0.45214925565508796, "grad_norm": 2.5784644625886863, "learning_rate": 6.091657715717867e-06, "loss": 0.0189, "step": 7016 }, { "epoch": 0.45221370110201714, "grad_norm": 0.0029356539960954304, "learning_rate": 6.090941639813821e-06, "loss": 0.0, "step": 7017 }, { "epoch": 0.4522781465489463, "grad_norm": 0.00022978794218516798, "learning_rate": 6.0902255639097755e-06, "loss": 0.0, "step": 7018 }, { "epoch": 0.4523425919958755, "grad_norm": 0.029644232123119035, "learning_rate": 6.089509488005729e-06, "loss": 0.0002, "step": 7019 }, { "epoch": 0.45240703744280464, "grad_norm": 0.0034083100602888576, "learning_rate": 6.088793412101683e-06, "loss": 0.0, "step": 7020 }, { "epoch": 0.4524714828897338, "grad_norm": 0.004447110035412839, "learning_rate": 6.0880773361976375e-06, "loss": 0.0, "step": 7021 }, { "epoch": 0.452535928336663, "grad_norm": 0.13061681728651678, "learning_rate": 6.087361260293592e-06, "loss": 0.0003, "step": 7022 }, { "epoch": 0.4526003737835922, "grad_norm": 0.03177697500649157, "learning_rate": 6.086645184389546e-06, "loss": 0.0, "step": 7023 }, { "epoch": 0.4526648192305214, "grad_norm": 0.06309334915132048, "learning_rate": 6.0859291084854996e-06, "loss": 0.0003, "step": 7024 }, { "epoch": 0.45272926467745056, "grad_norm": 0.04666786572863354, "learning_rate": 6.085213032581454e-06, "loss": 0.0001, "step": 7025 }, { "epoch": 0.4527937101243797, "grad_norm": 0.030821576426459607, "learning_rate": 6.084496956677408e-06, "loss": 0.0001, "step": 7026 }, { "epoch": 0.4528581555713089, "grad_norm": 0.010487922371730633, "learning_rate": 6.0837808807733625e-06, "loss": 0.0, "step": 7027 }, { "epoch": 0.45292260101823806, "grad_norm": 0.2080304755817776, "learning_rate": 6.083064804869317e-06, "loss": 0.0021, "step": 7028 }, { "epoch": 0.45298704646516724, "grad_norm": 0.009692660458068866, "learning_rate": 6.08234872896527e-06, "loss": 0.0016, "step": 7029 }, { "epoch": 0.4530514919120964, "grad_norm": 0.004954783683027185, "learning_rate": 6.0816326530612245e-06, "loss": 0.0, "step": 7030 }, { "epoch": 0.4531159373590256, "grad_norm": 0.003751176911784929, "learning_rate": 6.080916577157179e-06, "loss": 0.0, "step": 7031 }, { "epoch": 0.45318038280595474, "grad_norm": 0.04467092785293174, "learning_rate": 6.080200501253133e-06, "loss": 0.0004, "step": 7032 }, { "epoch": 0.4532448282528839, "grad_norm": 0.20824716792808376, "learning_rate": 6.079484425349087e-06, "loss": 0.0005, "step": 7033 }, { "epoch": 0.4533092736998131, "grad_norm": 7.700961625131927e-05, "learning_rate": 6.078768349445041e-06, "loss": 0.0, "step": 7034 }, { "epoch": 0.4533737191467423, "grad_norm": 0.03432253720668772, "learning_rate": 6.078052273540996e-06, "loss": 0.0, "step": 7035 }, { "epoch": 0.4534381645936715, "grad_norm": 0.00040768886861480716, "learning_rate": 6.07733619763695e-06, "loss": 0.0, "step": 7036 }, { "epoch": 0.4535026100406006, "grad_norm": 0.009148011904105437, "learning_rate": 6.076620121732905e-06, "loss": 0.0, "step": 7037 }, { "epoch": 0.4535670554875298, "grad_norm": 0.42105234257474816, "learning_rate": 6.075904045828859e-06, "loss": 0.0032, "step": 7038 }, { "epoch": 0.453631500934459, "grad_norm": 0.003082251194585121, "learning_rate": 6.075187969924813e-06, "loss": 0.0, "step": 7039 }, { "epoch": 0.45369594638138816, "grad_norm": 0.0007968014546186246, "learning_rate": 6.074471894020767e-06, "loss": 0.0, "step": 7040 }, { "epoch": 0.45376039182831734, "grad_norm": 0.0013500992183111478, "learning_rate": 6.073755818116721e-06, "loss": 0.0, "step": 7041 }, { "epoch": 0.45382483727524653, "grad_norm": 0.020758522620687073, "learning_rate": 6.073039742212675e-06, "loss": 0.0001, "step": 7042 }, { "epoch": 0.45388928272217566, "grad_norm": 0.003780350314592039, "learning_rate": 6.0723236663086295e-06, "loss": 0.0, "step": 7043 }, { "epoch": 0.45395372816910484, "grad_norm": 0.0025714049356480063, "learning_rate": 6.071607590404584e-06, "loss": 0.0, "step": 7044 }, { "epoch": 0.454018173616034, "grad_norm": 0.000688096630702408, "learning_rate": 6.070891514500537e-06, "loss": 0.0, "step": 7045 }, { "epoch": 0.4540826190629632, "grad_norm": 0.0006434500711874743, "learning_rate": 6.070175438596492e-06, "loss": 0.0, "step": 7046 }, { "epoch": 0.4541470645098924, "grad_norm": 0.35732653654378393, "learning_rate": 6.069459362692446e-06, "loss": 0.0023, "step": 7047 }, { "epoch": 0.4542115099568215, "grad_norm": 0.01280457731163026, "learning_rate": 6.0687432867884e-06, "loss": 0.0, "step": 7048 }, { "epoch": 0.4542759554037507, "grad_norm": 0.1197270360691481, "learning_rate": 6.0680272108843545e-06, "loss": 0.0004, "step": 7049 }, { "epoch": 0.4543404008506799, "grad_norm": 0.0010014052039343515, "learning_rate": 6.067311134980308e-06, "loss": 0.0, "step": 7050 }, { "epoch": 0.4544048462976091, "grad_norm": 0.03635723904725859, "learning_rate": 6.066595059076262e-06, "loss": 0.0002, "step": 7051 }, { "epoch": 0.45446929174453826, "grad_norm": 0.00575358947033153, "learning_rate": 6.0658789831722165e-06, "loss": 0.0, "step": 7052 }, { "epoch": 0.45453373719146745, "grad_norm": 0.013448601461061216, "learning_rate": 6.065162907268171e-06, "loss": 0.0001, "step": 7053 }, { "epoch": 0.4545981826383966, "grad_norm": 0.00041888456590139113, "learning_rate": 6.064446831364125e-06, "loss": 0.0, "step": 7054 }, { "epoch": 0.45466262808532576, "grad_norm": 0.002986199622612587, "learning_rate": 6.063730755460079e-06, "loss": 0.0, "step": 7055 }, { "epoch": 0.45472707353225494, "grad_norm": 0.0008021949438291668, "learning_rate": 6.063014679556033e-06, "loss": 0.0, "step": 7056 }, { "epoch": 0.45479151897918413, "grad_norm": 0.3501131487920616, "learning_rate": 6.062298603651987e-06, "loss": 0.0012, "step": 7057 }, { "epoch": 0.4548559644261133, "grad_norm": 0.001603722886898084, "learning_rate": 6.061582527747942e-06, "loss": 0.0, "step": 7058 }, { "epoch": 0.4549204098730425, "grad_norm": 0.1883643315255252, "learning_rate": 6.060866451843897e-06, "loss": 0.0003, "step": 7059 }, { "epoch": 0.4549848553199716, "grad_norm": 0.0004600154112903025, "learning_rate": 6.060150375939851e-06, "loss": 0.0, "step": 7060 }, { "epoch": 0.4550493007669008, "grad_norm": 0.008173502769027983, "learning_rate": 6.059434300035804e-06, "loss": 0.0, "step": 7061 }, { "epoch": 0.45511374621383, "grad_norm": 0.02577972412109375, "learning_rate": 6.058718224131759e-06, "loss": 0.0001, "step": 7062 }, { "epoch": 0.4551781916607592, "grad_norm": 0.010985050193318776, "learning_rate": 6.058002148227713e-06, "loss": 0.0, "step": 7063 }, { "epoch": 0.45524263710768836, "grad_norm": 0.00025358798746525133, "learning_rate": 6.057286072323667e-06, "loss": 0.0, "step": 7064 }, { "epoch": 0.4553070825546175, "grad_norm": 0.001464238886327982, "learning_rate": 6.0565699964196216e-06, "loss": 0.0, "step": 7065 }, { "epoch": 0.4553715280015467, "grad_norm": 0.018947541797622022, "learning_rate": 6.055853920515575e-06, "loss": 0.0001, "step": 7066 }, { "epoch": 0.45543597344847586, "grad_norm": 0.0005999839663420513, "learning_rate": 6.055137844611529e-06, "loss": 0.0, "step": 7067 }, { "epoch": 0.45550041889540505, "grad_norm": 0.1648804562727237, "learning_rate": 6.054421768707484e-06, "loss": 0.0012, "step": 7068 }, { "epoch": 0.45556486434233423, "grad_norm": 0.5202259495779435, "learning_rate": 6.053705692803438e-06, "loss": 0.0021, "step": 7069 }, { "epoch": 0.4556293097892634, "grad_norm": 0.003008793348343684, "learning_rate": 6.052989616899392e-06, "loss": 0.0, "step": 7070 }, { "epoch": 0.45569375523619254, "grad_norm": 0.07646857151479122, "learning_rate": 6.052273540995346e-06, "loss": 0.0002, "step": 7071 }, { "epoch": 0.45575820068312173, "grad_norm": 0.2913056023367716, "learning_rate": 6.0515574650913e-06, "loss": 0.002, "step": 7072 }, { "epoch": 0.4558226461300509, "grad_norm": 0.09931329124014251, "learning_rate": 6.050841389187254e-06, "loss": 0.0005, "step": 7073 }, { "epoch": 0.4558870915769801, "grad_norm": 0.2881209788590056, "learning_rate": 6.0501253132832086e-06, "loss": 0.0089, "step": 7074 }, { "epoch": 0.4559515370239093, "grad_norm": 0.00044719869471577915, "learning_rate": 6.049409237379162e-06, "loss": 0.0, "step": 7075 }, { "epoch": 0.4560159824708384, "grad_norm": 0.011173389599324287, "learning_rate": 6.048693161475116e-06, "loss": 0.0, "step": 7076 }, { "epoch": 0.4560804279177676, "grad_norm": 0.0010514280422994013, "learning_rate": 6.047977085571071e-06, "loss": 0.0, "step": 7077 }, { "epoch": 0.4561448733646968, "grad_norm": 0.0006105020922317698, "learning_rate": 6.047261009667025e-06, "loss": 0.0, "step": 7078 }, { "epoch": 0.45620931881162596, "grad_norm": 0.20810970980521848, "learning_rate": 6.046544933762979e-06, "loss": 0.0003, "step": 7079 }, { "epoch": 0.45627376425855515, "grad_norm": 0.00014491230861546294, "learning_rate": 6.045828857858933e-06, "loss": 0.0, "step": 7080 }, { "epoch": 0.45633820970548433, "grad_norm": 0.0008180527801640292, "learning_rate": 6.045112781954889e-06, "loss": 0.0, "step": 7081 }, { "epoch": 0.45640265515241346, "grad_norm": 0.015151065614577604, "learning_rate": 6.044396706050842e-06, "loss": 0.0, "step": 7082 }, { "epoch": 0.45646710059934265, "grad_norm": 0.00016913031952718266, "learning_rate": 6.043680630146796e-06, "loss": 0.0, "step": 7083 }, { "epoch": 0.45653154604627183, "grad_norm": 0.0003290981387745927, "learning_rate": 6.042964554242751e-06, "loss": 0.0, "step": 7084 }, { "epoch": 0.456595991493201, "grad_norm": 0.00033238180508284173, "learning_rate": 6.042248478338705e-06, "loss": 0.0, "step": 7085 }, { "epoch": 0.4566604369401302, "grad_norm": 0.0005725779904502857, "learning_rate": 6.041532402434659e-06, "loss": 0.0, "step": 7086 }, { "epoch": 0.4567248823870593, "grad_norm": 0.0006684580100672684, "learning_rate": 6.040816326530613e-06, "loss": 0.0, "step": 7087 }, { "epoch": 0.4567893278339885, "grad_norm": 0.0015680347505695766, "learning_rate": 6.040100250626567e-06, "loss": 0.0, "step": 7088 }, { "epoch": 0.4568537732809177, "grad_norm": 0.00016943457185629763, "learning_rate": 6.039384174722521e-06, "loss": 0.0, "step": 7089 }, { "epoch": 0.4569182187278469, "grad_norm": 0.005080476489226899, "learning_rate": 6.038668098818476e-06, "loss": 0.0, "step": 7090 }, { "epoch": 0.45698266417477607, "grad_norm": 0.011049966479080763, "learning_rate": 6.037952022914429e-06, "loss": 0.0002, "step": 7091 }, { "epoch": 0.45704710962170525, "grad_norm": 0.0004842771125507965, "learning_rate": 6.037235947010383e-06, "loss": 0.0, "step": 7092 }, { "epoch": 0.4571115550686344, "grad_norm": 3.9384781038392525e-05, "learning_rate": 6.036519871106338e-06, "loss": 0.0, "step": 7093 }, { "epoch": 0.45717600051556356, "grad_norm": 0.005493382939640472, "learning_rate": 6.035803795202292e-06, "loss": 0.0, "step": 7094 }, { "epoch": 0.45724044596249275, "grad_norm": 0.016864852974129232, "learning_rate": 6.035087719298246e-06, "loss": 0.0001, "step": 7095 }, { "epoch": 0.45730489140942193, "grad_norm": 0.23987302508294103, "learning_rate": 6.0343716433942e-06, "loss": 0.0055, "step": 7096 }, { "epoch": 0.4573693368563511, "grad_norm": 0.000661137041265991, "learning_rate": 6.033655567490154e-06, "loss": 0.0, "step": 7097 }, { "epoch": 0.4574337823032803, "grad_norm": 0.1169914314041797, "learning_rate": 6.032939491586108e-06, "loss": 0.0018, "step": 7098 }, { "epoch": 0.45749822775020943, "grad_norm": 0.09465142347291379, "learning_rate": 6.032223415682063e-06, "loss": 0.0002, "step": 7099 }, { "epoch": 0.4575626731971386, "grad_norm": 0.0004188542693633139, "learning_rate": 6.031507339778017e-06, "loss": 0.0, "step": 7100 }, { "epoch": 0.4576271186440678, "grad_norm": 0.0006338906192689149, "learning_rate": 6.03079126387397e-06, "loss": 0.0, "step": 7101 }, { "epoch": 0.457691564090997, "grad_norm": 0.24523413098807104, "learning_rate": 6.030075187969925e-06, "loss": 0.0021, "step": 7102 }, { "epoch": 0.45775600953792617, "grad_norm": 0.005114259183133491, "learning_rate": 6.029359112065879e-06, "loss": 0.0, "step": 7103 }, { "epoch": 0.4578204549848553, "grad_norm": 0.00010116153271195064, "learning_rate": 6.028643036161833e-06, "loss": 0.0, "step": 7104 }, { "epoch": 0.4578849004317845, "grad_norm": 0.0012903867681714094, "learning_rate": 6.0279269602577884e-06, "loss": 0.0, "step": 7105 }, { "epoch": 0.45794934587871367, "grad_norm": 0.0012879498751937105, "learning_rate": 6.027210884353743e-06, "loss": 0.0, "step": 7106 }, { "epoch": 0.45801379132564285, "grad_norm": 0.02153369947324202, "learning_rate": 6.026494808449696e-06, "loss": 0.0001, "step": 7107 }, { "epoch": 0.45807823677257203, "grad_norm": 0.0064330967485958694, "learning_rate": 6.0257787325456505e-06, "loss": 0.0, "step": 7108 }, { "epoch": 0.4581426822195012, "grad_norm": 0.00024972203715889577, "learning_rate": 6.025062656641605e-06, "loss": 0.0, "step": 7109 }, { "epoch": 0.45820712766643035, "grad_norm": 0.0016818562335592555, "learning_rate": 6.024346580737559e-06, "loss": 0.0, "step": 7110 }, { "epoch": 0.45827157311335953, "grad_norm": 0.0129347845643351, "learning_rate": 6.023630504833513e-06, "loss": 0.0, "step": 7111 }, { "epoch": 0.4583360185602887, "grad_norm": 5.749007999088142e-05, "learning_rate": 6.022914428929467e-06, "loss": 0.0, "step": 7112 }, { "epoch": 0.4584004640072179, "grad_norm": 0.0006387188346334157, "learning_rate": 6.022198353025421e-06, "loss": 0.0, "step": 7113 }, { "epoch": 0.4584649094541471, "grad_norm": 0.00012755880001202847, "learning_rate": 6.0214822771213754e-06, "loss": 0.0, "step": 7114 }, { "epoch": 0.4585293549010762, "grad_norm": 0.10691374328866679, "learning_rate": 6.02076620121733e-06, "loss": 0.0002, "step": 7115 }, { "epoch": 0.4585938003480054, "grad_norm": 0.9480175376851292, "learning_rate": 6.020050125313284e-06, "loss": 0.0168, "step": 7116 }, { "epoch": 0.4586582457949346, "grad_norm": 0.0006098464369457968, "learning_rate": 6.0193340494092375e-06, "loss": 0.0, "step": 7117 }, { "epoch": 0.45872269124186377, "grad_norm": 0.0047673415025089256, "learning_rate": 6.018617973505192e-06, "loss": 0.0, "step": 7118 }, { "epoch": 0.45878713668879295, "grad_norm": 0.01061895586109674, "learning_rate": 6.017901897601146e-06, "loss": 0.0001, "step": 7119 }, { "epoch": 0.45885158213572214, "grad_norm": 0.00033589021288196004, "learning_rate": 6.0171858216971e-06, "loss": 0.0, "step": 7120 }, { "epoch": 0.45891602758265126, "grad_norm": 0.0008578054963712787, "learning_rate": 6.016469745793055e-06, "loss": 0.0, "step": 7121 }, { "epoch": 0.45898047302958045, "grad_norm": 0.07172960847940046, "learning_rate": 6.015753669889008e-06, "loss": 0.0001, "step": 7122 }, { "epoch": 0.45904491847650963, "grad_norm": 0.0005887507011680352, "learning_rate": 6.015037593984962e-06, "loss": 0.0, "step": 7123 }, { "epoch": 0.4591093639234388, "grad_norm": 0.002714727540164013, "learning_rate": 6.014321518080917e-06, "loss": 0.0, "step": 7124 }, { "epoch": 0.459173809370368, "grad_norm": 0.004810577714593613, "learning_rate": 6.013605442176871e-06, "loss": 0.0, "step": 7125 }, { "epoch": 0.45923825481729713, "grad_norm": 0.09211610013209857, "learning_rate": 6.012889366272825e-06, "loss": 0.0001, "step": 7126 }, { "epoch": 0.4593027002642263, "grad_norm": 0.04762401536160834, "learning_rate": 6.012173290368779e-06, "loss": 0.0001, "step": 7127 }, { "epoch": 0.4593671457111555, "grad_norm": 0.0002858773479675718, "learning_rate": 6.011457214464734e-06, "loss": 0.0, "step": 7128 }, { "epoch": 0.4594315911580847, "grad_norm": 0.0007525722764745775, "learning_rate": 6.010741138560688e-06, "loss": 0.0, "step": 7129 }, { "epoch": 0.45949603660501387, "grad_norm": 1.6808105972308205, "learning_rate": 6.0100250626566425e-06, "loss": 0.018, "step": 7130 }, { "epoch": 0.45956048205194305, "grad_norm": 0.001529241299410118, "learning_rate": 6.009308986752597e-06, "loss": 0.0, "step": 7131 }, { "epoch": 0.4596249274988722, "grad_norm": 0.0038925792922026225, "learning_rate": 6.008592910848551e-06, "loss": 0.0, "step": 7132 }, { "epoch": 0.45968937294580137, "grad_norm": 0.010454839195876581, "learning_rate": 6.0078768349445046e-06, "loss": 0.0001, "step": 7133 }, { "epoch": 0.45975381839273055, "grad_norm": 0.0007752854575114376, "learning_rate": 6.007160759040459e-06, "loss": 0.0, "step": 7134 }, { "epoch": 0.45981826383965974, "grad_norm": 0.007434410935910509, "learning_rate": 6.006444683136413e-06, "loss": 0.0001, "step": 7135 }, { "epoch": 0.4598827092865889, "grad_norm": 0.6166326950570551, "learning_rate": 6.0057286072323675e-06, "loss": 0.0026, "step": 7136 }, { "epoch": 0.4599471547335181, "grad_norm": 0.0003005682636650433, "learning_rate": 6.005012531328322e-06, "loss": 0.0, "step": 7137 }, { "epoch": 0.46001160018044723, "grad_norm": 0.00016686412122810733, "learning_rate": 6.004296455424275e-06, "loss": 0.0, "step": 7138 }, { "epoch": 0.4600760456273764, "grad_norm": 0.0008105308466117006, "learning_rate": 6.0035803795202295e-06, "loss": 0.0, "step": 7139 }, { "epoch": 0.4601404910743056, "grad_norm": 0.002631408133243594, "learning_rate": 6.002864303616184e-06, "loss": 0.0, "step": 7140 }, { "epoch": 0.4602049365212348, "grad_norm": 0.00022067880593967695, "learning_rate": 6.002148227712138e-06, "loss": 0.0, "step": 7141 }, { "epoch": 0.46026938196816397, "grad_norm": 0.001211258225825929, "learning_rate": 6.001432151808092e-06, "loss": 0.0, "step": 7142 }, { "epoch": 0.4603338274150931, "grad_norm": 9.802932496178768e-05, "learning_rate": 6.000716075904046e-06, "loss": 0.0, "step": 7143 }, { "epoch": 0.4603982728620223, "grad_norm": 0.26474249319163073, "learning_rate": 6e-06, "loss": 0.0026, "step": 7144 }, { "epoch": 0.46046271830895147, "grad_norm": 1.265212050520982, "learning_rate": 5.9992839240959545e-06, "loss": 0.0032, "step": 7145 }, { "epoch": 0.46052716375588065, "grad_norm": 0.004371471218572817, "learning_rate": 5.998567848191909e-06, "loss": 0.0, "step": 7146 }, { "epoch": 0.46059160920280984, "grad_norm": 0.00043372477915415046, "learning_rate": 5.997851772287863e-06, "loss": 0.0, "step": 7147 }, { "epoch": 0.460656054649739, "grad_norm": 0.008678506284997306, "learning_rate": 5.9971356963838165e-06, "loss": 0.0, "step": 7148 }, { "epoch": 0.46072050009666815, "grad_norm": 0.0003419930026615307, "learning_rate": 5.996419620479771e-06, "loss": 0.0, "step": 7149 }, { "epoch": 0.46078494554359734, "grad_norm": 0.002855349159979639, "learning_rate": 5.995703544575725e-06, "loss": 0.0, "step": 7150 }, { "epoch": 0.4608493909905265, "grad_norm": 0.003908428454548936, "learning_rate": 5.994987468671679e-06, "loss": 0.0, "step": 7151 }, { "epoch": 0.4609138364374557, "grad_norm": 0.0010189799207077413, "learning_rate": 5.9942713927676345e-06, "loss": 0.0, "step": 7152 }, { "epoch": 0.4609782818843849, "grad_norm": 0.02421507927741126, "learning_rate": 5.993555316863589e-06, "loss": 0.0001, "step": 7153 }, { "epoch": 0.461042727331314, "grad_norm": 8.896939169050152e-05, "learning_rate": 5.992839240959542e-06, "loss": 0.0, "step": 7154 }, { "epoch": 0.4611071727782432, "grad_norm": 0.029285788425192732, "learning_rate": 5.992123165055497e-06, "loss": 0.0001, "step": 7155 }, { "epoch": 0.4611716182251724, "grad_norm": 0.034353822516338714, "learning_rate": 5.991407089151451e-06, "loss": 0.0, "step": 7156 }, { "epoch": 0.46123606367210157, "grad_norm": 0.06933937937293207, "learning_rate": 5.990691013247405e-06, "loss": 0.0001, "step": 7157 }, { "epoch": 0.46130050911903075, "grad_norm": 0.002533742700116563, "learning_rate": 5.9899749373433595e-06, "loss": 0.0, "step": 7158 }, { "epoch": 0.46136495456595994, "grad_norm": 0.03438283799772264, "learning_rate": 5.989258861439313e-06, "loss": 0.0002, "step": 7159 }, { "epoch": 0.46142940001288907, "grad_norm": 0.009976091964890367, "learning_rate": 5.988542785535267e-06, "loss": 0.0, "step": 7160 }, { "epoch": 0.46149384545981825, "grad_norm": 0.013725964591191813, "learning_rate": 5.9878267096312215e-06, "loss": 0.0, "step": 7161 }, { "epoch": 0.46155829090674744, "grad_norm": 0.022082188905863047, "learning_rate": 5.987110633727176e-06, "loss": 0.0001, "step": 7162 }, { "epoch": 0.4616227363536766, "grad_norm": 0.01672156348965313, "learning_rate": 5.98639455782313e-06, "loss": 0.0, "step": 7163 }, { "epoch": 0.4616871818006058, "grad_norm": 0.007685841599548759, "learning_rate": 5.985678481919084e-06, "loss": 0.0, "step": 7164 }, { "epoch": 0.46175162724753493, "grad_norm": 0.010648891229751439, "learning_rate": 5.984962406015038e-06, "loss": 0.0, "step": 7165 }, { "epoch": 0.4618160726944641, "grad_norm": 0.08365923854229963, "learning_rate": 5.984246330110992e-06, "loss": 0.0017, "step": 7166 }, { "epoch": 0.4618805181413933, "grad_norm": 0.08341798140164661, "learning_rate": 5.9835302542069465e-06, "loss": 0.0017, "step": 7167 }, { "epoch": 0.4619449635883225, "grad_norm": 0.10906040554440197, "learning_rate": 5.9828141783029e-06, "loss": 0.0001, "step": 7168 }, { "epoch": 0.4620094090352517, "grad_norm": 0.0016372742490571707, "learning_rate": 5.982098102398854e-06, "loss": 0.0, "step": 7169 }, { "epoch": 0.46207385448218086, "grad_norm": 0.0005011299067149563, "learning_rate": 5.9813820264948085e-06, "loss": 0.0, "step": 7170 }, { "epoch": 0.46213829992911, "grad_norm": 0.0173133371179596, "learning_rate": 5.980665950590763e-06, "loss": 0.0, "step": 7171 }, { "epoch": 0.46220274537603917, "grad_norm": 0.0011703360208209618, "learning_rate": 5.979949874686717e-06, "loss": 0.0, "step": 7172 }, { "epoch": 0.46226719082296835, "grad_norm": 0.0040470612125930484, "learning_rate": 5.979233798782671e-06, "loss": 0.0, "step": 7173 }, { "epoch": 0.46233163626989754, "grad_norm": 0.0007731407965276777, "learning_rate": 5.978517722878625e-06, "loss": 0.0, "step": 7174 }, { "epoch": 0.4623960817168267, "grad_norm": 0.005962191522255315, "learning_rate": 5.97780164697458e-06, "loss": 0.0001, "step": 7175 }, { "epoch": 0.4624605271637559, "grad_norm": 0.04625911729908732, "learning_rate": 5.977085571070534e-06, "loss": 0.0001, "step": 7176 }, { "epoch": 0.46252497261068504, "grad_norm": 0.002839065374832336, "learning_rate": 5.976369495166489e-06, "loss": 0.0, "step": 7177 }, { "epoch": 0.4625894180576142, "grad_norm": 0.0056488077167803855, "learning_rate": 5.975653419262443e-06, "loss": 0.0001, "step": 7178 }, { "epoch": 0.4626538635045434, "grad_norm": 2.9853239298381764, "learning_rate": 5.974937343358397e-06, "loss": 0.0079, "step": 7179 }, { "epoch": 0.4627183089514726, "grad_norm": 0.15088591204430143, "learning_rate": 5.974221267454351e-06, "loss": 0.0007, "step": 7180 }, { "epoch": 0.4627827543984018, "grad_norm": 0.004880895834493149, "learning_rate": 5.973505191550305e-06, "loss": 0.0, "step": 7181 }, { "epoch": 0.4628471998453309, "grad_norm": 0.00042946334087172727, "learning_rate": 5.972789115646259e-06, "loss": 0.0, "step": 7182 }, { "epoch": 0.4629116452922601, "grad_norm": 0.035591940752936105, "learning_rate": 5.9720730397422136e-06, "loss": 0.0001, "step": 7183 }, { "epoch": 0.46297609073918927, "grad_norm": 0.0010547751112222126, "learning_rate": 5.971356963838167e-06, "loss": 0.0, "step": 7184 }, { "epoch": 0.46304053618611846, "grad_norm": 0.06196757870758435, "learning_rate": 5.970640887934121e-06, "loss": 0.0001, "step": 7185 }, { "epoch": 0.46310498163304764, "grad_norm": 0.0026980154325516037, "learning_rate": 5.969924812030076e-06, "loss": 0.0, "step": 7186 }, { "epoch": 0.4631694270799768, "grad_norm": 0.22517366429317423, "learning_rate": 5.96920873612603e-06, "loss": 0.0002, "step": 7187 }, { "epoch": 0.46323387252690595, "grad_norm": 0.004543474395993957, "learning_rate": 5.968492660221984e-06, "loss": 0.0, "step": 7188 }, { "epoch": 0.46329831797383514, "grad_norm": 0.008096930609374857, "learning_rate": 5.967776584317938e-06, "loss": 0.0, "step": 7189 }, { "epoch": 0.4633627634207643, "grad_norm": 8.040914560860149e-05, "learning_rate": 5.967060508413892e-06, "loss": 0.0, "step": 7190 }, { "epoch": 0.4634272088676935, "grad_norm": 0.0038613975120519836, "learning_rate": 5.966344432509846e-06, "loss": 0.0, "step": 7191 }, { "epoch": 0.4634916543146227, "grad_norm": 0.009150825453134418, "learning_rate": 5.9656283566058006e-06, "loss": 0.0, "step": 7192 }, { "epoch": 0.4635560997615518, "grad_norm": 0.006888794586705892, "learning_rate": 5.964912280701755e-06, "loss": 0.0, "step": 7193 }, { "epoch": 0.463620545208481, "grad_norm": 0.0015190788830026104, "learning_rate": 5.964196204797708e-06, "loss": 0.0, "step": 7194 }, { "epoch": 0.4636849906554102, "grad_norm": 0.0010511826214208134, "learning_rate": 5.963480128893663e-06, "loss": 0.0, "step": 7195 }, { "epoch": 0.4637494361023394, "grad_norm": 0.006848881538616532, "learning_rate": 5.962764052989617e-06, "loss": 0.0, "step": 7196 }, { "epoch": 0.46381388154926856, "grad_norm": 0.19715999381198668, "learning_rate": 5.962047977085571e-06, "loss": 0.0018, "step": 7197 }, { "epoch": 0.46387832699619774, "grad_norm": 9.671389949897078e-05, "learning_rate": 5.961331901181526e-06, "loss": 0.0, "step": 7198 }, { "epoch": 0.46394277244312687, "grad_norm": 0.010491111385630709, "learning_rate": 5.960615825277481e-06, "loss": 0.0, "step": 7199 }, { "epoch": 0.46400721789005606, "grad_norm": 0.000629562530891665, "learning_rate": 5.959899749373434e-06, "loss": 0.0, "step": 7200 }, { "epoch": 0.46407166333698524, "grad_norm": 0.002112236082025321, "learning_rate": 5.959183673469388e-06, "loss": 0.0, "step": 7201 }, { "epoch": 0.4641361087839144, "grad_norm": 0.2710224448099513, "learning_rate": 5.958467597565343e-06, "loss": 0.0002, "step": 7202 }, { "epoch": 0.4642005542308436, "grad_norm": 0.02375578089601274, "learning_rate": 5.957751521661297e-06, "loss": 0.0017, "step": 7203 }, { "epoch": 0.46426499967777274, "grad_norm": 0.0011988023769871627, "learning_rate": 5.957035445757251e-06, "loss": 0.0, "step": 7204 }, { "epoch": 0.4643294451247019, "grad_norm": 0.003208027864138399, "learning_rate": 5.956319369853205e-06, "loss": 0.0, "step": 7205 }, { "epoch": 0.4643938905716311, "grad_norm": 0.0009489984323486449, "learning_rate": 5.955603293949159e-06, "loss": 0.0, "step": 7206 }, { "epoch": 0.4644583360185603, "grad_norm": 0.00045291492100388596, "learning_rate": 5.954887218045113e-06, "loss": 0.0, "step": 7207 }, { "epoch": 0.4645227814654895, "grad_norm": 0.015716493476830035, "learning_rate": 5.954171142141068e-06, "loss": 0.0001, "step": 7208 }, { "epoch": 0.46458722691241866, "grad_norm": 0.010753894111213871, "learning_rate": 5.953455066237022e-06, "loss": 0.0, "step": 7209 }, { "epoch": 0.4646516723593478, "grad_norm": 0.0021181276175598375, "learning_rate": 5.952738990332975e-06, "loss": 0.0, "step": 7210 }, { "epoch": 0.464716117806277, "grad_norm": 0.0005319157505790057, "learning_rate": 5.95202291442893e-06, "loss": 0.0, "step": 7211 }, { "epoch": 0.46478056325320616, "grad_norm": 4.463178627000733e-05, "learning_rate": 5.951306838524884e-06, "loss": 0.0, "step": 7212 }, { "epoch": 0.46484500870013534, "grad_norm": 0.0013662977275578781, "learning_rate": 5.950590762620838e-06, "loss": 0.0, "step": 7213 }, { "epoch": 0.4649094541470645, "grad_norm": 0.010369585677639376, "learning_rate": 5.949874686716793e-06, "loss": 0.0, "step": 7214 }, { "epoch": 0.4649738995939937, "grad_norm": 0.03391908857152616, "learning_rate": 5.949158610812746e-06, "loss": 0.0004, "step": 7215 }, { "epoch": 0.46503834504092284, "grad_norm": 0.06855665758350678, "learning_rate": 5.9484425349087e-06, "loss": 0.0001, "step": 7216 }, { "epoch": 0.465102790487852, "grad_norm": 0.21561523982086514, "learning_rate": 5.947726459004655e-06, "loss": 0.001, "step": 7217 }, { "epoch": 0.4651672359347812, "grad_norm": 0.0034353563543390304, "learning_rate": 5.947010383100609e-06, "loss": 0.0, "step": 7218 }, { "epoch": 0.4652316813817104, "grad_norm": 0.00018852596750020293, "learning_rate": 5.946294307196563e-06, "loss": 0.0, "step": 7219 }, { "epoch": 0.4652961268286396, "grad_norm": 0.009362767432748718, "learning_rate": 5.945578231292517e-06, "loss": 0.0, "step": 7220 }, { "epoch": 0.4653605722755687, "grad_norm": 0.022573569619495887, "learning_rate": 5.944862155388471e-06, "loss": 0.0001, "step": 7221 }, { "epoch": 0.4654250177224979, "grad_norm": 0.006736289318203268, "learning_rate": 5.944146079484426e-06, "loss": 0.0, "step": 7222 }, { "epoch": 0.4654894631694271, "grad_norm": 0.5131869329603391, "learning_rate": 5.9434300035803804e-06, "loss": 0.0035, "step": 7223 }, { "epoch": 0.46555390861635626, "grad_norm": 0.15121273651669012, "learning_rate": 5.942713927676335e-06, "loss": 0.0005, "step": 7224 }, { "epoch": 0.46561835406328544, "grad_norm": 0.000143468918569542, "learning_rate": 5.941997851772289e-06, "loss": 0.0, "step": 7225 }, { "epoch": 0.46568279951021463, "grad_norm": 0.003423290574634767, "learning_rate": 5.9412817758682425e-06, "loss": 0.0, "step": 7226 }, { "epoch": 0.46574724495714376, "grad_norm": 0.05099336860487578, "learning_rate": 5.940565699964197e-06, "loss": 0.0004, "step": 7227 }, { "epoch": 0.46581169040407294, "grad_norm": 0.07674497419352205, "learning_rate": 5.939849624060151e-06, "loss": 0.0017, "step": 7228 }, { "epoch": 0.4658761358510021, "grad_norm": 0.16468423587705824, "learning_rate": 5.939133548156105e-06, "loss": 0.0013, "step": 7229 }, { "epoch": 0.4659405812979313, "grad_norm": 0.0024757373314606267, "learning_rate": 5.93841747225206e-06, "loss": 0.0, "step": 7230 }, { "epoch": 0.4660050267448605, "grad_norm": 0.0039586155872088965, "learning_rate": 5.937701396348013e-06, "loss": 0.0, "step": 7231 }, { "epoch": 0.4660694721917896, "grad_norm": 0.01858411356660055, "learning_rate": 5.9369853204439674e-06, "loss": 0.0, "step": 7232 }, { "epoch": 0.4661339176387188, "grad_norm": 0.21115758681710384, "learning_rate": 5.936269244539922e-06, "loss": 0.0026, "step": 7233 }, { "epoch": 0.466198363085648, "grad_norm": 0.10160640537815353, "learning_rate": 5.935553168635876e-06, "loss": 0.0002, "step": 7234 }, { "epoch": 0.4662628085325772, "grad_norm": 0.045725959999042595, "learning_rate": 5.93483709273183e-06, "loss": 0.0, "step": 7235 }, { "epoch": 0.46632725397950636, "grad_norm": 0.01414354404282994, "learning_rate": 5.934121016827784e-06, "loss": 0.0, "step": 7236 }, { "epoch": 0.46639169942643555, "grad_norm": 0.01251770166379137, "learning_rate": 5.933404940923738e-06, "loss": 0.0001, "step": 7237 }, { "epoch": 0.4664561448733647, "grad_norm": 0.0007334224021861777, "learning_rate": 5.932688865019692e-06, "loss": 0.0, "step": 7238 }, { "epoch": 0.46652059032029386, "grad_norm": 0.0022743016070544573, "learning_rate": 5.931972789115647e-06, "loss": 0.0, "step": 7239 }, { "epoch": 0.46658503576722304, "grad_norm": 0.00039270465816523245, "learning_rate": 5.931256713211601e-06, "loss": 0.0, "step": 7240 }, { "epoch": 0.46664948121415223, "grad_norm": 0.00010237405302461057, "learning_rate": 5.930540637307554e-06, "loss": 0.0, "step": 7241 }, { "epoch": 0.4667139266610814, "grad_norm": 0.004509319091979509, "learning_rate": 5.929824561403509e-06, "loss": 0.0, "step": 7242 }, { "epoch": 0.46677837210801054, "grad_norm": 0.12337622011710904, "learning_rate": 5.929108485499463e-06, "loss": 0.0001, "step": 7243 }, { "epoch": 0.4668428175549397, "grad_norm": 0.005279184753878209, "learning_rate": 5.928392409595417e-06, "loss": 0.0, "step": 7244 }, { "epoch": 0.4669072630018689, "grad_norm": 0.013528141218017584, "learning_rate": 5.9276763336913725e-06, "loss": 0.0, "step": 7245 }, { "epoch": 0.4669717084487981, "grad_norm": 7.838986385026141e-05, "learning_rate": 5.926960257787327e-06, "loss": 0.0, "step": 7246 }, { "epoch": 0.4670361538957273, "grad_norm": 0.09289410924673282, "learning_rate": 5.92624418188328e-06, "loss": 0.0, "step": 7247 }, { "epoch": 0.46710059934265646, "grad_norm": 0.005147704458191427, "learning_rate": 5.9255281059792345e-06, "loss": 0.0, "step": 7248 }, { "epoch": 0.4671650447895856, "grad_norm": 0.0005078345397182034, "learning_rate": 5.924812030075189e-06, "loss": 0.0, "step": 7249 }, { "epoch": 0.4672294902365148, "grad_norm": 0.01140905101926907, "learning_rate": 5.924095954171143e-06, "loss": 0.0001, "step": 7250 }, { "epoch": 0.46729393568344396, "grad_norm": 0.013532734656177478, "learning_rate": 5.923379878267097e-06, "loss": 0.0001, "step": 7251 }, { "epoch": 0.46735838113037315, "grad_norm": 0.00450951372079799, "learning_rate": 5.922663802363051e-06, "loss": 0.0, "step": 7252 }, { "epoch": 0.46742282657730233, "grad_norm": 0.00013456809584500636, "learning_rate": 5.921947726459005e-06, "loss": 0.0, "step": 7253 }, { "epoch": 0.4674872720242315, "grad_norm": 0.5415825747969696, "learning_rate": 5.9212316505549595e-06, "loss": 0.0032, "step": 7254 }, { "epoch": 0.46755171747116064, "grad_norm": 0.06273022360028993, "learning_rate": 5.920515574650914e-06, "loss": 0.0002, "step": 7255 }, { "epoch": 0.46761616291808983, "grad_norm": 0.001915030388477191, "learning_rate": 5.919799498746868e-06, "loss": 0.0, "step": 7256 }, { "epoch": 0.467680608365019, "grad_norm": 0.0018932465533284114, "learning_rate": 5.9190834228428215e-06, "loss": 0.0, "step": 7257 }, { "epoch": 0.4677450538119482, "grad_norm": 0.01038187083953511, "learning_rate": 5.918367346938776e-06, "loss": 0.0, "step": 7258 }, { "epoch": 0.4678094992588774, "grad_norm": 0.00021900495299448367, "learning_rate": 5.91765127103473e-06, "loss": 0.0, "step": 7259 }, { "epoch": 0.4678739447058065, "grad_norm": 0.0002388793545880022, "learning_rate": 5.916935195130684e-06, "loss": 0.0, "step": 7260 }, { "epoch": 0.4679383901527357, "grad_norm": 0.0033289252996606057, "learning_rate": 5.916219119226638e-06, "loss": 0.0, "step": 7261 }, { "epoch": 0.4680028355996649, "grad_norm": 0.00018771824976225135, "learning_rate": 5.915503043322592e-06, "loss": 0.0, "step": 7262 }, { "epoch": 0.46806728104659406, "grad_norm": 0.07404355653908364, "learning_rate": 5.9147869674185465e-06, "loss": 0.0008, "step": 7263 }, { "epoch": 0.46813172649352325, "grad_norm": 0.003018865199686008, "learning_rate": 5.914070891514501e-06, "loss": 0.0, "step": 7264 }, { "epoch": 0.46819617194045243, "grad_norm": 0.04145828291970412, "learning_rate": 5.913354815610455e-06, "loss": 0.0006, "step": 7265 }, { "epoch": 0.46826061738738156, "grad_norm": 0.002445120612271508, "learning_rate": 5.9126387397064085e-06, "loss": 0.0, "step": 7266 }, { "epoch": 0.46832506283431075, "grad_norm": 0.0041065608074551405, "learning_rate": 5.911922663802363e-06, "loss": 0.0, "step": 7267 }, { "epoch": 0.46838950828123993, "grad_norm": 0.03801747734846256, "learning_rate": 5.911206587898318e-06, "loss": 0.0001, "step": 7268 }, { "epoch": 0.4684539537281691, "grad_norm": 0.00017918018740986495, "learning_rate": 5.910490511994272e-06, "loss": 0.0, "step": 7269 }, { "epoch": 0.4685183991750983, "grad_norm": 0.008305111975488961, "learning_rate": 5.9097744360902265e-06, "loss": 0.0001, "step": 7270 }, { "epoch": 0.46858284462202743, "grad_norm": 3.966849481567199e-05, "learning_rate": 5.909058360186181e-06, "loss": 0.0, "step": 7271 }, { "epoch": 0.4686472900689566, "grad_norm": 0.0004967530768057412, "learning_rate": 5.908342284282135e-06, "loss": 0.0, "step": 7272 }, { "epoch": 0.4687117355158858, "grad_norm": 0.0757589549519109, "learning_rate": 5.907626208378089e-06, "loss": 0.0002, "step": 7273 }, { "epoch": 0.468776180962815, "grad_norm": 0.0013153308624478225, "learning_rate": 5.906910132474043e-06, "loss": 0.0, "step": 7274 }, { "epoch": 0.46884062640974417, "grad_norm": 0.004487714706241631, "learning_rate": 5.906194056569997e-06, "loss": 0.0, "step": 7275 }, { "epoch": 0.46890507185667335, "grad_norm": 0.0024251842236077178, "learning_rate": 5.9054779806659515e-06, "loss": 0.0, "step": 7276 }, { "epoch": 0.4689695173036025, "grad_norm": 0.0162244810554148, "learning_rate": 5.904761904761905e-06, "loss": 0.0001, "step": 7277 }, { "epoch": 0.46903396275053166, "grad_norm": 0.00018407225832327176, "learning_rate": 5.904045828857859e-06, "loss": 0.0, "step": 7278 }, { "epoch": 0.46909840819746085, "grad_norm": 0.0008793784795991423, "learning_rate": 5.9033297529538135e-06, "loss": 0.0, "step": 7279 }, { "epoch": 0.46916285364439003, "grad_norm": 0.045148619317254515, "learning_rate": 5.902613677049768e-06, "loss": 0.0002, "step": 7280 }, { "epoch": 0.4692272990913192, "grad_norm": 0.0007002317138202523, "learning_rate": 5.901897601145722e-06, "loss": 0.0, "step": 7281 }, { "epoch": 0.46929174453824835, "grad_norm": 0.0076014641681480645, "learning_rate": 5.901181525241676e-06, "loss": 0.0, "step": 7282 }, { "epoch": 0.46935618998517753, "grad_norm": 0.058544814019823664, "learning_rate": 5.90046544933763e-06, "loss": 0.0002, "step": 7283 }, { "epoch": 0.4694206354321067, "grad_norm": 0.2589595880628915, "learning_rate": 5.899749373433584e-06, "loss": 0.0019, "step": 7284 }, { "epoch": 0.4694850808790359, "grad_norm": 0.0009875575563172685, "learning_rate": 5.8990332975295385e-06, "loss": 0.0, "step": 7285 }, { "epoch": 0.4695495263259651, "grad_norm": 0.0001364893657174719, "learning_rate": 5.898317221625493e-06, "loss": 0.0, "step": 7286 }, { "epoch": 0.46961397177289427, "grad_norm": 0.08705876659342433, "learning_rate": 5.897601145721446e-06, "loss": 0.0007, "step": 7287 }, { "epoch": 0.4696784172198234, "grad_norm": 0.01562531292125164, "learning_rate": 5.8968850698174005e-06, "loss": 0.0001, "step": 7288 }, { "epoch": 0.4697428626667526, "grad_norm": 0.04099352701129466, "learning_rate": 5.896168993913355e-06, "loss": 0.0001, "step": 7289 }, { "epoch": 0.46980730811368177, "grad_norm": 0.0005863534760000729, "learning_rate": 5.895452918009309e-06, "loss": 0.0, "step": 7290 }, { "epoch": 0.46987175356061095, "grad_norm": 0.0015909966007760811, "learning_rate": 5.8947368421052634e-06, "loss": 0.0, "step": 7291 }, { "epoch": 0.46993619900754013, "grad_norm": 0.00274800952267154, "learning_rate": 5.8940207662012186e-06, "loss": 0.0, "step": 7292 }, { "epoch": 0.4700006444544693, "grad_norm": 0.000492930772659004, "learning_rate": 5.893304690297173e-06, "loss": 0.0, "step": 7293 }, { "epoch": 0.47006508990139845, "grad_norm": 0.0025907573420449314, "learning_rate": 5.892588614393126e-06, "loss": 0.0, "step": 7294 }, { "epoch": 0.47012953534832763, "grad_norm": 0.0007235664111460894, "learning_rate": 5.891872538489081e-06, "loss": 0.0, "step": 7295 }, { "epoch": 0.4701939807952568, "grad_norm": 0.003975885038461543, "learning_rate": 5.891156462585035e-06, "loss": 0.0, "step": 7296 }, { "epoch": 0.470258426242186, "grad_norm": 0.0025255628655265244, "learning_rate": 5.890440386680989e-06, "loss": 0.0, "step": 7297 }, { "epoch": 0.4703228716891152, "grad_norm": 0.005506941121865925, "learning_rate": 5.889724310776943e-06, "loss": 0.0001, "step": 7298 }, { "epoch": 0.4703873171360443, "grad_norm": 0.0001799831213966565, "learning_rate": 5.889008234872897e-06, "loss": 0.0, "step": 7299 }, { "epoch": 0.4704517625829735, "grad_norm": 0.004868009039231323, "learning_rate": 5.888292158968851e-06, "loss": 0.0015, "step": 7300 }, { "epoch": 0.4705162080299027, "grad_norm": 0.4330565587309389, "learning_rate": 5.8875760830648056e-06, "loss": 0.0004, "step": 7301 }, { "epoch": 0.47058065347683187, "grad_norm": 0.017751362441838763, "learning_rate": 5.88686000716076e-06, "loss": 0.0, "step": 7302 }, { "epoch": 0.47064509892376105, "grad_norm": 0.00020847490324210836, "learning_rate": 5.886143931256713e-06, "loss": 0.0, "step": 7303 }, { "epoch": 0.47070954437069024, "grad_norm": 0.007923028419122423, "learning_rate": 5.885427855352668e-06, "loss": 0.0, "step": 7304 }, { "epoch": 0.47077398981761936, "grad_norm": 0.005294704298977462, "learning_rate": 5.884711779448622e-06, "loss": 0.0, "step": 7305 }, { "epoch": 0.47083843526454855, "grad_norm": 0.0007913525318603742, "learning_rate": 5.883995703544576e-06, "loss": 0.0, "step": 7306 }, { "epoch": 0.47090288071147773, "grad_norm": 0.009260764334228884, "learning_rate": 5.8832796276405305e-06, "loss": 0.0, "step": 7307 }, { "epoch": 0.4709673261584069, "grad_norm": 0.14998565371771327, "learning_rate": 5.882563551736484e-06, "loss": 0.0003, "step": 7308 }, { "epoch": 0.4710317716053361, "grad_norm": 0.002568590896597837, "learning_rate": 5.881847475832438e-06, "loss": 0.0, "step": 7309 }, { "epoch": 0.47109621705226523, "grad_norm": 0.0001356063438065277, "learning_rate": 5.8811313999283926e-06, "loss": 0.0, "step": 7310 }, { "epoch": 0.4711606624991944, "grad_norm": 3.737375653479307e-05, "learning_rate": 5.880415324024347e-06, "loss": 0.0, "step": 7311 }, { "epoch": 0.4712251079461236, "grad_norm": 0.0018837255037034548, "learning_rate": 5.879699248120301e-06, "loss": 0.0, "step": 7312 }, { "epoch": 0.4712895533930528, "grad_norm": 9.369458189287065e-05, "learning_rate": 5.878983172216255e-06, "loss": 0.0, "step": 7313 }, { "epoch": 0.47135399883998197, "grad_norm": 0.004181875470268751, "learning_rate": 5.878267096312209e-06, "loss": 0.0, "step": 7314 }, { "epoch": 0.47141844428691115, "grad_norm": 0.003124371841280807, "learning_rate": 5.877551020408164e-06, "loss": 0.0001, "step": 7315 }, { "epoch": 0.4714828897338403, "grad_norm": 0.00016681583676826113, "learning_rate": 5.876834944504118e-06, "loss": 0.0, "step": 7316 }, { "epoch": 0.47154733518076947, "grad_norm": 0.013830684057848068, "learning_rate": 5.876118868600073e-06, "loss": 0.0, "step": 7317 }, { "epoch": 0.47161178062769865, "grad_norm": 0.013160734314818012, "learning_rate": 5.875402792696027e-06, "loss": 0.0001, "step": 7318 }, { "epoch": 0.47167622607462784, "grad_norm": 0.005155490320092713, "learning_rate": 5.87468671679198e-06, "loss": 0.0, "step": 7319 }, { "epoch": 0.471740671521557, "grad_norm": 0.0018278137970520977, "learning_rate": 5.873970640887935e-06, "loss": 0.0, "step": 7320 }, { "epoch": 0.47180511696848615, "grad_norm": 0.003185959559079041, "learning_rate": 5.873254564983889e-06, "loss": 0.0, "step": 7321 }, { "epoch": 0.47186956241541533, "grad_norm": 0.2694356997754317, "learning_rate": 5.872538489079843e-06, "loss": 0.0024, "step": 7322 }, { "epoch": 0.4719340078623445, "grad_norm": 0.01864031511807212, "learning_rate": 5.871822413175798e-06, "loss": 0.0, "step": 7323 }, { "epoch": 0.4719984533092737, "grad_norm": 0.015568650232301293, "learning_rate": 5.871106337271751e-06, "loss": 0.0001, "step": 7324 }, { "epoch": 0.4720628987562029, "grad_norm": 0.0058521904086244305, "learning_rate": 5.870390261367705e-06, "loss": 0.0, "step": 7325 }, { "epoch": 0.47212734420313207, "grad_norm": 0.09893792709546372, "learning_rate": 5.86967418546366e-06, "loss": 0.0001, "step": 7326 }, { "epoch": 0.4721917896500612, "grad_norm": 0.3626702410404047, "learning_rate": 5.868958109559614e-06, "loss": 0.0022, "step": 7327 }, { "epoch": 0.4722562350969904, "grad_norm": 0.24238985590879442, "learning_rate": 5.868242033655568e-06, "loss": 0.0003, "step": 7328 }, { "epoch": 0.47232068054391957, "grad_norm": 0.2921772246160459, "learning_rate": 5.867525957751522e-06, "loss": 0.0022, "step": 7329 }, { "epoch": 0.47238512599084875, "grad_norm": 0.10111720327515572, "learning_rate": 5.866809881847476e-06, "loss": 0.0017, "step": 7330 }, { "epoch": 0.47244957143777794, "grad_norm": 0.0007373990260450756, "learning_rate": 5.86609380594343e-06, "loss": 0.0, "step": 7331 }, { "epoch": 0.4725140168847071, "grad_norm": 0.014592344951044412, "learning_rate": 5.865377730039385e-06, "loss": 0.0002, "step": 7332 }, { "epoch": 0.47257846233163625, "grad_norm": 0.004748893437132448, "learning_rate": 5.864661654135339e-06, "loss": 0.0, "step": 7333 }, { "epoch": 0.47264290777856544, "grad_norm": 0.04088056414933486, "learning_rate": 5.863945578231292e-06, "loss": 0.0001, "step": 7334 }, { "epoch": 0.4727073532254946, "grad_norm": 0.0006974730055049175, "learning_rate": 5.863229502327247e-06, "loss": 0.0, "step": 7335 }, { "epoch": 0.4727717986724238, "grad_norm": 0.0060704542827779, "learning_rate": 5.862513426423201e-06, "loss": 0.0, "step": 7336 }, { "epoch": 0.472836244119353, "grad_norm": 6.82556858305384e-05, "learning_rate": 5.861797350519155e-06, "loss": 0.0, "step": 7337 }, { "epoch": 0.4729006895662821, "grad_norm": 0.01232797728501841, "learning_rate": 5.86108127461511e-06, "loss": 0.0001, "step": 7338 }, { "epoch": 0.4729651350132113, "grad_norm": 0.14040671069080024, "learning_rate": 5.860365198711065e-06, "loss": 0.0013, "step": 7339 }, { "epoch": 0.4730295804601405, "grad_norm": 0.0069158175830979236, "learning_rate": 5.859649122807018e-06, "loss": 0.0, "step": 7340 }, { "epoch": 0.47309402590706967, "grad_norm": 0.15098211468237663, "learning_rate": 5.8589330469029724e-06, "loss": 0.0014, "step": 7341 }, { "epoch": 0.47315847135399886, "grad_norm": 0.0005686017696016296, "learning_rate": 5.858216970998927e-06, "loss": 0.0, "step": 7342 }, { "epoch": 0.47322291680092804, "grad_norm": 0.06047954006858101, "learning_rate": 5.857500895094881e-06, "loss": 0.0001, "step": 7343 }, { "epoch": 0.47328736224785717, "grad_norm": 0.07052647298794551, "learning_rate": 5.856784819190835e-06, "loss": 0.0008, "step": 7344 }, { "epoch": 0.47335180769478635, "grad_norm": 8.880895053995195e-05, "learning_rate": 5.856068743286789e-06, "loss": 0.0, "step": 7345 }, { "epoch": 0.47341625314171554, "grad_norm": 0.0009487713006594181, "learning_rate": 5.855352667382743e-06, "loss": 0.0, "step": 7346 }, { "epoch": 0.4734806985886447, "grad_norm": 0.2606289649908769, "learning_rate": 5.854636591478697e-06, "loss": 0.0014, "step": 7347 }, { "epoch": 0.4735451440355739, "grad_norm": 0.19189682199600822, "learning_rate": 5.853920515574652e-06, "loss": 0.0015, "step": 7348 }, { "epoch": 0.47360958948250304, "grad_norm": 0.00025634485228848494, "learning_rate": 5.853204439670606e-06, "loss": 0.0, "step": 7349 }, { "epoch": 0.4736740349294322, "grad_norm": 0.0021115394978189358, "learning_rate": 5.8524883637665594e-06, "loss": 0.0, "step": 7350 }, { "epoch": 0.4737384803763614, "grad_norm": 0.008847833623355518, "learning_rate": 5.851772287862514e-06, "loss": 0.0001, "step": 7351 }, { "epoch": 0.4738029258232906, "grad_norm": 0.1189699239903375, "learning_rate": 5.851056211958468e-06, "loss": 0.0004, "step": 7352 }, { "epoch": 0.4738673712702198, "grad_norm": 0.005188761681522458, "learning_rate": 5.850340136054422e-06, "loss": 0.0, "step": 7353 }, { "epoch": 0.47393181671714896, "grad_norm": 0.009026635366319133, "learning_rate": 5.849624060150377e-06, "loss": 0.0, "step": 7354 }, { "epoch": 0.4739962621640781, "grad_norm": 0.00019702236665921518, "learning_rate": 5.84890798424633e-06, "loss": 0.0, "step": 7355 }, { "epoch": 0.47406070761100727, "grad_norm": 3.311441360306938, "learning_rate": 5.848191908342284e-06, "loss": 0.0171, "step": 7356 }, { "epoch": 0.47412515305793645, "grad_norm": 0.001216040447727667, "learning_rate": 5.847475832438239e-06, "loss": 0.0, "step": 7357 }, { "epoch": 0.47418959850486564, "grad_norm": 0.17368396932281072, "learning_rate": 5.846759756534193e-06, "loss": 0.0028, "step": 7358 }, { "epoch": 0.4742540439517948, "grad_norm": 0.156202195722558, "learning_rate": 5.846043680630146e-06, "loss": 0.0011, "step": 7359 }, { "epoch": 0.47431848939872395, "grad_norm": 0.04618093117781858, "learning_rate": 5.845327604726101e-06, "loss": 0.0002, "step": 7360 }, { "epoch": 0.47438293484565314, "grad_norm": 0.01748293277886574, "learning_rate": 5.844611528822055e-06, "loss": 0.0, "step": 7361 }, { "epoch": 0.4744473802925823, "grad_norm": 0.0077936942194119655, "learning_rate": 5.84389545291801e-06, "loss": 0.0, "step": 7362 }, { "epoch": 0.4745118257395115, "grad_norm": 0.0030220017533081076, "learning_rate": 5.8431793770139645e-06, "loss": 0.0, "step": 7363 }, { "epoch": 0.4745762711864407, "grad_norm": 0.020481092922284175, "learning_rate": 5.842463301109919e-06, "loss": 0.0, "step": 7364 }, { "epoch": 0.4746407166333699, "grad_norm": 0.047249753805527046, "learning_rate": 5.841747225205873e-06, "loss": 0.0004, "step": 7365 }, { "epoch": 0.474705162080299, "grad_norm": 0.21330669411996359, "learning_rate": 5.8410311493018265e-06, "loss": 0.0006, "step": 7366 }, { "epoch": 0.4747696075272282, "grad_norm": 2.6491441151497485e-05, "learning_rate": 5.840315073397781e-06, "loss": 0.0, "step": 7367 }, { "epoch": 0.4748340529741574, "grad_norm": 0.0006470988794403943, "learning_rate": 5.839598997493735e-06, "loss": 0.0, "step": 7368 }, { "epoch": 0.47489849842108656, "grad_norm": 6.626651095656053e-05, "learning_rate": 5.838882921589689e-06, "loss": 0.0, "step": 7369 }, { "epoch": 0.47496294386801574, "grad_norm": 0.0010031601606788707, "learning_rate": 5.838166845685644e-06, "loss": 0.0, "step": 7370 }, { "epoch": 0.4750273893149449, "grad_norm": 0.0006513101402277502, "learning_rate": 5.837450769781597e-06, "loss": 0.0, "step": 7371 }, { "epoch": 0.47509183476187405, "grad_norm": 0.007908438327495618, "learning_rate": 5.8367346938775515e-06, "loss": 0.0, "step": 7372 }, { "epoch": 0.47515628020880324, "grad_norm": 0.000893835609315527, "learning_rate": 5.836018617973506e-06, "loss": 0.0, "step": 7373 }, { "epoch": 0.4752207256557324, "grad_norm": 0.0033915587171132375, "learning_rate": 5.83530254206946e-06, "loss": 0.0, "step": 7374 }, { "epoch": 0.4752851711026616, "grad_norm": 0.01097830771040835, "learning_rate": 5.8345864661654135e-06, "loss": 0.0, "step": 7375 }, { "epoch": 0.4753496165495908, "grad_norm": 0.0011861722306901786, "learning_rate": 5.833870390261368e-06, "loss": 0.0, "step": 7376 }, { "epoch": 0.4754140619965199, "grad_norm": 0.039472695668676026, "learning_rate": 5.833154314357322e-06, "loss": 0.0001, "step": 7377 }, { "epoch": 0.4754785074434491, "grad_norm": 0.00013474575537218255, "learning_rate": 5.832438238453276e-06, "loss": 0.0, "step": 7378 }, { "epoch": 0.4755429528903783, "grad_norm": 0.004523991526979276, "learning_rate": 5.831722162549231e-06, "loss": 0.0, "step": 7379 }, { "epoch": 0.4756073983373075, "grad_norm": 0.006835058807200215, "learning_rate": 5.831006086645184e-06, "loss": 0.0, "step": 7380 }, { "epoch": 0.47567184378423666, "grad_norm": 0.05759036522811417, "learning_rate": 5.8302900107411385e-06, "loss": 0.0001, "step": 7381 }, { "epoch": 0.47573628923116584, "grad_norm": 0.4775274076385219, "learning_rate": 5.829573934837093e-06, "loss": 0.0034, "step": 7382 }, { "epoch": 0.47580073467809497, "grad_norm": 0.0005595710209434688, "learning_rate": 5.828857858933047e-06, "loss": 0.0, "step": 7383 }, { "epoch": 0.47586518012502416, "grad_norm": 0.002950148607580095, "learning_rate": 5.828141783029001e-06, "loss": 0.0, "step": 7384 }, { "epoch": 0.47592962557195334, "grad_norm": 0.10752587919006612, "learning_rate": 5.8274257071249565e-06, "loss": 0.0007, "step": 7385 }, { "epoch": 0.4759940710188825, "grad_norm": 0.005091958235020094, "learning_rate": 5.826709631220911e-06, "loss": 0.0001, "step": 7386 }, { "epoch": 0.4760585164658117, "grad_norm": 0.003841617842285557, "learning_rate": 5.825993555316864e-06, "loss": 0.0, "step": 7387 }, { "epoch": 0.47612296191274084, "grad_norm": 0.0006383723237262383, "learning_rate": 5.8252774794128185e-06, "loss": 0.0, "step": 7388 }, { "epoch": 0.47618740735967, "grad_norm": 0.004039966120740329, "learning_rate": 5.824561403508773e-06, "loss": 0.0, "step": 7389 }, { "epoch": 0.4762518528065992, "grad_norm": 0.0010477007617992421, "learning_rate": 5.823845327604727e-06, "loss": 0.0, "step": 7390 }, { "epoch": 0.4763162982535284, "grad_norm": 1.387736674906414, "learning_rate": 5.823129251700681e-06, "loss": 0.0068, "step": 7391 }, { "epoch": 0.4763807437004576, "grad_norm": 0.07167502330915866, "learning_rate": 5.822413175796635e-06, "loss": 0.0002, "step": 7392 }, { "epoch": 0.47644518914738676, "grad_norm": 0.014098552902959926, "learning_rate": 5.821697099892589e-06, "loss": 0.0, "step": 7393 }, { "epoch": 0.4765096345943159, "grad_norm": 0.08331748195728007, "learning_rate": 5.8209810239885435e-06, "loss": 0.0001, "step": 7394 }, { "epoch": 0.4765740800412451, "grad_norm": 0.08048932115789725, "learning_rate": 5.820264948084498e-06, "loss": 0.0002, "step": 7395 }, { "epoch": 0.47663852548817426, "grad_norm": 0.8311032058054546, "learning_rate": 5.819548872180451e-06, "loss": 0.0028, "step": 7396 }, { "epoch": 0.47670297093510344, "grad_norm": 0.20586471478306612, "learning_rate": 5.8188327962764055e-06, "loss": 0.0005, "step": 7397 }, { "epoch": 0.4767674163820326, "grad_norm": 0.322638659886776, "learning_rate": 5.81811672037236e-06, "loss": 0.0031, "step": 7398 }, { "epoch": 0.4768318618289618, "grad_norm": 0.011732446097699342, "learning_rate": 5.817400644468314e-06, "loss": 0.0001, "step": 7399 }, { "epoch": 0.47689630727589094, "grad_norm": 0.15127922701807855, "learning_rate": 5.8166845685642684e-06, "loss": 0.0001, "step": 7400 }, { "epoch": 0.4769607527228201, "grad_norm": 0.216969181012013, "learning_rate": 5.815968492660222e-06, "loss": 0.0025, "step": 7401 }, { "epoch": 0.4770251981697493, "grad_norm": 0.30436968734146624, "learning_rate": 5.815252416756176e-06, "loss": 0.0004, "step": 7402 }, { "epoch": 0.4770896436166785, "grad_norm": 0.07910840864342857, "learning_rate": 5.8145363408521305e-06, "loss": 0.0001, "step": 7403 }, { "epoch": 0.4771540890636077, "grad_norm": 0.001726383251496407, "learning_rate": 5.813820264948085e-06, "loss": 0.0, "step": 7404 }, { "epoch": 0.4772185345105368, "grad_norm": 0.0003675535003440943, "learning_rate": 5.813104189044039e-06, "loss": 0.0, "step": 7405 }, { "epoch": 0.477282979957466, "grad_norm": 0.001882370654428137, "learning_rate": 5.8123881131399925e-06, "loss": 0.0, "step": 7406 }, { "epoch": 0.4773474254043952, "grad_norm": 0.04665171072155993, "learning_rate": 5.811672037235947e-06, "loss": 0.0002, "step": 7407 }, { "epoch": 0.47741187085132436, "grad_norm": 0.006549475831629919, "learning_rate": 5.810955961331902e-06, "loss": 0.0, "step": 7408 }, { "epoch": 0.47747631629825354, "grad_norm": 0.004427687294087215, "learning_rate": 5.810239885427856e-06, "loss": 0.0, "step": 7409 }, { "epoch": 0.47754076174518273, "grad_norm": 0.0017522578296534845, "learning_rate": 5.8095238095238106e-06, "loss": 0.0, "step": 7410 }, { "epoch": 0.47760520719211186, "grad_norm": 0.3626807387078549, "learning_rate": 5.808807733619765e-06, "loss": 0.0017, "step": 7411 }, { "epoch": 0.47766965263904104, "grad_norm": 0.0008852981396097059, "learning_rate": 5.808091657715718e-06, "loss": 0.0, "step": 7412 }, { "epoch": 0.4777340980859702, "grad_norm": 0.023258903396705985, "learning_rate": 5.807375581811673e-06, "loss": 0.0, "step": 7413 }, { "epoch": 0.4777985435328994, "grad_norm": 0.005061872213993539, "learning_rate": 5.806659505907627e-06, "loss": 0.0001, "step": 7414 }, { "epoch": 0.4778629889798286, "grad_norm": 0.0009047437859875983, "learning_rate": 5.805943430003581e-06, "loss": 0.0, "step": 7415 }, { "epoch": 0.4779274344267577, "grad_norm": 0.00862082379217804, "learning_rate": 5.8052273540995355e-06, "loss": 0.0, "step": 7416 }, { "epoch": 0.4779918798736869, "grad_norm": 0.001022643294842138, "learning_rate": 5.804511278195489e-06, "loss": 0.0, "step": 7417 }, { "epoch": 0.4780563253206161, "grad_norm": 0.00018940896883695436, "learning_rate": 5.803795202291443e-06, "loss": 0.0, "step": 7418 }, { "epoch": 0.4781207707675453, "grad_norm": 0.002504940827306221, "learning_rate": 5.8030791263873976e-06, "loss": 0.0, "step": 7419 }, { "epoch": 0.47818521621447446, "grad_norm": 0.00035435915281225466, "learning_rate": 5.802363050483352e-06, "loss": 0.0, "step": 7420 }, { "epoch": 0.47824966166140365, "grad_norm": 0.0021682544293959963, "learning_rate": 5.801646974579306e-06, "loss": 0.0, "step": 7421 }, { "epoch": 0.4783141071083328, "grad_norm": 0.01594937015862623, "learning_rate": 5.80093089867526e-06, "loss": 0.0001, "step": 7422 }, { "epoch": 0.47837855255526196, "grad_norm": 0.0018651042764992, "learning_rate": 5.800214822771214e-06, "loss": 0.0, "step": 7423 }, { "epoch": 0.47844299800219114, "grad_norm": 0.001665721738655338, "learning_rate": 5.799498746867168e-06, "loss": 0.0, "step": 7424 }, { "epoch": 0.47850744344912033, "grad_norm": 0.0003566205222117585, "learning_rate": 5.7987826709631225e-06, "loss": 0.0, "step": 7425 }, { "epoch": 0.4785718888960495, "grad_norm": 0.0008448971831598651, "learning_rate": 5.798066595059077e-06, "loss": 0.0, "step": 7426 }, { "epoch": 0.47863633434297864, "grad_norm": 7.599035001878121e-05, "learning_rate": 5.79735051915503e-06, "loss": 0.0, "step": 7427 }, { "epoch": 0.4787007797899078, "grad_norm": 0.06246002789145786, "learning_rate": 5.7966344432509846e-06, "loss": 0.0006, "step": 7428 }, { "epoch": 0.478765225236837, "grad_norm": 0.0005704727887440845, "learning_rate": 5.795918367346939e-06, "loss": 0.0, "step": 7429 }, { "epoch": 0.4788296706837662, "grad_norm": 9.001205747052077e-05, "learning_rate": 5.795202291442893e-06, "loss": 0.0, "step": 7430 }, { "epoch": 0.4788941161306954, "grad_norm": 1.6705578758492294, "learning_rate": 5.7944862155388475e-06, "loss": 0.0206, "step": 7431 }, { "epoch": 0.47895856157762456, "grad_norm": 0.0011345647547070011, "learning_rate": 5.793770139634803e-06, "loss": 0.0, "step": 7432 }, { "epoch": 0.4790230070245537, "grad_norm": 0.14501953125, "learning_rate": 5.793054063730756e-06, "loss": 0.0003, "step": 7433 }, { "epoch": 0.4790874524714829, "grad_norm": 0.8865466181852298, "learning_rate": 5.79233798782671e-06, "loss": 0.0128, "step": 7434 }, { "epoch": 0.47915189791841206, "grad_norm": 0.3714381777882189, "learning_rate": 5.791621911922665e-06, "loss": 0.003, "step": 7435 }, { "epoch": 0.47921634336534125, "grad_norm": 0.5642342588606207, "learning_rate": 5.790905836018619e-06, "loss": 0.0019, "step": 7436 }, { "epoch": 0.47928078881227043, "grad_norm": 0.016275023984882227, "learning_rate": 5.790189760114573e-06, "loss": 0.0002, "step": 7437 }, { "epoch": 0.4793452342591996, "grad_norm": 0.008938399006915438, "learning_rate": 5.789473684210527e-06, "loss": 0.0, "step": 7438 }, { "epoch": 0.47940967970612874, "grad_norm": 0.0010685410566123547, "learning_rate": 5.788757608306481e-06, "loss": 0.0, "step": 7439 }, { "epoch": 0.47947412515305793, "grad_norm": 0.23940940258627813, "learning_rate": 5.788041532402435e-06, "loss": 0.0005, "step": 7440 }, { "epoch": 0.4795385705999871, "grad_norm": 1.1296119464180416, "learning_rate": 5.78732545649839e-06, "loss": 0.0082, "step": 7441 }, { "epoch": 0.4796030160469163, "grad_norm": 0.0049990550644581685, "learning_rate": 5.786609380594344e-06, "loss": 0.0, "step": 7442 }, { "epoch": 0.4796674614938455, "grad_norm": 0.0003853361686928715, "learning_rate": 5.785893304690297e-06, "loss": 0.0, "step": 7443 }, { "epoch": 0.4797319069407746, "grad_norm": 0.012548110204785096, "learning_rate": 5.785177228786252e-06, "loss": 0.0001, "step": 7444 }, { "epoch": 0.4797963523877038, "grad_norm": 0.01280574895032373, "learning_rate": 5.784461152882206e-06, "loss": 0.0, "step": 7445 }, { "epoch": 0.479860797834633, "grad_norm": 0.04450895395433987, "learning_rate": 5.78374507697816e-06, "loss": 0.0001, "step": 7446 }, { "epoch": 0.47992524328156216, "grad_norm": 0.21640394877508198, "learning_rate": 5.7830290010741145e-06, "loss": 0.002, "step": 7447 }, { "epoch": 0.47998968872849135, "grad_norm": 0.023149261464048686, "learning_rate": 5.782312925170068e-06, "loss": 0.0, "step": 7448 }, { "epoch": 0.48005413417542053, "grad_norm": 0.07784700405213824, "learning_rate": 5.781596849266022e-06, "loss": 0.0001, "step": 7449 }, { "epoch": 0.48011857962234966, "grad_norm": 0.004143772080952122, "learning_rate": 5.780880773361977e-06, "loss": 0.0, "step": 7450 }, { "epoch": 0.48018302506927885, "grad_norm": 0.02690683351179182, "learning_rate": 5.780164697457931e-06, "loss": 0.0001, "step": 7451 }, { "epoch": 0.48024747051620803, "grad_norm": 0.15777312226534332, "learning_rate": 5.779448621553884e-06, "loss": 0.0001, "step": 7452 }, { "epoch": 0.4803119159631372, "grad_norm": 0.011854422022662701, "learning_rate": 5.778732545649839e-06, "loss": 0.0001, "step": 7453 }, { "epoch": 0.4803763614100664, "grad_norm": 0.0246195620217399, "learning_rate": 5.778016469745793e-06, "loss": 0.0, "step": 7454 }, { "epoch": 0.48044080685699553, "grad_norm": 0.004674714612297223, "learning_rate": 5.777300393841748e-06, "loss": 0.0, "step": 7455 }, { "epoch": 0.4805052523039247, "grad_norm": 0.08145114731413798, "learning_rate": 5.776584317937702e-06, "loss": 0.0001, "step": 7456 }, { "epoch": 0.4805696977508539, "grad_norm": 0.0011909823012698556, "learning_rate": 5.775868242033657e-06, "loss": 0.0, "step": 7457 }, { "epoch": 0.4806341431977831, "grad_norm": 0.0018303589815469893, "learning_rate": 5.775152166129611e-06, "loss": 0.0, "step": 7458 }, { "epoch": 0.48069858864471227, "grad_norm": 0.010239236917028723, "learning_rate": 5.7744360902255644e-06, "loss": 0.0, "step": 7459 }, { "epoch": 0.48076303409164145, "grad_norm": 0.09835660615437058, "learning_rate": 5.773720014321519e-06, "loss": 0.0001, "step": 7460 }, { "epoch": 0.4808274795385706, "grad_norm": 0.014118260546048035, "learning_rate": 5.773003938417473e-06, "loss": 0.0001, "step": 7461 }, { "epoch": 0.48089192498549976, "grad_norm": 0.000602907625883965, "learning_rate": 5.772287862513427e-06, "loss": 0.0, "step": 7462 }, { "epoch": 0.48095637043242895, "grad_norm": 0.06684985026054438, "learning_rate": 5.771571786609382e-06, "loss": 0.0001, "step": 7463 }, { "epoch": 0.48102081587935813, "grad_norm": 0.17200650256293518, "learning_rate": 5.770855710705335e-06, "loss": 0.0016, "step": 7464 }, { "epoch": 0.4810852613262873, "grad_norm": 0.021725604178362307, "learning_rate": 5.770139634801289e-06, "loss": 0.0, "step": 7465 }, { "epoch": 0.48114970677321645, "grad_norm": 0.01244272677275712, "learning_rate": 5.769423558897244e-06, "loss": 0.0, "step": 7466 }, { "epoch": 0.48121415222014563, "grad_norm": 0.0024778840976099475, "learning_rate": 5.768707482993198e-06, "loss": 0.0, "step": 7467 }, { "epoch": 0.4812785976670748, "grad_norm": 0.00267979224785924, "learning_rate": 5.7679914070891514e-06, "loss": 0.0, "step": 7468 }, { "epoch": 0.481343043114004, "grad_norm": 0.003459435935471662, "learning_rate": 5.767275331185106e-06, "loss": 0.0, "step": 7469 }, { "epoch": 0.4814074885609332, "grad_norm": 0.44937154410443325, "learning_rate": 5.76655925528106e-06, "loss": 0.0028, "step": 7470 }, { "epoch": 0.48147193400786237, "grad_norm": 0.1190897293505687, "learning_rate": 5.765843179377014e-06, "loss": 0.0003, "step": 7471 }, { "epoch": 0.4815363794547915, "grad_norm": 0.00193991626554029, "learning_rate": 5.765127103472969e-06, "loss": 0.0, "step": 7472 }, { "epoch": 0.4816008249017207, "grad_norm": 0.0002928079372750345, "learning_rate": 5.764411027568922e-06, "loss": 0.0, "step": 7473 }, { "epoch": 0.48166527034864987, "grad_norm": 0.0026289588066395926, "learning_rate": 5.763694951664876e-06, "loss": 0.0, "step": 7474 }, { "epoch": 0.48172971579557905, "grad_norm": 0.016364185159167613, "learning_rate": 5.762978875760831e-06, "loss": 0.0002, "step": 7475 }, { "epoch": 0.48179416124250823, "grad_norm": 0.002886073119532732, "learning_rate": 5.762262799856785e-06, "loss": 0.0, "step": 7476 }, { "epoch": 0.4818586066894374, "grad_norm": 0.0005045980107262203, "learning_rate": 5.761546723952739e-06, "loss": 0.0, "step": 7477 }, { "epoch": 0.48192305213636655, "grad_norm": 0.012034130486031228, "learning_rate": 5.760830648048694e-06, "loss": 0.0001, "step": 7478 }, { "epoch": 0.48198749758329573, "grad_norm": 0.0009000554618051204, "learning_rate": 5.760114572144649e-06, "loss": 0.0, "step": 7479 }, { "epoch": 0.4820519430302249, "grad_norm": 0.00965203255101447, "learning_rate": 5.759398496240602e-06, "loss": 0.0, "step": 7480 }, { "epoch": 0.4821163884771541, "grad_norm": 0.05475154813132867, "learning_rate": 5.7586824203365565e-06, "loss": 0.0001, "step": 7481 }, { "epoch": 0.4821808339240833, "grad_norm": 0.020473049849451586, "learning_rate": 5.757966344432511e-06, "loss": 0.0002, "step": 7482 }, { "epoch": 0.4822452793710124, "grad_norm": 0.0010466034143123027, "learning_rate": 5.757250268528465e-06, "loss": 0.0, "step": 7483 }, { "epoch": 0.4823097248179416, "grad_norm": 0.018919086369527306, "learning_rate": 5.7565341926244185e-06, "loss": 0.0001, "step": 7484 }, { "epoch": 0.4823741702648708, "grad_norm": 0.0019212618847136709, "learning_rate": 5.755818116720373e-06, "loss": 0.0, "step": 7485 }, { "epoch": 0.48243861571179997, "grad_norm": 0.00398771180909543, "learning_rate": 5.755102040816327e-06, "loss": 0.0, "step": 7486 }, { "epoch": 0.48250306115872915, "grad_norm": 0.009733597569587743, "learning_rate": 5.754385964912281e-06, "loss": 0.0, "step": 7487 }, { "epoch": 0.48256750660565834, "grad_norm": 0.00010193001177631241, "learning_rate": 5.753669889008236e-06, "loss": 0.0, "step": 7488 }, { "epoch": 0.48263195205258747, "grad_norm": 0.0004880647422603623, "learning_rate": 5.752953813104189e-06, "loss": 0.0, "step": 7489 }, { "epoch": 0.48269639749951665, "grad_norm": 0.00360417876237237, "learning_rate": 5.7522377372001435e-06, "loss": 0.0, "step": 7490 }, { "epoch": 0.48276084294644583, "grad_norm": 0.0004049090707836464, "learning_rate": 5.751521661296098e-06, "loss": 0.0, "step": 7491 }, { "epoch": 0.482825288393375, "grad_norm": 0.0013167952446200002, "learning_rate": 5.750805585392052e-06, "loss": 0.0, "step": 7492 }, { "epoch": 0.4828897338403042, "grad_norm": 0.025174958117207765, "learning_rate": 5.750089509488006e-06, "loss": 0.0002, "step": 7493 }, { "epoch": 0.48295417928723333, "grad_norm": 0.06014975165900907, "learning_rate": 5.74937343358396e-06, "loss": 0.0016, "step": 7494 }, { "epoch": 0.4830186247341625, "grad_norm": 0.012060249113546211, "learning_rate": 5.748657357679914e-06, "loss": 0.0, "step": 7495 }, { "epoch": 0.4830830701810917, "grad_norm": 0.12713788599629475, "learning_rate": 5.747941281775868e-06, "loss": 0.0028, "step": 7496 }, { "epoch": 0.4831475156280209, "grad_norm": 0.005913938901135187, "learning_rate": 5.747225205871823e-06, "loss": 0.0, "step": 7497 }, { "epoch": 0.48321196107495007, "grad_norm": 0.00039260180087013206, "learning_rate": 5.746509129967777e-06, "loss": 0.0, "step": 7498 }, { "epoch": 0.48327640652187925, "grad_norm": 0.014482109685575993, "learning_rate": 5.7457930540637305e-06, "loss": 0.0001, "step": 7499 }, { "epoch": 0.4833408519688084, "grad_norm": 0.00447910521815926, "learning_rate": 5.745076978159685e-06, "loss": 0.0, "step": 7500 }, { "epoch": 0.48340529741573757, "grad_norm": 0.026631144486073698, "learning_rate": 5.744360902255639e-06, "loss": 0.0001, "step": 7501 }, { "epoch": 0.48346974286266675, "grad_norm": 0.012365717619568232, "learning_rate": 5.743644826351594e-06, "loss": 0.0001, "step": 7502 }, { "epoch": 0.48353418830959594, "grad_norm": 0.000551421378977375, "learning_rate": 5.7429287504475485e-06, "loss": 0.0, "step": 7503 }, { "epoch": 0.4835986337565251, "grad_norm": 0.007876399816532313, "learning_rate": 5.742212674543503e-06, "loss": 0.0001, "step": 7504 }, { "epoch": 0.48366307920345425, "grad_norm": 0.001752821937067081, "learning_rate": 5.741496598639456e-06, "loss": 0.0, "step": 7505 }, { "epoch": 0.48372752465038343, "grad_norm": 0.002184141243038027, "learning_rate": 5.7407805227354105e-06, "loss": 0.0, "step": 7506 }, { "epoch": 0.4837919700973126, "grad_norm": 0.19810261064370907, "learning_rate": 5.740064446831365e-06, "loss": 0.0009, "step": 7507 }, { "epoch": 0.4838564155442418, "grad_norm": 0.0006581602418458326, "learning_rate": 5.739348370927319e-06, "loss": 0.0, "step": 7508 }, { "epoch": 0.483920860991171, "grad_norm": 0.00032189703491337544, "learning_rate": 5.7386322950232734e-06, "loss": 0.0, "step": 7509 }, { "epoch": 0.48398530643810017, "grad_norm": 0.0007444887274812117, "learning_rate": 5.737916219119227e-06, "loss": 0.0, "step": 7510 }, { "epoch": 0.4840497518850293, "grad_norm": 0.0661404739703034, "learning_rate": 5.737200143215181e-06, "loss": 0.0002, "step": 7511 }, { "epoch": 0.4841141973319585, "grad_norm": 0.020757209588938764, "learning_rate": 5.7364840673111355e-06, "loss": 0.0001, "step": 7512 }, { "epoch": 0.48417864277888767, "grad_norm": 0.005867169331984068, "learning_rate": 5.73576799140709e-06, "loss": 0.0, "step": 7513 }, { "epoch": 0.48424308822581685, "grad_norm": 0.00047232161720890626, "learning_rate": 5.735051915503044e-06, "loss": 0.0, "step": 7514 }, { "epoch": 0.48430753367274604, "grad_norm": 0.006958581875660543, "learning_rate": 5.7343358395989975e-06, "loss": 0.0, "step": 7515 }, { "epoch": 0.4843719791196752, "grad_norm": 0.0033243687316280453, "learning_rate": 5.733619763694952e-06, "loss": 0.0, "step": 7516 }, { "epoch": 0.48443642456660435, "grad_norm": 0.00025208480732531735, "learning_rate": 5.732903687790906e-06, "loss": 0.0, "step": 7517 }, { "epoch": 0.48450087001353354, "grad_norm": 0.014381225038362599, "learning_rate": 5.7321876118868604e-06, "loss": 0.0, "step": 7518 }, { "epoch": 0.4845653154604627, "grad_norm": 0.2902895555370628, "learning_rate": 5.731471535982815e-06, "loss": 0.0006, "step": 7519 }, { "epoch": 0.4846297609073919, "grad_norm": 0.004500232171590274, "learning_rate": 5.730755460078768e-06, "loss": 0.0, "step": 7520 }, { "epoch": 0.4846942063543211, "grad_norm": 0.005529903035349548, "learning_rate": 5.7300393841747225e-06, "loss": 0.0, "step": 7521 }, { "epoch": 0.4847586518012502, "grad_norm": 0.0014221680833822147, "learning_rate": 5.729323308270677e-06, "loss": 0.0, "step": 7522 }, { "epoch": 0.4848230972481794, "grad_norm": 0.0029934765826047783, "learning_rate": 5.728607232366631e-06, "loss": 0.0, "step": 7523 }, { "epoch": 0.4848875426951086, "grad_norm": 0.0015283611343524148, "learning_rate": 5.727891156462585e-06, "loss": 0.0, "step": 7524 }, { "epoch": 0.48495198814203777, "grad_norm": 0.0019634857246066248, "learning_rate": 5.7271750805585405e-06, "loss": 0.0, "step": 7525 }, { "epoch": 0.48501643358896696, "grad_norm": 0.0034480413744822237, "learning_rate": 5.726459004654494e-06, "loss": 0.0, "step": 7526 }, { "epoch": 0.48508087903589614, "grad_norm": 0.0026394266389676234, "learning_rate": 5.725742928750448e-06, "loss": 0.0, "step": 7527 }, { "epoch": 0.48514532448282527, "grad_norm": 0.014411129078745335, "learning_rate": 5.7250268528464026e-06, "loss": 0.0001, "step": 7528 }, { "epoch": 0.48520976992975445, "grad_norm": 0.0067901182709101905, "learning_rate": 5.724310776942357e-06, "loss": 0.0, "step": 7529 }, { "epoch": 0.48527421537668364, "grad_norm": 0.002589366357544819, "learning_rate": 5.723594701038311e-06, "loss": 0.0, "step": 7530 }, { "epoch": 0.4853386608236128, "grad_norm": 0.2295686716096039, "learning_rate": 5.722878625134265e-06, "loss": 0.0002, "step": 7531 }, { "epoch": 0.485403106270542, "grad_norm": 0.005061924499310147, "learning_rate": 5.722162549230219e-06, "loss": 0.0, "step": 7532 }, { "epoch": 0.48546755171747114, "grad_norm": 0.011882524196712532, "learning_rate": 5.721446473326173e-06, "loss": 0.0001, "step": 7533 }, { "epoch": 0.4855319971644003, "grad_norm": 0.16392287489124577, "learning_rate": 5.7207303974221275e-06, "loss": 0.0008, "step": 7534 }, { "epoch": 0.4855964426113295, "grad_norm": 0.002512980230831525, "learning_rate": 5.720014321518082e-06, "loss": 0.0, "step": 7535 }, { "epoch": 0.4856608880582587, "grad_norm": 0.006464325307909982, "learning_rate": 5.719298245614035e-06, "loss": 0.0, "step": 7536 }, { "epoch": 0.4857253335051879, "grad_norm": 0.001662718931743277, "learning_rate": 5.7185821697099896e-06, "loss": 0.0, "step": 7537 }, { "epoch": 0.48578977895211706, "grad_norm": 0.00312735401489503, "learning_rate": 5.717866093805944e-06, "loss": 0.0, "step": 7538 }, { "epoch": 0.4858542243990462, "grad_norm": 0.002918325996476907, "learning_rate": 5.717150017901898e-06, "loss": 0.0, "step": 7539 }, { "epoch": 0.48591866984597537, "grad_norm": 0.0013583321002518408, "learning_rate": 5.7164339419978525e-06, "loss": 0.0, "step": 7540 }, { "epoch": 0.48598311529290456, "grad_norm": 0.03570830831840738, "learning_rate": 5.715717866093806e-06, "loss": 0.0001, "step": 7541 }, { "epoch": 0.48604756073983374, "grad_norm": 0.06065757499082424, "learning_rate": 5.71500179018976e-06, "loss": 0.0001, "step": 7542 }, { "epoch": 0.4861120061867629, "grad_norm": 0.00027401369607215015, "learning_rate": 5.7142857142857145e-06, "loss": 0.0, "step": 7543 }, { "epoch": 0.48617645163369205, "grad_norm": 0.40300723802844124, "learning_rate": 5.713569638381669e-06, "loss": 0.0053, "step": 7544 }, { "epoch": 0.48624089708062124, "grad_norm": 0.0010218532965069267, "learning_rate": 5.712853562477622e-06, "loss": 0.0, "step": 7545 }, { "epoch": 0.4863053425275504, "grad_norm": 0.00012427245394306858, "learning_rate": 5.7121374865735766e-06, "loss": 0.0, "step": 7546 }, { "epoch": 0.4863697879744796, "grad_norm": 0.00135554513866112, "learning_rate": 5.711421410669531e-06, "loss": 0.0, "step": 7547 }, { "epoch": 0.4864342334214088, "grad_norm": 0.009465878989607369, "learning_rate": 5.710705334765485e-06, "loss": 0.0, "step": 7548 }, { "epoch": 0.486498678868338, "grad_norm": 0.003342775812316762, "learning_rate": 5.70998925886144e-06, "loss": 0.0001, "step": 7549 }, { "epoch": 0.4865631243152671, "grad_norm": 0.722024424887394, "learning_rate": 5.709273182957395e-06, "loss": 0.0034, "step": 7550 }, { "epoch": 0.4866275697621963, "grad_norm": 0.0004114479573313148, "learning_rate": 5.708557107053349e-06, "loss": 0.0, "step": 7551 }, { "epoch": 0.4866920152091255, "grad_norm": 0.0004321755849311948, "learning_rate": 5.707841031149302e-06, "loss": 0.0, "step": 7552 }, { "epoch": 0.48675646065605466, "grad_norm": 0.0028718780582452424, "learning_rate": 5.707124955245257e-06, "loss": 0.0, "step": 7553 }, { "epoch": 0.48682090610298384, "grad_norm": 0.9203624601679466, "learning_rate": 5.706408879341211e-06, "loss": 0.0046, "step": 7554 }, { "epoch": 0.486885351549913, "grad_norm": 0.0048342640045114255, "learning_rate": 5.705692803437165e-06, "loss": 0.0, "step": 7555 }, { "epoch": 0.48694979699684215, "grad_norm": 0.0002541909663165527, "learning_rate": 5.7049767275331195e-06, "loss": 0.0, "step": 7556 }, { "epoch": 0.48701424244377134, "grad_norm": 0.0004487357840125776, "learning_rate": 5.704260651629073e-06, "loss": 0.0, "step": 7557 }, { "epoch": 0.4870786878907005, "grad_norm": 0.0008346864542774917, "learning_rate": 5.703544575725027e-06, "loss": 0.0, "step": 7558 }, { "epoch": 0.4871431333376297, "grad_norm": 0.0018916479246447236, "learning_rate": 5.702828499820982e-06, "loss": 0.0, "step": 7559 }, { "epoch": 0.4872075787845589, "grad_norm": 0.018288931008625598, "learning_rate": 5.702112423916936e-06, "loss": 0.0, "step": 7560 }, { "epoch": 0.487272024231488, "grad_norm": 0.0010530625038883763, "learning_rate": 5.701396348012889e-06, "loss": 0.0, "step": 7561 }, { "epoch": 0.4873364696784172, "grad_norm": 0.007322550871804302, "learning_rate": 5.700680272108844e-06, "loss": 0.0, "step": 7562 }, { "epoch": 0.4874009151253464, "grad_norm": 0.003065408485538047, "learning_rate": 5.699964196204798e-06, "loss": 0.0, "step": 7563 }, { "epoch": 0.4874653605722756, "grad_norm": 0.0006956848235609778, "learning_rate": 5.699248120300752e-06, "loss": 0.0, "step": 7564 }, { "epoch": 0.48752980601920476, "grad_norm": 0.1693866565782438, "learning_rate": 5.6985320443967065e-06, "loss": 0.001, "step": 7565 }, { "epoch": 0.48759425146613394, "grad_norm": 0.1538983159029183, "learning_rate": 5.69781596849266e-06, "loss": 0.0002, "step": 7566 }, { "epoch": 0.48765869691306307, "grad_norm": 0.0006069839695752836, "learning_rate": 5.697099892588614e-06, "loss": 0.0, "step": 7567 }, { "epoch": 0.48772314235999226, "grad_norm": 0.0033471887203188194, "learning_rate": 5.696383816684569e-06, "loss": 0.0, "step": 7568 }, { "epoch": 0.48778758780692144, "grad_norm": 0.06231746804088351, "learning_rate": 5.695667740780523e-06, "loss": 0.0006, "step": 7569 }, { "epoch": 0.4878520332538506, "grad_norm": 0.016381977460167466, "learning_rate": 5.694951664876477e-06, "loss": 0.0001, "step": 7570 }, { "epoch": 0.4879164787007798, "grad_norm": 0.003621855047769446, "learning_rate": 5.694235588972431e-06, "loss": 0.0, "step": 7571 }, { "epoch": 0.48798092414770894, "grad_norm": 0.00014088177846377143, "learning_rate": 5.693519513068387e-06, "loss": 0.0, "step": 7572 }, { "epoch": 0.4880453695946381, "grad_norm": 0.0014828029465861036, "learning_rate": 5.69280343716434e-06, "loss": 0.0, "step": 7573 }, { "epoch": 0.4881098150415673, "grad_norm": 0.012641590761369413, "learning_rate": 5.692087361260294e-06, "loss": 0.0001, "step": 7574 }, { "epoch": 0.4881742604884965, "grad_norm": 0.001850971111009934, "learning_rate": 5.691371285356249e-06, "loss": 0.0, "step": 7575 }, { "epoch": 0.4882387059354257, "grad_norm": 0.046230391549455706, "learning_rate": 5.690655209452203e-06, "loss": 0.0001, "step": 7576 }, { "epoch": 0.48830315138235486, "grad_norm": 0.01488584860814521, "learning_rate": 5.689939133548157e-06, "loss": 0.0001, "step": 7577 }, { "epoch": 0.488367596829284, "grad_norm": 0.000514240191141193, "learning_rate": 5.689223057644111e-06, "loss": 0.0, "step": 7578 }, { "epoch": 0.4884320422762132, "grad_norm": 0.008130484260185939, "learning_rate": 5.688506981740065e-06, "loss": 0.0001, "step": 7579 }, { "epoch": 0.48849648772314236, "grad_norm": 0.0030046709164151357, "learning_rate": 5.687790905836019e-06, "loss": 0.0, "step": 7580 }, { "epoch": 0.48856093317007154, "grad_norm": 0.015805491853670892, "learning_rate": 5.687074829931974e-06, "loss": 0.0002, "step": 7581 }, { "epoch": 0.4886253786170007, "grad_norm": 0.011213436765041867, "learning_rate": 5.686358754027927e-06, "loss": 0.0, "step": 7582 }, { "epoch": 0.48868982406392986, "grad_norm": 0.0007413171695860233, "learning_rate": 5.685642678123881e-06, "loss": 0.0, "step": 7583 }, { "epoch": 0.48875426951085904, "grad_norm": 0.032078999380731485, "learning_rate": 5.684926602219836e-06, "loss": 0.0001, "step": 7584 }, { "epoch": 0.4888187149577882, "grad_norm": 0.015428289726630193, "learning_rate": 5.68421052631579e-06, "loss": 0.0001, "step": 7585 }, { "epoch": 0.4888831604047174, "grad_norm": 0.7430837339681301, "learning_rate": 5.683494450411744e-06, "loss": 0.003, "step": 7586 }, { "epoch": 0.4889476058516466, "grad_norm": 0.16889645305455697, "learning_rate": 5.682778374507698e-06, "loss": 0.0006, "step": 7587 }, { "epoch": 0.4890120512985758, "grad_norm": 0.01572568683559918, "learning_rate": 5.682062298603652e-06, "loss": 0.0, "step": 7588 }, { "epoch": 0.4890764967455049, "grad_norm": 0.06375657091566687, "learning_rate": 5.681346222699606e-06, "loss": 0.0001, "step": 7589 }, { "epoch": 0.4891409421924341, "grad_norm": 0.0013207851323213868, "learning_rate": 5.680630146795561e-06, "loss": 0.0, "step": 7590 }, { "epoch": 0.4892053876393633, "grad_norm": 0.0013833316345360025, "learning_rate": 5.679914070891515e-06, "loss": 0.0, "step": 7591 }, { "epoch": 0.48926983308629246, "grad_norm": 0.002289494076961141, "learning_rate": 5.679197994987468e-06, "loss": 0.0, "step": 7592 }, { "epoch": 0.48933427853322164, "grad_norm": 0.0002545245494613917, "learning_rate": 5.678481919083423e-06, "loss": 0.0, "step": 7593 }, { "epoch": 0.48939872398015083, "grad_norm": 0.001060228307932159, "learning_rate": 5.677765843179377e-06, "loss": 0.0, "step": 7594 }, { "epoch": 0.48946316942707996, "grad_norm": 0.03487591100642847, "learning_rate": 5.677049767275332e-06, "loss": 0.0, "step": 7595 }, { "epoch": 0.48952761487400914, "grad_norm": 0.0168614849164812, "learning_rate": 5.676333691371286e-06, "loss": 0.0001, "step": 7596 }, { "epoch": 0.4895920603209383, "grad_norm": 0.0006910856059586328, "learning_rate": 5.675617615467241e-06, "loss": 0.0, "step": 7597 }, { "epoch": 0.4896565057678675, "grad_norm": 0.07767658589564769, "learning_rate": 5.674901539563194e-06, "loss": 0.0, "step": 7598 }, { "epoch": 0.4897209512147967, "grad_norm": 0.3195438699631821, "learning_rate": 5.6741854636591485e-06, "loss": 0.0018, "step": 7599 }, { "epoch": 0.4897853966617258, "grad_norm": 0.002169986000435176, "learning_rate": 5.673469387755103e-06, "loss": 0.0, "step": 7600 }, { "epoch": 0.489849842108655, "grad_norm": 0.012983810109652043, "learning_rate": 5.672753311851057e-06, "loss": 0.0, "step": 7601 }, { "epoch": 0.4899142875555842, "grad_norm": 0.014353244432383392, "learning_rate": 5.672037235947011e-06, "loss": 0.0, "step": 7602 }, { "epoch": 0.4899787330025134, "grad_norm": 0.00011045339360843307, "learning_rate": 5.671321160042965e-06, "loss": 0.0, "step": 7603 }, { "epoch": 0.49004317844944256, "grad_norm": 0.006119750306557415, "learning_rate": 5.670605084138919e-06, "loss": 0.0, "step": 7604 }, { "epoch": 0.49010762389637175, "grad_norm": 0.003246663160360342, "learning_rate": 5.669889008234873e-06, "loss": 0.0, "step": 7605 }, { "epoch": 0.4901720693433009, "grad_norm": 0.062110801071613486, "learning_rate": 5.669172932330828e-06, "loss": 0.0001, "step": 7606 }, { "epoch": 0.49023651479023006, "grad_norm": 0.007737362321899938, "learning_rate": 5.668456856426782e-06, "loss": 0.0, "step": 7607 }, { "epoch": 0.49030096023715924, "grad_norm": 0.020342826642074038, "learning_rate": 5.6677407805227355e-06, "loss": 0.0001, "step": 7608 }, { "epoch": 0.49036540568408843, "grad_norm": 0.002374675857821564, "learning_rate": 5.66702470461869e-06, "loss": 0.0, "step": 7609 }, { "epoch": 0.4904298511310176, "grad_norm": 0.002491284824095385, "learning_rate": 5.666308628714644e-06, "loss": 0.0, "step": 7610 }, { "epoch": 0.49049429657794674, "grad_norm": 1.0187017932555893, "learning_rate": 5.665592552810598e-06, "loss": 0.003, "step": 7611 }, { "epoch": 0.4905587420248759, "grad_norm": 0.05487809644575838, "learning_rate": 5.664876476906553e-06, "loss": 0.0003, "step": 7612 }, { "epoch": 0.4906231874718051, "grad_norm": 0.0018616346784588265, "learning_rate": 5.664160401002506e-06, "loss": 0.0, "step": 7613 }, { "epoch": 0.4906876329187343, "grad_norm": 0.0015321618605294287, "learning_rate": 5.66344432509846e-06, "loss": 0.0, "step": 7614 }, { "epoch": 0.4907520783656635, "grad_norm": 0.025696763498570488, "learning_rate": 5.662728249194415e-06, "loss": 0.0, "step": 7615 }, { "epoch": 0.49081652381259266, "grad_norm": 0.05154074278488259, "learning_rate": 5.662012173290369e-06, "loss": 0.0003, "step": 7616 }, { "epoch": 0.4908809692595218, "grad_norm": 0.26117784201754574, "learning_rate": 5.661296097386323e-06, "loss": 0.0008, "step": 7617 }, { "epoch": 0.490945414706451, "grad_norm": 0.21229055685764633, "learning_rate": 5.660580021482277e-06, "loss": 0.0006, "step": 7618 }, { "epoch": 0.49100986015338016, "grad_norm": 0.0019154365838764172, "learning_rate": 5.659863945578232e-06, "loss": 0.0, "step": 7619 }, { "epoch": 0.49107430560030935, "grad_norm": 0.000521643832761043, "learning_rate": 5.659147869674186e-06, "loss": 0.0, "step": 7620 }, { "epoch": 0.49113875104723853, "grad_norm": 0.05246131011336328, "learning_rate": 5.6584317937701405e-06, "loss": 0.0002, "step": 7621 }, { "epoch": 0.49120319649416766, "grad_norm": 0.023859120947491402, "learning_rate": 5.657715717866095e-06, "loss": 0.0001, "step": 7622 }, { "epoch": 0.49126764194109684, "grad_norm": 0.07066070248157924, "learning_rate": 5.656999641962049e-06, "loss": 0.0002, "step": 7623 }, { "epoch": 0.49133208738802603, "grad_norm": 0.0058702492762727244, "learning_rate": 5.6562835660580025e-06, "loss": 0.0, "step": 7624 }, { "epoch": 0.4913965328349552, "grad_norm": 0.45628538974263216, "learning_rate": 5.655567490153957e-06, "loss": 0.001, "step": 7625 }, { "epoch": 0.4914609782818844, "grad_norm": 0.016563145807952723, "learning_rate": 5.654851414249911e-06, "loss": 0.0, "step": 7626 }, { "epoch": 0.4915254237288136, "grad_norm": 0.0007029439357273081, "learning_rate": 5.6541353383458654e-06, "loss": 0.0, "step": 7627 }, { "epoch": 0.4915898691757427, "grad_norm": 0.07840164793822463, "learning_rate": 5.65341926244182e-06, "loss": 0.0, "step": 7628 }, { "epoch": 0.4916543146226719, "grad_norm": 0.3056592361995422, "learning_rate": 5.652703186537773e-06, "loss": 0.0023, "step": 7629 }, { "epoch": 0.4917187600696011, "grad_norm": 0.6774211016599851, "learning_rate": 5.6519871106337275e-06, "loss": 0.0035, "step": 7630 }, { "epoch": 0.49178320551653026, "grad_norm": 0.001993808920682025, "learning_rate": 5.651271034729682e-06, "loss": 0.0, "step": 7631 }, { "epoch": 0.49184765096345945, "grad_norm": 0.1596749738737264, "learning_rate": 5.650554958825636e-06, "loss": 0.0003, "step": 7632 }, { "epoch": 0.49191209641038863, "grad_norm": 0.018059339301017427, "learning_rate": 5.64983888292159e-06, "loss": 0.0001, "step": 7633 }, { "epoch": 0.49197654185731776, "grad_norm": 0.0013676482324627582, "learning_rate": 5.649122807017544e-06, "loss": 0.0, "step": 7634 }, { "epoch": 0.49204098730424695, "grad_norm": 0.001527761549636789, "learning_rate": 5.648406731113498e-06, "loss": 0.0, "step": 7635 }, { "epoch": 0.49210543275117613, "grad_norm": 0.002563567830487596, "learning_rate": 5.6476906552094524e-06, "loss": 0.0, "step": 7636 }, { "epoch": 0.4921698781981053, "grad_norm": 0.0020392195153546198, "learning_rate": 5.646974579305407e-06, "loss": 0.0, "step": 7637 }, { "epoch": 0.4922343236450345, "grad_norm": 0.0025428419743069013, "learning_rate": 5.646258503401361e-06, "loss": 0.0, "step": 7638 }, { "epoch": 0.49229876909196363, "grad_norm": 0.0010233870824352465, "learning_rate": 5.6455424274973145e-06, "loss": 0.0, "step": 7639 }, { "epoch": 0.4923632145388928, "grad_norm": 0.16308028959442436, "learning_rate": 5.644826351593269e-06, "loss": 0.0005, "step": 7640 }, { "epoch": 0.492427659985822, "grad_norm": 0.0002236146238672205, "learning_rate": 5.644110275689223e-06, "loss": 0.0, "step": 7641 }, { "epoch": 0.4924921054327512, "grad_norm": 0.003699660007362483, "learning_rate": 5.643394199785178e-06, "loss": 0.0, "step": 7642 }, { "epoch": 0.49255655087968037, "grad_norm": 0.6511107573406089, "learning_rate": 5.6426781238811325e-06, "loss": 0.0012, "step": 7643 }, { "epoch": 0.49262099632660955, "grad_norm": 0.0006555271681799672, "learning_rate": 5.641962047977087e-06, "loss": 0.0, "step": 7644 }, { "epoch": 0.4926854417735387, "grad_norm": 0.011247004639613586, "learning_rate": 5.64124597207304e-06, "loss": 0.0, "step": 7645 }, { "epoch": 0.49274988722046786, "grad_norm": 0.04122928475379016, "learning_rate": 5.6405298961689946e-06, "loss": 0.0, "step": 7646 }, { "epoch": 0.49281433266739705, "grad_norm": 0.09820465599572177, "learning_rate": 5.639813820264949e-06, "loss": 0.0003, "step": 7647 }, { "epoch": 0.49287877811432623, "grad_norm": 0.0005418565121424015, "learning_rate": 5.639097744360903e-06, "loss": 0.0, "step": 7648 }, { "epoch": 0.4929432235612554, "grad_norm": 0.25866958451243477, "learning_rate": 5.6383816684568575e-06, "loss": 0.0004, "step": 7649 }, { "epoch": 0.49300766900818455, "grad_norm": 0.014861191542048305, "learning_rate": 5.637665592552811e-06, "loss": 0.0, "step": 7650 }, { "epoch": 0.49307211445511373, "grad_norm": 0.06393503994418094, "learning_rate": 5.636949516648765e-06, "loss": 0.0002, "step": 7651 }, { "epoch": 0.4931365599020429, "grad_norm": 0.001295256191406105, "learning_rate": 5.6362334407447195e-06, "loss": 0.0, "step": 7652 }, { "epoch": 0.4932010053489721, "grad_norm": 0.0013651152885221776, "learning_rate": 5.635517364840674e-06, "loss": 0.0, "step": 7653 }, { "epoch": 0.4932654507959013, "grad_norm": 0.034053694591434605, "learning_rate": 5.634801288936628e-06, "loss": 0.0, "step": 7654 }, { "epoch": 0.49332989624283047, "grad_norm": 0.025483034313675483, "learning_rate": 5.6340852130325816e-06, "loss": 0.0001, "step": 7655 }, { "epoch": 0.4933943416897596, "grad_norm": 0.0867213163984139, "learning_rate": 5.633369137128536e-06, "loss": 0.0002, "step": 7656 }, { "epoch": 0.4934587871366888, "grad_norm": 0.036528468614227996, "learning_rate": 5.63265306122449e-06, "loss": 0.0005, "step": 7657 }, { "epoch": 0.49352323258361797, "grad_norm": 0.014582704541776105, "learning_rate": 5.6319369853204445e-06, "loss": 0.0001, "step": 7658 }, { "epoch": 0.49358767803054715, "grad_norm": 0.006312064480264101, "learning_rate": 5.631220909416398e-06, "loss": 0.0, "step": 7659 }, { "epoch": 0.49365212347747633, "grad_norm": 0.05966863374292694, "learning_rate": 5.630504833512352e-06, "loss": 0.0001, "step": 7660 }, { "epoch": 0.49371656892440546, "grad_norm": 0.01641082586326572, "learning_rate": 5.6297887576083065e-06, "loss": 0.0001, "step": 7661 }, { "epoch": 0.49378101437133465, "grad_norm": 0.003940255907692563, "learning_rate": 5.629072681704261e-06, "loss": 0.0, "step": 7662 }, { "epoch": 0.49384545981826383, "grad_norm": 0.006613115214583254, "learning_rate": 5.628356605800215e-06, "loss": 0.0001, "step": 7663 }, { "epoch": 0.493909905265193, "grad_norm": 0.05100384626993441, "learning_rate": 5.6276405298961686e-06, "loss": 0.0001, "step": 7664 }, { "epoch": 0.4939743507121222, "grad_norm": 0.003475097216405046, "learning_rate": 5.6269244539921246e-06, "loss": 0.0, "step": 7665 }, { "epoch": 0.4940387961590514, "grad_norm": 0.002349933148403365, "learning_rate": 5.626208378088078e-06, "loss": 0.0, "step": 7666 }, { "epoch": 0.4941032416059805, "grad_norm": 0.1691805409207571, "learning_rate": 5.625492302184032e-06, "loss": 0.0004, "step": 7667 }, { "epoch": 0.4941676870529097, "grad_norm": 0.04184740078663928, "learning_rate": 5.624776226279987e-06, "loss": 0.0001, "step": 7668 }, { "epoch": 0.4942321324998389, "grad_norm": 0.015622903452228139, "learning_rate": 5.624060150375941e-06, "loss": 0.0, "step": 7669 }, { "epoch": 0.49429657794676807, "grad_norm": 0.006198852245547676, "learning_rate": 5.623344074471895e-06, "loss": 0.0, "step": 7670 }, { "epoch": 0.49436102339369725, "grad_norm": 0.03239944533858724, "learning_rate": 5.622627998567849e-06, "loss": 0.0001, "step": 7671 }, { "epoch": 0.49442546884062644, "grad_norm": 0.23790876792287802, "learning_rate": 5.621911922663803e-06, "loss": 0.004, "step": 7672 }, { "epoch": 0.49448991428755557, "grad_norm": 0.002371005246239983, "learning_rate": 5.621195846759757e-06, "loss": 0.0, "step": 7673 }, { "epoch": 0.49455435973448475, "grad_norm": 0.00392991030615517, "learning_rate": 5.6204797708557115e-06, "loss": 0.0, "step": 7674 }, { "epoch": 0.49461880518141393, "grad_norm": 0.4288448915211329, "learning_rate": 5.619763694951665e-06, "loss": 0.0046, "step": 7675 }, { "epoch": 0.4946832506283431, "grad_norm": 0.05516306764421135, "learning_rate": 5.619047619047619e-06, "loss": 0.0003, "step": 7676 }, { "epoch": 0.4947476960752723, "grad_norm": 0.010246722793248482, "learning_rate": 5.618331543143574e-06, "loss": 0.0, "step": 7677 }, { "epoch": 0.49481214152220143, "grad_norm": 0.007946841214768584, "learning_rate": 5.617615467239528e-06, "loss": 0.0, "step": 7678 }, { "epoch": 0.4948765869691306, "grad_norm": 0.0548576853204545, "learning_rate": 5.616899391335482e-06, "loss": 0.0, "step": 7679 }, { "epoch": 0.4949410324160598, "grad_norm": 0.07455733516548389, "learning_rate": 5.616183315431436e-06, "loss": 0.0001, "step": 7680 }, { "epoch": 0.495005477862989, "grad_norm": 0.001946500852743602, "learning_rate": 5.61546723952739e-06, "loss": 0.0, "step": 7681 }, { "epoch": 0.49506992330991817, "grad_norm": 0.11966813281237758, "learning_rate": 5.614751163623344e-06, "loss": 0.0003, "step": 7682 }, { "epoch": 0.49513436875684735, "grad_norm": 0.22977987073403688, "learning_rate": 5.6140350877192985e-06, "loss": 0.0012, "step": 7683 }, { "epoch": 0.4951988142037765, "grad_norm": 0.004786080663721793, "learning_rate": 5.613319011815253e-06, "loss": 0.0, "step": 7684 }, { "epoch": 0.49526325965070567, "grad_norm": 0.3058660284807045, "learning_rate": 5.612602935911206e-06, "loss": 0.0025, "step": 7685 }, { "epoch": 0.49532770509763485, "grad_norm": 0.0006387131833626681, "learning_rate": 5.611886860007161e-06, "loss": 0.0, "step": 7686 }, { "epoch": 0.49539215054456404, "grad_norm": 0.007263654905943296, "learning_rate": 5.611170784103115e-06, "loss": 0.0, "step": 7687 }, { "epoch": 0.4954565959914932, "grad_norm": 0.00047506028153626915, "learning_rate": 5.610454708199069e-06, "loss": 0.0, "step": 7688 }, { "epoch": 0.49552104143842235, "grad_norm": 0.0012289411413441314, "learning_rate": 5.609738632295024e-06, "loss": 0.0, "step": 7689 }, { "epoch": 0.49558548688535153, "grad_norm": 0.040993396378150186, "learning_rate": 5.609022556390979e-06, "loss": 0.0001, "step": 7690 }, { "epoch": 0.4956499323322807, "grad_norm": 0.0013355882791165105, "learning_rate": 5.608306480486932e-06, "loss": 0.0, "step": 7691 }, { "epoch": 0.4957143777792099, "grad_norm": 0.010719311534290885, "learning_rate": 5.607590404582886e-06, "loss": 0.0, "step": 7692 }, { "epoch": 0.4957788232261391, "grad_norm": 0.0038407025845055135, "learning_rate": 5.606874328678841e-06, "loss": 0.0, "step": 7693 }, { "epoch": 0.49584326867306827, "grad_norm": 0.4340892057394482, "learning_rate": 5.606158252774795e-06, "loss": 0.0044, "step": 7694 }, { "epoch": 0.4959077141199974, "grad_norm": 0.005255564610214897, "learning_rate": 5.605442176870749e-06, "loss": 0.0, "step": 7695 }, { "epoch": 0.4959721595669266, "grad_norm": 0.018044455183344257, "learning_rate": 5.604726100966703e-06, "loss": 0.0002, "step": 7696 }, { "epoch": 0.49603660501385577, "grad_norm": 0.0006821521166229848, "learning_rate": 5.604010025062657e-06, "loss": 0.0, "step": 7697 }, { "epoch": 0.49610105046078495, "grad_norm": 0.12202850007171458, "learning_rate": 5.603293949158611e-06, "loss": 0.0002, "step": 7698 }, { "epoch": 0.49616549590771414, "grad_norm": 0.036351122742084956, "learning_rate": 5.602577873254566e-06, "loss": 0.0, "step": 7699 }, { "epoch": 0.49622994135464327, "grad_norm": 0.00016657967509917158, "learning_rate": 5.60186179735052e-06, "loss": 0.0, "step": 7700 }, { "epoch": 0.49629438680157245, "grad_norm": 0.1313316661645494, "learning_rate": 5.601145721446473e-06, "loss": 0.0003, "step": 7701 }, { "epoch": 0.49635883224850164, "grad_norm": 0.5427525590777412, "learning_rate": 5.600429645542428e-06, "loss": 0.001, "step": 7702 }, { "epoch": 0.4964232776954308, "grad_norm": 0.004619553697276954, "learning_rate": 5.599713569638382e-06, "loss": 0.0, "step": 7703 }, { "epoch": 0.49648772314236, "grad_norm": 0.12349671304702227, "learning_rate": 5.598997493734336e-06, "loss": 0.0016, "step": 7704 }, { "epoch": 0.4965521685892892, "grad_norm": 0.001022775411367935, "learning_rate": 5.5982814178302906e-06, "loss": 0.0, "step": 7705 }, { "epoch": 0.4966166140362183, "grad_norm": 0.11159717941088838, "learning_rate": 5.597565341926244e-06, "loss": 0.0002, "step": 7706 }, { "epoch": 0.4966810594831475, "grad_norm": 0.006720570718083302, "learning_rate": 5.596849266022198e-06, "loss": 0.0, "step": 7707 }, { "epoch": 0.4967455049300767, "grad_norm": 0.0001619117415859008, "learning_rate": 5.596133190118153e-06, "loss": 0.0, "step": 7708 }, { "epoch": 0.49680995037700587, "grad_norm": 1.5955664810823738, "learning_rate": 5.595417114214107e-06, "loss": 0.0081, "step": 7709 }, { "epoch": 0.49687439582393506, "grad_norm": 0.008624999726623077, "learning_rate": 5.594701038310061e-06, "loss": 0.0, "step": 7710 }, { "epoch": 0.49693884127086424, "grad_norm": 0.011486251104675875, "learning_rate": 5.593984962406015e-06, "loss": 0.0, "step": 7711 }, { "epoch": 0.49700328671779337, "grad_norm": 0.2950930319796222, "learning_rate": 5.59326888650197e-06, "loss": 0.0009, "step": 7712 }, { "epoch": 0.49706773216472255, "grad_norm": 0.0014471563901175158, "learning_rate": 5.592552810597924e-06, "loss": 0.0, "step": 7713 }, { "epoch": 0.49713217761165174, "grad_norm": 0.0038007229908679743, "learning_rate": 5.591836734693878e-06, "loss": 0.0, "step": 7714 }, { "epoch": 0.4971966230585809, "grad_norm": 0.0018911992897892018, "learning_rate": 5.591120658789833e-06, "loss": 0.0, "step": 7715 }, { "epoch": 0.4972610685055101, "grad_norm": 0.017360700248121624, "learning_rate": 5.590404582885787e-06, "loss": 0.0, "step": 7716 }, { "epoch": 0.49732551395243924, "grad_norm": 0.000842689886037626, "learning_rate": 5.5896885069817405e-06, "loss": 0.0, "step": 7717 }, { "epoch": 0.4973899593993684, "grad_norm": 0.10780282530014913, "learning_rate": 5.588972431077695e-06, "loss": 0.0002, "step": 7718 }, { "epoch": 0.4974544048462976, "grad_norm": 0.009681021889705044, "learning_rate": 5.588256355173649e-06, "loss": 0.0001, "step": 7719 }, { "epoch": 0.4975188502932268, "grad_norm": 0.0017426617240457378, "learning_rate": 5.587540279269603e-06, "loss": 0.0, "step": 7720 }, { "epoch": 0.497583295740156, "grad_norm": 1.5181758690231042, "learning_rate": 5.586824203365558e-06, "loss": 0.0113, "step": 7721 }, { "epoch": 0.49764774118708516, "grad_norm": 0.006614844277389507, "learning_rate": 5.586108127461511e-06, "loss": 0.0001, "step": 7722 }, { "epoch": 0.4977121866340143, "grad_norm": 0.2634574442064215, "learning_rate": 5.585392051557465e-06, "loss": 0.0012, "step": 7723 }, { "epoch": 0.49777663208094347, "grad_norm": 0.26991512770438303, "learning_rate": 5.58467597565342e-06, "loss": 0.0023, "step": 7724 }, { "epoch": 0.49784107752787266, "grad_norm": 0.009235564462089122, "learning_rate": 5.583959899749374e-06, "loss": 0.0, "step": 7725 }, { "epoch": 0.49790552297480184, "grad_norm": 0.2566201641688113, "learning_rate": 5.583243823845328e-06, "loss": 0.0005, "step": 7726 }, { "epoch": 0.497969968421731, "grad_norm": 0.005090404771266792, "learning_rate": 5.582527747941282e-06, "loss": 0.0, "step": 7727 }, { "epoch": 0.49803441386866015, "grad_norm": 0.27343225474095073, "learning_rate": 5.581811672037236e-06, "loss": 0.002, "step": 7728 }, { "epoch": 0.49809885931558934, "grad_norm": 0.031385309490975594, "learning_rate": 5.58109559613319e-06, "loss": 0.0, "step": 7729 }, { "epoch": 0.4981633047625185, "grad_norm": 0.023604299895657106, "learning_rate": 5.580379520229145e-06, "loss": 0.0, "step": 7730 }, { "epoch": 0.4982277502094477, "grad_norm": 0.010524595936297206, "learning_rate": 5.579663444325099e-06, "loss": 0.0, "step": 7731 }, { "epoch": 0.4982921956563769, "grad_norm": 0.08313716178764882, "learning_rate": 5.578947368421052e-06, "loss": 0.0001, "step": 7732 }, { "epoch": 0.4983566411033061, "grad_norm": 0.20455374205723056, "learning_rate": 5.578231292517007e-06, "loss": 0.0002, "step": 7733 }, { "epoch": 0.4984210865502352, "grad_norm": 0.006954444476851246, "learning_rate": 5.577515216612961e-06, "loss": 0.0, "step": 7734 }, { "epoch": 0.4984855319971644, "grad_norm": 0.1379288863464268, "learning_rate": 5.576799140708916e-06, "loss": 0.0012, "step": 7735 }, { "epoch": 0.4985499774440936, "grad_norm": 0.22782729888330902, "learning_rate": 5.5760830648048704e-06, "loss": 0.0003, "step": 7736 }, { "epoch": 0.49861442289102276, "grad_norm": 0.004750422647156393, "learning_rate": 5.575366988900825e-06, "loss": 0.0, "step": 7737 }, { "epoch": 0.49867886833795194, "grad_norm": 0.02539140787201507, "learning_rate": 5.574650912996778e-06, "loss": 0.0, "step": 7738 }, { "epoch": 0.49874331378488107, "grad_norm": 0.0074664136120375325, "learning_rate": 5.5739348370927325e-06, "loss": 0.0, "step": 7739 }, { "epoch": 0.49880775923181025, "grad_norm": 0.040856026752530014, "learning_rate": 5.573218761188687e-06, "loss": 0.0005, "step": 7740 }, { "epoch": 0.49887220467873944, "grad_norm": 0.023396444870685918, "learning_rate": 5.572502685284641e-06, "loss": 0.0016, "step": 7741 }, { "epoch": 0.4989366501256686, "grad_norm": 0.003881107778788916, "learning_rate": 5.571786609380595e-06, "loss": 0.0, "step": 7742 }, { "epoch": 0.4990010955725978, "grad_norm": 0.004024760296975562, "learning_rate": 5.571070533476549e-06, "loss": 0.0, "step": 7743 }, { "epoch": 0.499065541019527, "grad_norm": 0.005052507845867952, "learning_rate": 5.570354457572503e-06, "loss": 0.0001, "step": 7744 }, { "epoch": 0.4991299864664561, "grad_norm": 0.006618753422869714, "learning_rate": 5.5696383816684574e-06, "loss": 0.0, "step": 7745 }, { "epoch": 0.4991944319133853, "grad_norm": 0.013298532013776608, "learning_rate": 5.568922305764412e-06, "loss": 0.0, "step": 7746 }, { "epoch": 0.4992588773603145, "grad_norm": 0.008402784516505377, "learning_rate": 5.568206229860366e-06, "loss": 0.0, "step": 7747 }, { "epoch": 0.4993233228072437, "grad_norm": 0.028681718139751507, "learning_rate": 5.5674901539563195e-06, "loss": 0.0, "step": 7748 }, { "epoch": 0.49938776825417286, "grad_norm": 0.0004790018685773785, "learning_rate": 5.566774078052274e-06, "loss": 0.0, "step": 7749 }, { "epoch": 0.49945221370110204, "grad_norm": 0.00017217928028040734, "learning_rate": 5.566058002148228e-06, "loss": 0.0, "step": 7750 }, { "epoch": 0.4995166591480312, "grad_norm": 0.004187983856702524, "learning_rate": 5.565341926244182e-06, "loss": 0.0, "step": 7751 }, { "epoch": 0.49958110459496036, "grad_norm": 0.00661046670208516, "learning_rate": 5.564625850340136e-06, "loss": 0.0001, "step": 7752 }, { "epoch": 0.49964555004188954, "grad_norm": 0.0028452297455472866, "learning_rate": 5.56390977443609e-06, "loss": 0.0, "step": 7753 }, { "epoch": 0.4997099954888187, "grad_norm": 0.08567421038959883, "learning_rate": 5.5631936985320444e-06, "loss": 0.0003, "step": 7754 }, { "epoch": 0.4997744409357479, "grad_norm": 0.000638038899302093, "learning_rate": 5.562477622627999e-06, "loss": 0.0, "step": 7755 }, { "epoch": 0.49983888638267704, "grad_norm": 0.00032599765353913423, "learning_rate": 5.561761546723953e-06, "loss": 0.0, "step": 7756 }, { "epoch": 0.4999033318296062, "grad_norm": 0.005907647457967332, "learning_rate": 5.5610454708199065e-06, "loss": 0.0, "step": 7757 }, { "epoch": 0.4999677772765354, "grad_norm": 0.005853467747557248, "learning_rate": 5.560329394915861e-06, "loss": 0.0, "step": 7758 }, { "epoch": 0.5000322227234646, "grad_norm": 0.001166157403481343, "learning_rate": 5.559613319011816e-06, "loss": 0.0, "step": 7759 }, { "epoch": 0.5000966681703938, "grad_norm": 0.006691685794746546, "learning_rate": 5.55889724310777e-06, "loss": 0.0, "step": 7760 }, { "epoch": 0.500161113617323, "grad_norm": 0.00032993997002244194, "learning_rate": 5.5581811672037245e-06, "loss": 0.0, "step": 7761 }, { "epoch": 0.5002255590642521, "grad_norm": 0.010774148663307965, "learning_rate": 5.557465091299679e-06, "loss": 0.0001, "step": 7762 }, { "epoch": 0.5002900045111813, "grad_norm": 0.04316844108864388, "learning_rate": 5.556749015395633e-06, "loss": 0.0001, "step": 7763 }, { "epoch": 0.5003544499581105, "grad_norm": 0.1035563865516481, "learning_rate": 5.5560329394915866e-06, "loss": 0.0002, "step": 7764 }, { "epoch": 0.5004188954050396, "grad_norm": 0.0013261507056000824, "learning_rate": 5.555316863587541e-06, "loss": 0.0, "step": 7765 }, { "epoch": 0.5004833408519688, "grad_norm": 0.012714541640589294, "learning_rate": 5.554600787683495e-06, "loss": 0.0, "step": 7766 }, { "epoch": 0.500547786298898, "grad_norm": 0.0031739563583639747, "learning_rate": 5.5538847117794495e-06, "loss": 0.0, "step": 7767 }, { "epoch": 0.5006122317458271, "grad_norm": 0.11012735207591887, "learning_rate": 5.553168635875403e-06, "loss": 0.0001, "step": 7768 }, { "epoch": 0.5006766771927563, "grad_norm": 0.015187950687640926, "learning_rate": 5.552452559971357e-06, "loss": 0.0001, "step": 7769 }, { "epoch": 0.5007411226396855, "grad_norm": 0.013135091208567084, "learning_rate": 5.5517364840673115e-06, "loss": 0.0, "step": 7770 }, { "epoch": 0.5008055680866147, "grad_norm": 0.2000327962569176, "learning_rate": 5.551020408163266e-06, "loss": 0.0003, "step": 7771 }, { "epoch": 0.5008700135335439, "grad_norm": 0.00024363627308754418, "learning_rate": 5.55030433225922e-06, "loss": 0.0, "step": 7772 }, { "epoch": 0.5009344589804731, "grad_norm": 4.0486426170484195e-05, "learning_rate": 5.5495882563551736e-06, "loss": 0.0, "step": 7773 }, { "epoch": 0.5009989044274022, "grad_norm": 0.0044095567704248275, "learning_rate": 5.548872180451128e-06, "loss": 0.0, "step": 7774 }, { "epoch": 0.5010633498743314, "grad_norm": 0.06624869983000084, "learning_rate": 5.548156104547082e-06, "loss": 0.0001, "step": 7775 }, { "epoch": 0.5011277953212605, "grad_norm": 0.26731744305720906, "learning_rate": 5.5474400286430365e-06, "loss": 0.0012, "step": 7776 }, { "epoch": 0.5011922407681897, "grad_norm": 0.0031713670464031084, "learning_rate": 5.546723952738991e-06, "loss": 0.0, "step": 7777 }, { "epoch": 0.5012566862151189, "grad_norm": 0.10094297798692436, "learning_rate": 5.546007876834944e-06, "loss": 0.0002, "step": 7778 }, { "epoch": 0.5013211316620481, "grad_norm": 0.0006766827932490896, "learning_rate": 5.5452918009308985e-06, "loss": 0.0, "step": 7779 }, { "epoch": 0.5013855771089772, "grad_norm": 0.006504950923653288, "learning_rate": 5.544575725026853e-06, "loss": 0.0, "step": 7780 }, { "epoch": 0.5014500225559064, "grad_norm": 0.002359928133011971, "learning_rate": 5.543859649122807e-06, "loss": 0.0, "step": 7781 }, { "epoch": 0.5015144680028356, "grad_norm": 0.001259427859195719, "learning_rate": 5.543143573218762e-06, "loss": 0.0, "step": 7782 }, { "epoch": 0.5015789134497648, "grad_norm": 4.103638976019595e-05, "learning_rate": 5.5424274973147166e-06, "loss": 0.0, "step": 7783 }, { "epoch": 0.501643358896694, "grad_norm": 0.008854051233046384, "learning_rate": 5.54171142141067e-06, "loss": 0.0, "step": 7784 }, { "epoch": 0.5017078043436232, "grad_norm": 0.0033936376934568737, "learning_rate": 5.540995345506624e-06, "loss": 0.0, "step": 7785 }, { "epoch": 0.5017722497905523, "grad_norm": 0.023507155038777785, "learning_rate": 5.540279269602579e-06, "loss": 0.0, "step": 7786 }, { "epoch": 0.5018366952374814, "grad_norm": 0.005942011229836717, "learning_rate": 5.539563193698533e-06, "loss": 0.0, "step": 7787 }, { "epoch": 0.5019011406844106, "grad_norm": 0.02232280920114333, "learning_rate": 5.538847117794487e-06, "loss": 0.0001, "step": 7788 }, { "epoch": 0.5019655861313398, "grad_norm": 0.0016590937530407013, "learning_rate": 5.538131041890441e-06, "loss": 0.0, "step": 7789 }, { "epoch": 0.502030031578269, "grad_norm": 0.011643822116127455, "learning_rate": 5.537414965986395e-06, "loss": 0.0001, "step": 7790 }, { "epoch": 0.5020944770251982, "grad_norm": 0.0005359369657540056, "learning_rate": 5.536698890082349e-06, "loss": 0.0, "step": 7791 }, { "epoch": 0.5021589224721273, "grad_norm": 0.06418886703555608, "learning_rate": 5.5359828141783035e-06, "loss": 0.0, "step": 7792 }, { "epoch": 0.5022233679190565, "grad_norm": 0.0025205363208524005, "learning_rate": 5.535266738274258e-06, "loss": 0.0, "step": 7793 }, { "epoch": 0.5022878133659857, "grad_norm": 0.47469825069465993, "learning_rate": 5.534550662370211e-06, "loss": 0.0027, "step": 7794 }, { "epoch": 0.5023522588129149, "grad_norm": 0.0067098508005370774, "learning_rate": 5.533834586466166e-06, "loss": 0.0, "step": 7795 }, { "epoch": 0.5024167042598441, "grad_norm": 7.736586171454487e-05, "learning_rate": 5.53311851056212e-06, "loss": 0.0, "step": 7796 }, { "epoch": 0.5024811497067733, "grad_norm": 0.0212810879829306, "learning_rate": 5.532402434658074e-06, "loss": 0.0, "step": 7797 }, { "epoch": 0.5025455951537023, "grad_norm": 0.027915426011045134, "learning_rate": 5.5316863587540285e-06, "loss": 0.0001, "step": 7798 }, { "epoch": 0.5026100406006315, "grad_norm": 0.0005632651672147035, "learning_rate": 5.530970282849982e-06, "loss": 0.0, "step": 7799 }, { "epoch": 0.5026744860475607, "grad_norm": 0.20493163371728026, "learning_rate": 5.530254206945936e-06, "loss": 0.0008, "step": 7800 }, { "epoch": 0.5027389314944899, "grad_norm": 0.015775780970922863, "learning_rate": 5.5295381310418905e-06, "loss": 0.0, "step": 7801 }, { "epoch": 0.5028033769414191, "grad_norm": 0.06814150491643632, "learning_rate": 5.528822055137845e-06, "loss": 0.0001, "step": 7802 }, { "epoch": 0.5028678223883483, "grad_norm": 0.18345450132435454, "learning_rate": 5.528105979233799e-06, "loss": 0.0005, "step": 7803 }, { "epoch": 0.5029322678352774, "grad_norm": 0.008875562764333902, "learning_rate": 5.527389903329753e-06, "loss": 0.0001, "step": 7804 }, { "epoch": 0.5029967132822066, "grad_norm": 0.00011833865029197304, "learning_rate": 5.526673827425708e-06, "loss": 0.0, "step": 7805 }, { "epoch": 0.5030611587291358, "grad_norm": 0.001142839209205189, "learning_rate": 5.525957751521662e-06, "loss": 0.0, "step": 7806 }, { "epoch": 0.503125604176065, "grad_norm": 0.0007741564340708393, "learning_rate": 5.525241675617616e-06, "loss": 0.0, "step": 7807 }, { "epoch": 0.5031900496229942, "grad_norm": 0.06842654198181584, "learning_rate": 5.524525599713571e-06, "loss": 0.0002, "step": 7808 }, { "epoch": 0.5032544950699233, "grad_norm": 0.0018370367967294632, "learning_rate": 5.523809523809525e-06, "loss": 0.0, "step": 7809 }, { "epoch": 0.5033189405168524, "grad_norm": 0.0042387088958715, "learning_rate": 5.523093447905478e-06, "loss": 0.0, "step": 7810 }, { "epoch": 0.5033833859637816, "grad_norm": 0.0014034877772028946, "learning_rate": 5.522377372001433e-06, "loss": 0.0, "step": 7811 }, { "epoch": 0.5034478314107108, "grad_norm": 0.047558174581854164, "learning_rate": 5.521661296097387e-06, "loss": 0.0003, "step": 7812 }, { "epoch": 0.50351227685764, "grad_norm": 0.11587947569488101, "learning_rate": 5.520945220193341e-06, "loss": 0.0001, "step": 7813 }, { "epoch": 0.5035767223045692, "grad_norm": 0.0008404481830644785, "learning_rate": 5.520229144289296e-06, "loss": 0.0, "step": 7814 }, { "epoch": 0.5036411677514984, "grad_norm": 0.027745073397463797, "learning_rate": 5.519513068385249e-06, "loss": 0.0002, "step": 7815 }, { "epoch": 0.5037056131984275, "grad_norm": 0.0015664983895771825, "learning_rate": 5.518796992481203e-06, "loss": 0.0, "step": 7816 }, { "epoch": 0.5037700586453567, "grad_norm": 4.358205768194176e-05, "learning_rate": 5.518080916577158e-06, "loss": 0.0, "step": 7817 }, { "epoch": 0.5038345040922859, "grad_norm": 0.7133459022193046, "learning_rate": 5.517364840673112e-06, "loss": 0.0042, "step": 7818 }, { "epoch": 0.5038989495392151, "grad_norm": 0.00675914167301981, "learning_rate": 5.516648764769066e-06, "loss": 0.0, "step": 7819 }, { "epoch": 0.5039633949861442, "grad_norm": 0.4145019286793012, "learning_rate": 5.51593268886502e-06, "loss": 0.0018, "step": 7820 }, { "epoch": 0.5040278404330734, "grad_norm": 0.0005312810452736112, "learning_rate": 5.515216612960974e-06, "loss": 0.0, "step": 7821 }, { "epoch": 0.5040922858800025, "grad_norm": 0.24971380839961985, "learning_rate": 5.514500537056928e-06, "loss": 0.0043, "step": 7822 }, { "epoch": 0.5041567313269317, "grad_norm": 0.11865125663479083, "learning_rate": 5.5137844611528826e-06, "loss": 0.0014, "step": 7823 }, { "epoch": 0.5042211767738609, "grad_norm": 0.005578891068497698, "learning_rate": 5.513068385248837e-06, "loss": 0.0, "step": 7824 }, { "epoch": 0.5042856222207901, "grad_norm": 0.00012931750021159341, "learning_rate": 5.51235230934479e-06, "loss": 0.0, "step": 7825 }, { "epoch": 0.5043500676677193, "grad_norm": 0.6063908845861173, "learning_rate": 5.511636233440745e-06, "loss": 0.0016, "step": 7826 }, { "epoch": 0.5044145131146485, "grad_norm": 0.00775264018639695, "learning_rate": 5.510920157536699e-06, "loss": 0.0, "step": 7827 }, { "epoch": 0.5044789585615777, "grad_norm": 0.1251495777689951, "learning_rate": 5.510204081632653e-06, "loss": 0.0017, "step": 7828 }, { "epoch": 0.5045434040085068, "grad_norm": 0.08034870037365004, "learning_rate": 5.509488005728608e-06, "loss": 0.0001, "step": 7829 }, { "epoch": 0.504607849455436, "grad_norm": 0.18931312894291646, "learning_rate": 5.508771929824563e-06, "loss": 0.001, "step": 7830 }, { "epoch": 0.5046722949023651, "grad_norm": 0.0001198111221057338, "learning_rate": 5.508055853920516e-06, "loss": 0.0, "step": 7831 }, { "epoch": 0.5047367403492943, "grad_norm": 0.0008712099359854654, "learning_rate": 5.50733977801647e-06, "loss": 0.0, "step": 7832 }, { "epoch": 0.5048011857962235, "grad_norm": 0.024833433088498218, "learning_rate": 5.506623702112425e-06, "loss": 0.0002, "step": 7833 }, { "epoch": 0.5048656312431526, "grad_norm": 0.0011160004670191623, "learning_rate": 5.505907626208379e-06, "loss": 0.0, "step": 7834 }, { "epoch": 0.5049300766900818, "grad_norm": 0.2044019086283718, "learning_rate": 5.505191550304333e-06, "loss": 0.0021, "step": 7835 }, { "epoch": 0.504994522137011, "grad_norm": 0.0008533538186981164, "learning_rate": 5.504475474400287e-06, "loss": 0.0, "step": 7836 }, { "epoch": 0.5050589675839402, "grad_norm": 0.000822287982651814, "learning_rate": 5.503759398496241e-06, "loss": 0.0, "step": 7837 }, { "epoch": 0.5051234130308694, "grad_norm": 0.000461616928902722, "learning_rate": 5.503043322592195e-06, "loss": 0.0, "step": 7838 }, { "epoch": 0.5051878584777986, "grad_norm": 0.0006680393711834995, "learning_rate": 5.50232724668815e-06, "loss": 0.0, "step": 7839 }, { "epoch": 0.5052523039247278, "grad_norm": 8.234770240082646e-05, "learning_rate": 5.501611170784104e-06, "loss": 0.0, "step": 7840 }, { "epoch": 0.5053167493716569, "grad_norm": 0.011275463051318033, "learning_rate": 5.500895094880057e-06, "loss": 0.0001, "step": 7841 }, { "epoch": 0.5053811948185861, "grad_norm": 8.605929278122782e-05, "learning_rate": 5.500179018976012e-06, "loss": 0.0, "step": 7842 }, { "epoch": 0.5054456402655152, "grad_norm": 0.678472155824014, "learning_rate": 5.499462943071966e-06, "loss": 0.0057, "step": 7843 }, { "epoch": 0.5055100857124444, "grad_norm": 0.0004380668417542172, "learning_rate": 5.49874686716792e-06, "loss": 0.0, "step": 7844 }, { "epoch": 0.5055745311593736, "grad_norm": 0.00023279857462329666, "learning_rate": 5.498030791263875e-06, "loss": 0.0, "step": 7845 }, { "epoch": 0.5056389766063027, "grad_norm": 0.351107005576601, "learning_rate": 5.497314715359828e-06, "loss": 0.0008, "step": 7846 }, { "epoch": 0.5057034220532319, "grad_norm": 0.014867512331617283, "learning_rate": 5.496598639455782e-06, "loss": 0.0, "step": 7847 }, { "epoch": 0.5057678675001611, "grad_norm": 0.00021601504494369152, "learning_rate": 5.495882563551737e-06, "loss": 0.0, "step": 7848 }, { "epoch": 0.5058323129470903, "grad_norm": 0.000571257955598517, "learning_rate": 5.495166487647691e-06, "loss": 0.0, "step": 7849 }, { "epoch": 0.5058967583940195, "grad_norm": 0.10286979470954646, "learning_rate": 5.494450411743644e-06, "loss": 0.0003, "step": 7850 }, { "epoch": 0.5059612038409487, "grad_norm": 0.011815902087395896, "learning_rate": 5.493734335839599e-06, "loss": 0.0001, "step": 7851 }, { "epoch": 0.5060256492878779, "grad_norm": 0.004256208408072257, "learning_rate": 5.493018259935554e-06, "loss": 0.0, "step": 7852 }, { "epoch": 0.506090094734807, "grad_norm": 0.0010554367991475037, "learning_rate": 5.492302184031508e-06, "loss": 0.0, "step": 7853 }, { "epoch": 0.5061545401817361, "grad_norm": 0.0008940419510676588, "learning_rate": 5.4915861081274624e-06, "loss": 0.0, "step": 7854 }, { "epoch": 0.5062189856286653, "grad_norm": 0.15635816883053835, "learning_rate": 5.490870032223417e-06, "loss": 0.0007, "step": 7855 }, { "epoch": 0.5062834310755945, "grad_norm": 0.002856546237428052, "learning_rate": 5.490153956319371e-06, "loss": 0.0, "step": 7856 }, { "epoch": 0.5063478765225237, "grad_norm": 0.13532431522261668, "learning_rate": 5.4894378804153245e-06, "loss": 0.0003, "step": 7857 }, { "epoch": 0.5064123219694528, "grad_norm": 0.8924651293527359, "learning_rate": 5.488721804511279e-06, "loss": 0.0041, "step": 7858 }, { "epoch": 0.506476767416382, "grad_norm": 0.0003431231341416375, "learning_rate": 5.488005728607233e-06, "loss": 0.0, "step": 7859 }, { "epoch": 0.5065412128633112, "grad_norm": 0.14257876513611784, "learning_rate": 5.487289652703187e-06, "loss": 0.0004, "step": 7860 }, { "epoch": 0.5066056583102404, "grad_norm": 0.5874093706149864, "learning_rate": 5.486573576799142e-06, "loss": 0.0022, "step": 7861 }, { "epoch": 0.5066701037571696, "grad_norm": 0.0017878685428287893, "learning_rate": 5.485857500895095e-06, "loss": 0.0, "step": 7862 }, { "epoch": 0.5067345492040988, "grad_norm": 0.041144057837388136, "learning_rate": 5.4851414249910494e-06, "loss": 0.0004, "step": 7863 }, { "epoch": 0.506798994651028, "grad_norm": 0.003932462138094369, "learning_rate": 5.484425349087004e-06, "loss": 0.0, "step": 7864 }, { "epoch": 0.506863440097957, "grad_norm": 0.0006095626648205814, "learning_rate": 5.483709273182958e-06, "loss": 0.0, "step": 7865 }, { "epoch": 0.5069278855448862, "grad_norm": 0.0048959003447374555, "learning_rate": 5.4829931972789115e-06, "loss": 0.0, "step": 7866 }, { "epoch": 0.5069923309918154, "grad_norm": 0.018113076815429438, "learning_rate": 5.482277121374866e-06, "loss": 0.0001, "step": 7867 }, { "epoch": 0.5070567764387446, "grad_norm": 0.010513576606503637, "learning_rate": 5.48156104547082e-06, "loss": 0.0, "step": 7868 }, { "epoch": 0.5071212218856738, "grad_norm": 0.003912950009431698, "learning_rate": 5.480844969566774e-06, "loss": 0.0, "step": 7869 }, { "epoch": 0.507185667332603, "grad_norm": 0.0023382469396162698, "learning_rate": 5.480128893662729e-06, "loss": 0.0, "step": 7870 }, { "epoch": 0.5072501127795321, "grad_norm": 0.021264051917344883, "learning_rate": 5.479412817758682e-06, "loss": 0.0, "step": 7871 }, { "epoch": 0.5073145582264613, "grad_norm": 0.04142688589003656, "learning_rate": 5.4786967418546364e-06, "loss": 0.0001, "step": 7872 }, { "epoch": 0.5073790036733905, "grad_norm": 0.34118988635976827, "learning_rate": 5.477980665950591e-06, "loss": 0.0026, "step": 7873 }, { "epoch": 0.5074434491203197, "grad_norm": 0.1676696231890971, "learning_rate": 5.477264590046545e-06, "loss": 0.0003, "step": 7874 }, { "epoch": 0.5075078945672489, "grad_norm": 0.07809003405108669, "learning_rate": 5.4765485141425e-06, "loss": 0.0005, "step": 7875 }, { "epoch": 0.507572340014178, "grad_norm": 0.016567306202172546, "learning_rate": 5.4758324382384545e-06, "loss": 0.0001, "step": 7876 }, { "epoch": 0.5076367854611071, "grad_norm": 0.7576097826830628, "learning_rate": 5.475116362334409e-06, "loss": 0.0029, "step": 7877 }, { "epoch": 0.5077012309080363, "grad_norm": 0.14010409729777418, "learning_rate": 5.474400286430362e-06, "loss": 0.0001, "step": 7878 }, { "epoch": 0.5077656763549655, "grad_norm": 0.005552410463870406, "learning_rate": 5.4736842105263165e-06, "loss": 0.0, "step": 7879 }, { "epoch": 0.5078301218018947, "grad_norm": 0.006620169711987851, "learning_rate": 5.472968134622271e-06, "loss": 0.0, "step": 7880 }, { "epoch": 0.5078945672488239, "grad_norm": 1.0878805985322575, "learning_rate": 5.472252058718225e-06, "loss": 0.0043, "step": 7881 }, { "epoch": 0.507959012695753, "grad_norm": 0.003664526663129976, "learning_rate": 5.4715359828141786e-06, "loss": 0.0, "step": 7882 }, { "epoch": 0.5080234581426822, "grad_norm": 0.051065456594272696, "learning_rate": 5.470819906910133e-06, "loss": 0.0001, "step": 7883 }, { "epoch": 0.5080879035896114, "grad_norm": 0.0010883020255493963, "learning_rate": 5.470103831006087e-06, "loss": 0.0, "step": 7884 }, { "epoch": 0.5081523490365406, "grad_norm": 0.3437544432266218, "learning_rate": 5.4693877551020415e-06, "loss": 0.0007, "step": 7885 }, { "epoch": 0.5082167944834698, "grad_norm": 0.09143956119452665, "learning_rate": 5.468671679197996e-06, "loss": 0.0001, "step": 7886 }, { "epoch": 0.5082812399303989, "grad_norm": 0.0005676533329750051, "learning_rate": 5.467955603293949e-06, "loss": 0.0, "step": 7887 }, { "epoch": 0.508345685377328, "grad_norm": 0.14704188747214672, "learning_rate": 5.4672395273899035e-06, "loss": 0.0001, "step": 7888 }, { "epoch": 0.5084101308242572, "grad_norm": 0.0025846929900272945, "learning_rate": 5.466523451485858e-06, "loss": 0.0, "step": 7889 }, { "epoch": 0.5084745762711864, "grad_norm": 0.009612976558434051, "learning_rate": 5.465807375581812e-06, "loss": 0.0, "step": 7890 }, { "epoch": 0.5085390217181156, "grad_norm": 0.2334955803368132, "learning_rate": 5.465091299677766e-06, "loss": 0.0007, "step": 7891 }, { "epoch": 0.5086034671650448, "grad_norm": 0.039201922748705605, "learning_rate": 5.46437522377372e-06, "loss": 0.0001, "step": 7892 }, { "epoch": 0.508667912611974, "grad_norm": 0.06696498251734098, "learning_rate": 5.463659147869674e-06, "loss": 0.0001, "step": 7893 }, { "epoch": 0.5087323580589032, "grad_norm": 0.020123397663725916, "learning_rate": 5.4629430719656285e-06, "loss": 0.0001, "step": 7894 }, { "epoch": 0.5087968035058323, "grad_norm": 0.010950162090299425, "learning_rate": 5.462226996061583e-06, "loss": 0.0001, "step": 7895 }, { "epoch": 0.5088612489527615, "grad_norm": 0.005105569536457743, "learning_rate": 5.461510920157537e-06, "loss": 0.0, "step": 7896 }, { "epoch": 0.5089256943996907, "grad_norm": 0.0021593518982048537, "learning_rate": 5.4607948442534905e-06, "loss": 0.0, "step": 7897 }, { "epoch": 0.5089901398466198, "grad_norm": 0.007026752353424155, "learning_rate": 5.460078768349445e-06, "loss": 0.0, "step": 7898 }, { "epoch": 0.509054585293549, "grad_norm": 0.0012650142913674938, "learning_rate": 5.4593626924454e-06, "loss": 0.0, "step": 7899 }, { "epoch": 0.5091190307404782, "grad_norm": 0.014156076340900966, "learning_rate": 5.458646616541354e-06, "loss": 0.0001, "step": 7900 }, { "epoch": 0.5091834761874073, "grad_norm": 0.17339007764593933, "learning_rate": 5.4579305406373086e-06, "loss": 0.0008, "step": 7901 }, { "epoch": 0.5092479216343365, "grad_norm": 0.0112315500039506, "learning_rate": 5.457214464733263e-06, "loss": 0.0, "step": 7902 }, { "epoch": 0.5093123670812657, "grad_norm": 0.01594386663439557, "learning_rate": 5.456498388829216e-06, "loss": 0.0002, "step": 7903 }, { "epoch": 0.5093768125281949, "grad_norm": 0.024997468723359482, "learning_rate": 5.455782312925171e-06, "loss": 0.0001, "step": 7904 }, { "epoch": 0.5094412579751241, "grad_norm": 0.005159837593019308, "learning_rate": 5.455066237021125e-06, "loss": 0.0, "step": 7905 }, { "epoch": 0.5095057034220533, "grad_norm": 0.013976639008599155, "learning_rate": 5.454350161117079e-06, "loss": 0.0001, "step": 7906 }, { "epoch": 0.5095701488689824, "grad_norm": 1.2407112226143289, "learning_rate": 5.4536340852130335e-06, "loss": 0.0017, "step": 7907 }, { "epoch": 0.5096345943159116, "grad_norm": 0.0009534155375335451, "learning_rate": 5.452918009308987e-06, "loss": 0.0, "step": 7908 }, { "epoch": 0.5096990397628407, "grad_norm": 0.0010277082009812432, "learning_rate": 5.452201933404941e-06, "loss": 0.0, "step": 7909 }, { "epoch": 0.5097634852097699, "grad_norm": 0.012268255597643768, "learning_rate": 5.4514858575008955e-06, "loss": 0.0, "step": 7910 }, { "epoch": 0.5098279306566991, "grad_norm": 0.05539452184990833, "learning_rate": 5.45076978159685e-06, "loss": 0.0003, "step": 7911 }, { "epoch": 0.5098923761036283, "grad_norm": 0.002352447691784602, "learning_rate": 5.450053705692804e-06, "loss": 0.0, "step": 7912 }, { "epoch": 0.5099568215505574, "grad_norm": 0.000641193713048714, "learning_rate": 5.449337629788758e-06, "loss": 0.0, "step": 7913 }, { "epoch": 0.5100212669974866, "grad_norm": 0.026912041162248948, "learning_rate": 5.448621553884712e-06, "loss": 0.0001, "step": 7914 }, { "epoch": 0.5100857124444158, "grad_norm": 0.007920404774023341, "learning_rate": 5.447905477980666e-06, "loss": 0.0001, "step": 7915 }, { "epoch": 0.510150157891345, "grad_norm": 0.252571013657588, "learning_rate": 5.4471894020766205e-06, "loss": 0.0003, "step": 7916 }, { "epoch": 0.5102146033382742, "grad_norm": 0.006732229568060559, "learning_rate": 5.446473326172575e-06, "loss": 0.0, "step": 7917 }, { "epoch": 0.5102790487852034, "grad_norm": 0.008280544471657686, "learning_rate": 5.445757250268528e-06, "loss": 0.0, "step": 7918 }, { "epoch": 0.5103434942321325, "grad_norm": 0.003950611251451125, "learning_rate": 5.4450411743644825e-06, "loss": 0.0, "step": 7919 }, { "epoch": 0.5104079396790617, "grad_norm": 0.20482560914732528, "learning_rate": 5.444325098460437e-06, "loss": 0.0017, "step": 7920 }, { "epoch": 0.5104723851259908, "grad_norm": 0.13401776278697505, "learning_rate": 5.443609022556391e-06, "loss": 0.0002, "step": 7921 }, { "epoch": 0.51053683057292, "grad_norm": 0.07180763760137501, "learning_rate": 5.442892946652346e-06, "loss": 0.0002, "step": 7922 }, { "epoch": 0.5106012760198492, "grad_norm": 0.015309862017110912, "learning_rate": 5.442176870748301e-06, "loss": 0.0, "step": 7923 }, { "epoch": 0.5106657214667784, "grad_norm": 0.0011142132569021373, "learning_rate": 5.441460794844254e-06, "loss": 0.0, "step": 7924 }, { "epoch": 0.5107301669137075, "grad_norm": 0.0024562087064645146, "learning_rate": 5.440744718940208e-06, "loss": 0.0, "step": 7925 }, { "epoch": 0.5107946123606367, "grad_norm": 0.0013992305215337778, "learning_rate": 5.440028643036163e-06, "loss": 0.0, "step": 7926 }, { "epoch": 0.5108590578075659, "grad_norm": 0.0032199139412421437, "learning_rate": 5.439312567132117e-06, "loss": 0.0, "step": 7927 }, { "epoch": 0.5109235032544951, "grad_norm": 0.23834775559403137, "learning_rate": 5.438596491228071e-06, "loss": 0.0019, "step": 7928 }, { "epoch": 0.5109879487014243, "grad_norm": 0.0745276654029238, "learning_rate": 5.437880415324025e-06, "loss": 0.0001, "step": 7929 }, { "epoch": 0.5110523941483535, "grad_norm": 0.004944950771004873, "learning_rate": 5.437164339419979e-06, "loss": 0.0, "step": 7930 }, { "epoch": 0.5111168395952826, "grad_norm": 0.00038942084221351607, "learning_rate": 5.436448263515933e-06, "loss": 0.0, "step": 7931 }, { "epoch": 0.5111812850422117, "grad_norm": 0.0005447171405046528, "learning_rate": 5.435732187611888e-06, "loss": 0.0, "step": 7932 }, { "epoch": 0.5112457304891409, "grad_norm": 0.002356542261505252, "learning_rate": 5.435016111707842e-06, "loss": 0.0, "step": 7933 }, { "epoch": 0.5113101759360701, "grad_norm": 0.0012706526169710428, "learning_rate": 5.434300035803795e-06, "loss": 0.0, "step": 7934 }, { "epoch": 0.5113746213829993, "grad_norm": 0.07874067125905353, "learning_rate": 5.43358395989975e-06, "loss": 0.0001, "step": 7935 }, { "epoch": 0.5114390668299285, "grad_norm": 0.001029995249023795, "learning_rate": 5.432867883995704e-06, "loss": 0.0, "step": 7936 }, { "epoch": 0.5115035122768576, "grad_norm": 0.001838396980398961, "learning_rate": 5.432151808091658e-06, "loss": 0.0, "step": 7937 }, { "epoch": 0.5115679577237868, "grad_norm": 0.0005883734590697582, "learning_rate": 5.4314357321876125e-06, "loss": 0.0, "step": 7938 }, { "epoch": 0.511632403170716, "grad_norm": 0.0172669531723459, "learning_rate": 5.430719656283566e-06, "loss": 0.0, "step": 7939 }, { "epoch": 0.5116968486176452, "grad_norm": 0.0009709076387414066, "learning_rate": 5.43000358037952e-06, "loss": 0.0, "step": 7940 }, { "epoch": 0.5117612940645744, "grad_norm": 0.008330469998503403, "learning_rate": 5.4292875044754746e-06, "loss": 0.0001, "step": 7941 }, { "epoch": 0.5118257395115036, "grad_norm": 0.008222742107848081, "learning_rate": 5.428571428571429e-06, "loss": 0.0, "step": 7942 }, { "epoch": 0.5118901849584326, "grad_norm": 0.0035724153632476397, "learning_rate": 5.427855352667382e-06, "loss": 0.0, "step": 7943 }, { "epoch": 0.5119546304053618, "grad_norm": 0.0916269789696049, "learning_rate": 5.427139276763337e-06, "loss": 0.0002, "step": 7944 }, { "epoch": 0.512019075852291, "grad_norm": 0.005102800168367895, "learning_rate": 5.426423200859291e-06, "loss": 0.0, "step": 7945 }, { "epoch": 0.5120835212992202, "grad_norm": 0.11785411775680807, "learning_rate": 5.425707124955246e-06, "loss": 0.0001, "step": 7946 }, { "epoch": 0.5121479667461494, "grad_norm": 0.006188772612846142, "learning_rate": 5.4249910490512e-06, "loss": 0.0, "step": 7947 }, { "epoch": 0.5122124121930786, "grad_norm": 0.0010159282271246758, "learning_rate": 5.424274973147155e-06, "loss": 0.0, "step": 7948 }, { "epoch": 0.5122768576400077, "grad_norm": 0.05386086260127591, "learning_rate": 5.423558897243109e-06, "loss": 0.0002, "step": 7949 }, { "epoch": 0.5123413030869369, "grad_norm": 0.15529134635092198, "learning_rate": 5.422842821339062e-06, "loss": 0.0002, "step": 7950 }, { "epoch": 0.5124057485338661, "grad_norm": 0.01658001019660191, "learning_rate": 5.422126745435017e-06, "loss": 0.0001, "step": 7951 }, { "epoch": 0.5124701939807953, "grad_norm": 0.0009362448979712939, "learning_rate": 5.421410669530971e-06, "loss": 0.0, "step": 7952 }, { "epoch": 0.5125346394277245, "grad_norm": 0.00837422541313603, "learning_rate": 5.420694593626925e-06, "loss": 0.0001, "step": 7953 }, { "epoch": 0.5125990848746536, "grad_norm": 0.015030309507146898, "learning_rate": 5.41997851772288e-06, "loss": 0.0, "step": 7954 }, { "epoch": 0.5126635303215827, "grad_norm": 0.00020856912114475138, "learning_rate": 5.419262441818833e-06, "loss": 0.0, "step": 7955 }, { "epoch": 0.5127279757685119, "grad_norm": 0.04200720795641049, "learning_rate": 5.418546365914787e-06, "loss": 0.0001, "step": 7956 }, { "epoch": 0.5127924212154411, "grad_norm": 3.0653002645703564, "learning_rate": 5.417830290010742e-06, "loss": 0.0219, "step": 7957 }, { "epoch": 0.5128568666623703, "grad_norm": 0.0005120299741213488, "learning_rate": 5.417114214106696e-06, "loss": 0.0, "step": 7958 }, { "epoch": 0.5129213121092995, "grad_norm": 0.03933544465942909, "learning_rate": 5.416398138202649e-06, "loss": 0.0001, "step": 7959 }, { "epoch": 0.5129857575562287, "grad_norm": 0.0012020741314226874, "learning_rate": 5.415682062298604e-06, "loss": 0.0, "step": 7960 }, { "epoch": 0.5130502030031578, "grad_norm": 0.0005658398741220849, "learning_rate": 5.414965986394558e-06, "loss": 0.0, "step": 7961 }, { "epoch": 0.513114648450087, "grad_norm": 0.017119137317332913, "learning_rate": 5.414249910490512e-06, "loss": 0.0002, "step": 7962 }, { "epoch": 0.5131790938970162, "grad_norm": 0.002558508419807508, "learning_rate": 5.413533834586467e-06, "loss": 0.0, "step": 7963 }, { "epoch": 0.5132435393439454, "grad_norm": 0.0348350584973022, "learning_rate": 5.41281775868242e-06, "loss": 0.0002, "step": 7964 }, { "epoch": 0.5133079847908745, "grad_norm": 0.0015763089749261437, "learning_rate": 5.412101682778374e-06, "loss": 0.0, "step": 7965 }, { "epoch": 0.5133724302378037, "grad_norm": 0.073376589482896, "learning_rate": 5.411385606874329e-06, "loss": 0.0001, "step": 7966 }, { "epoch": 0.5134368756847328, "grad_norm": 0.008660317254924959, "learning_rate": 5.410669530970283e-06, "loss": 0.0, "step": 7967 }, { "epoch": 0.513501321131662, "grad_norm": 0.00013401005881552363, "learning_rate": 5.409953455066237e-06, "loss": 0.0, "step": 7968 }, { "epoch": 0.5135657665785912, "grad_norm": 0.009542936602791867, "learning_rate": 5.409237379162192e-06, "loss": 0.0, "step": 7969 }, { "epoch": 0.5136302120255204, "grad_norm": 0.06508845696056229, "learning_rate": 5.408521303258147e-06, "loss": 0.0002, "step": 7970 }, { "epoch": 0.5136946574724496, "grad_norm": 0.00033976029903329643, "learning_rate": 5.4078052273541e-06, "loss": 0.0, "step": 7971 }, { "epoch": 0.5137591029193788, "grad_norm": 0.0012616412870784784, "learning_rate": 5.4070891514500544e-06, "loss": 0.0, "step": 7972 }, { "epoch": 0.513823548366308, "grad_norm": 0.03339331166490907, "learning_rate": 5.406373075546009e-06, "loss": 0.0001, "step": 7973 }, { "epoch": 0.5138879938132371, "grad_norm": 0.11922746760816738, "learning_rate": 5.405656999641963e-06, "loss": 0.0002, "step": 7974 }, { "epoch": 0.5139524392601663, "grad_norm": 0.018733981766930537, "learning_rate": 5.4049409237379165e-06, "loss": 0.0001, "step": 7975 }, { "epoch": 0.5140168847070954, "grad_norm": 0.1374454530979648, "learning_rate": 5.404224847833871e-06, "loss": 0.0002, "step": 7976 }, { "epoch": 0.5140813301540246, "grad_norm": 0.2097619022628654, "learning_rate": 5.403508771929825e-06, "loss": 0.001, "step": 7977 }, { "epoch": 0.5141457756009538, "grad_norm": 0.0008893098012604791, "learning_rate": 5.402792696025779e-06, "loss": 0.0, "step": 7978 }, { "epoch": 0.5142102210478829, "grad_norm": 0.00014497031483067468, "learning_rate": 5.402076620121734e-06, "loss": 0.0, "step": 7979 }, { "epoch": 0.5142746664948121, "grad_norm": 0.002657034458652046, "learning_rate": 5.401360544217687e-06, "loss": 0.0, "step": 7980 }, { "epoch": 0.5143391119417413, "grad_norm": 0.001948556525665166, "learning_rate": 5.4006444683136414e-06, "loss": 0.0, "step": 7981 }, { "epoch": 0.5144035573886705, "grad_norm": 0.0023215001057442755, "learning_rate": 5.399928392409596e-06, "loss": 0.0, "step": 7982 }, { "epoch": 0.5144680028355997, "grad_norm": 0.008492666356822762, "learning_rate": 5.39921231650555e-06, "loss": 0.0, "step": 7983 }, { "epoch": 0.5145324482825289, "grad_norm": 0.006237242050533532, "learning_rate": 5.398496240601504e-06, "loss": 0.0, "step": 7984 }, { "epoch": 0.514596893729458, "grad_norm": 0.2957292205100141, "learning_rate": 5.397780164697458e-06, "loss": 0.0025, "step": 7985 }, { "epoch": 0.5146613391763872, "grad_norm": 0.0006000581207181238, "learning_rate": 5.397064088793412e-06, "loss": 0.0, "step": 7986 }, { "epoch": 0.5147257846233164, "grad_norm": 0.0009272405372127528, "learning_rate": 5.396348012889366e-06, "loss": 0.0, "step": 7987 }, { "epoch": 0.5147902300702455, "grad_norm": 0.22983715021964995, "learning_rate": 5.395631936985321e-06, "loss": 0.0005, "step": 7988 }, { "epoch": 0.5148546755171747, "grad_norm": 0.0018526735076136297, "learning_rate": 5.394915861081275e-06, "loss": 0.0, "step": 7989 }, { "epoch": 0.5149191209641039, "grad_norm": 5.234039816139064e-05, "learning_rate": 5.3941997851772284e-06, "loss": 0.0, "step": 7990 }, { "epoch": 0.514983566411033, "grad_norm": 1.0890406449039203, "learning_rate": 5.393483709273183e-06, "loss": 0.0013, "step": 7991 }, { "epoch": 0.5150480118579622, "grad_norm": 0.0010511426046532501, "learning_rate": 5.392767633369138e-06, "loss": 0.0, "step": 7992 }, { "epoch": 0.5151124573048914, "grad_norm": 0.0011704403448952318, "learning_rate": 5.392051557465092e-06, "loss": 0.0, "step": 7993 }, { "epoch": 0.5151769027518206, "grad_norm": 0.001237749221018552, "learning_rate": 5.3913354815610465e-06, "loss": 0.0, "step": 7994 }, { "epoch": 0.5152413481987498, "grad_norm": 0.03349309412923939, "learning_rate": 5.390619405657001e-06, "loss": 0.0002, "step": 7995 }, { "epoch": 0.515305793645679, "grad_norm": 0.16808593343973655, "learning_rate": 5.389903329752954e-06, "loss": 0.0034, "step": 7996 }, { "epoch": 0.5153702390926082, "grad_norm": 0.0013906726617820943, "learning_rate": 5.3891872538489085e-06, "loss": 0.0, "step": 7997 }, { "epoch": 0.5154346845395373, "grad_norm": 0.0045494031450296355, "learning_rate": 5.388471177944863e-06, "loss": 0.0, "step": 7998 }, { "epoch": 0.5154991299864664, "grad_norm": 0.007636372474605849, "learning_rate": 5.387755102040817e-06, "loss": 0.0, "step": 7999 }, { "epoch": 0.5155635754333956, "grad_norm": 0.0015319027312418868, "learning_rate": 5.387039026136771e-06, "loss": 0.0, "step": 8000 }, { "epoch": 0.5156280208803248, "grad_norm": 0.004652866648377999, "learning_rate": 5.386322950232725e-06, "loss": 0.0, "step": 8001 }, { "epoch": 0.515692466327254, "grad_norm": 0.0018567965467686757, "learning_rate": 5.385606874328679e-06, "loss": 0.0, "step": 8002 }, { "epoch": 0.5157569117741831, "grad_norm": 0.038382413997460466, "learning_rate": 5.3848907984246335e-06, "loss": 0.0001, "step": 8003 }, { "epoch": 0.5158213572211123, "grad_norm": 0.0003577640989233756, "learning_rate": 5.384174722520588e-06, "loss": 0.0, "step": 8004 }, { "epoch": 0.5158858026680415, "grad_norm": 0.19394153080352525, "learning_rate": 5.383458646616542e-06, "loss": 0.0002, "step": 8005 }, { "epoch": 0.5159502481149707, "grad_norm": 0.0043557760796160425, "learning_rate": 5.3827425707124955e-06, "loss": 0.0, "step": 8006 }, { "epoch": 0.5160146935618999, "grad_norm": 0.26720070499559867, "learning_rate": 5.38202649480845e-06, "loss": 0.0005, "step": 8007 }, { "epoch": 0.5160791390088291, "grad_norm": 0.1252867271194741, "learning_rate": 5.381310418904404e-06, "loss": 0.0002, "step": 8008 }, { "epoch": 0.5161435844557583, "grad_norm": 0.00033245680958482986, "learning_rate": 5.380594343000358e-06, "loss": 0.0, "step": 8009 }, { "epoch": 0.5162080299026873, "grad_norm": 0.0025579589688075007, "learning_rate": 5.379878267096313e-06, "loss": 0.0, "step": 8010 }, { "epoch": 0.5162724753496165, "grad_norm": 0.009223071042598662, "learning_rate": 5.379162191192266e-06, "loss": 0.0, "step": 8011 }, { "epoch": 0.5163369207965457, "grad_norm": 0.5300760762477967, "learning_rate": 5.3784461152882205e-06, "loss": 0.0006, "step": 8012 }, { "epoch": 0.5164013662434749, "grad_norm": 0.011043418132426576, "learning_rate": 5.377730039384175e-06, "loss": 0.0, "step": 8013 }, { "epoch": 0.5164658116904041, "grad_norm": 0.00021837471094835563, "learning_rate": 5.377013963480129e-06, "loss": 0.0, "step": 8014 }, { "epoch": 0.5165302571373332, "grad_norm": 0.3276306356442377, "learning_rate": 5.376297887576083e-06, "loss": 0.0008, "step": 8015 }, { "epoch": 0.5165947025842624, "grad_norm": 0.2765679994672126, "learning_rate": 5.3755818116720385e-06, "loss": 0.0008, "step": 8016 }, { "epoch": 0.5166591480311916, "grad_norm": 0.18362511711818005, "learning_rate": 5.374865735767992e-06, "loss": 0.001, "step": 8017 }, { "epoch": 0.5167235934781208, "grad_norm": 0.002038531219077466, "learning_rate": 5.374149659863946e-06, "loss": 0.0, "step": 8018 }, { "epoch": 0.51678803892505, "grad_norm": 0.00524951653881258, "learning_rate": 5.3734335839599006e-06, "loss": 0.0, "step": 8019 }, { "epoch": 0.5168524843719792, "grad_norm": 0.003735285914029635, "learning_rate": 5.372717508055855e-06, "loss": 0.0, "step": 8020 }, { "epoch": 0.5169169298189082, "grad_norm": 0.048634071984891634, "learning_rate": 5.372001432151809e-06, "loss": 0.0001, "step": 8021 }, { "epoch": 0.5169813752658374, "grad_norm": 0.007575055206033924, "learning_rate": 5.371285356247763e-06, "loss": 0.0, "step": 8022 }, { "epoch": 0.5170458207127666, "grad_norm": 0.0896404079922022, "learning_rate": 5.370569280343717e-06, "loss": 0.001, "step": 8023 }, { "epoch": 0.5171102661596958, "grad_norm": 0.0009642160243532904, "learning_rate": 5.369853204439671e-06, "loss": 0.0, "step": 8024 }, { "epoch": 0.517174711606625, "grad_norm": 9.580468636606124e-05, "learning_rate": 5.3691371285356255e-06, "loss": 0.0, "step": 8025 }, { "epoch": 0.5172391570535542, "grad_norm": 0.00038877102939403603, "learning_rate": 5.36842105263158e-06, "loss": 0.0, "step": 8026 }, { "epoch": 0.5173036025004834, "grad_norm": 0.017995206744111487, "learning_rate": 5.367704976727533e-06, "loss": 0.0, "step": 8027 }, { "epoch": 0.5173680479474125, "grad_norm": 0.00025071646488426043, "learning_rate": 5.3669889008234875e-06, "loss": 0.0, "step": 8028 }, { "epoch": 0.5174324933943417, "grad_norm": 0.5368235711987875, "learning_rate": 5.366272824919442e-06, "loss": 0.003, "step": 8029 }, { "epoch": 0.5174969388412709, "grad_norm": 0.5099628110958586, "learning_rate": 5.365556749015396e-06, "loss": 0.0036, "step": 8030 }, { "epoch": 0.5175613842882001, "grad_norm": 0.016237080453299548, "learning_rate": 5.3648406731113504e-06, "loss": 0.0001, "step": 8031 }, { "epoch": 0.5176258297351292, "grad_norm": 0.037514827573428736, "learning_rate": 5.364124597207304e-06, "loss": 0.0002, "step": 8032 }, { "epoch": 0.5176902751820583, "grad_norm": 0.007461747262392446, "learning_rate": 5.363408521303258e-06, "loss": 0.0, "step": 8033 }, { "epoch": 0.5177547206289875, "grad_norm": 0.003969606536505866, "learning_rate": 5.3626924453992125e-06, "loss": 0.0, "step": 8034 }, { "epoch": 0.5178191660759167, "grad_norm": 0.0012055136054845725, "learning_rate": 5.361976369495167e-06, "loss": 0.0, "step": 8035 }, { "epoch": 0.5178836115228459, "grad_norm": 0.04324137427823979, "learning_rate": 5.36126029359112e-06, "loss": 0.0002, "step": 8036 }, { "epoch": 0.5179480569697751, "grad_norm": 0.022892261141917725, "learning_rate": 5.3605442176870745e-06, "loss": 0.0, "step": 8037 }, { "epoch": 0.5180125024167043, "grad_norm": 0.0011791094418522764, "learning_rate": 5.359828141783029e-06, "loss": 0.0, "step": 8038 }, { "epoch": 0.5180769478636335, "grad_norm": 0.01512018866252014, "learning_rate": 5.359112065878984e-06, "loss": 0.0, "step": 8039 }, { "epoch": 0.5181413933105626, "grad_norm": 0.17008817142783822, "learning_rate": 5.358395989974938e-06, "loss": 0.0019, "step": 8040 }, { "epoch": 0.5182058387574918, "grad_norm": 0.06252434748028268, "learning_rate": 5.357679914070893e-06, "loss": 0.0003, "step": 8041 }, { "epoch": 0.518270284204421, "grad_norm": 0.0023996196919451067, "learning_rate": 5.356963838166847e-06, "loss": 0.0, "step": 8042 }, { "epoch": 0.5183347296513501, "grad_norm": 0.0007908011827102855, "learning_rate": 5.3562477622628e-06, "loss": 0.0, "step": 8043 }, { "epoch": 0.5183991750982793, "grad_norm": 0.02134104283681624, "learning_rate": 5.355531686358755e-06, "loss": 0.0003, "step": 8044 }, { "epoch": 0.5184636205452084, "grad_norm": 0.03731752513019512, "learning_rate": 5.354815610454709e-06, "loss": 0.0001, "step": 8045 }, { "epoch": 0.5185280659921376, "grad_norm": 0.04644805216348859, "learning_rate": 5.354099534550663e-06, "loss": 0.0003, "step": 8046 }, { "epoch": 0.5185925114390668, "grad_norm": 0.35393408777423346, "learning_rate": 5.3533834586466175e-06, "loss": 0.0017, "step": 8047 }, { "epoch": 0.518656956885996, "grad_norm": 0.007836727577587311, "learning_rate": 5.352667382742571e-06, "loss": 0.0, "step": 8048 }, { "epoch": 0.5187214023329252, "grad_norm": 0.002620074872959019, "learning_rate": 5.351951306838525e-06, "loss": 0.0, "step": 8049 }, { "epoch": 0.5187858477798544, "grad_norm": 0.0004816107082796707, "learning_rate": 5.35123523093448e-06, "loss": 0.0, "step": 8050 }, { "epoch": 0.5188502932267836, "grad_norm": 0.00032189141713238927, "learning_rate": 5.350519155030434e-06, "loss": 0.0, "step": 8051 }, { "epoch": 0.5189147386737127, "grad_norm": 0.001981909159571749, "learning_rate": 5.349803079126387e-06, "loss": 0.0, "step": 8052 }, { "epoch": 0.5189791841206419, "grad_norm": 0.004482247778658294, "learning_rate": 5.349087003222342e-06, "loss": 0.0, "step": 8053 }, { "epoch": 0.519043629567571, "grad_norm": 0.0429455602448583, "learning_rate": 5.348370927318296e-06, "loss": 0.0001, "step": 8054 }, { "epoch": 0.5191080750145002, "grad_norm": 0.00010109813583239728, "learning_rate": 5.34765485141425e-06, "loss": 0.0, "step": 8055 }, { "epoch": 0.5191725204614294, "grad_norm": 0.005013169175907346, "learning_rate": 5.3469387755102045e-06, "loss": 0.0, "step": 8056 }, { "epoch": 0.5192369659083585, "grad_norm": 0.0026910242392564347, "learning_rate": 5.346222699606158e-06, "loss": 0.0, "step": 8057 }, { "epoch": 0.5193014113552877, "grad_norm": 0.001115195455531372, "learning_rate": 5.345506623702112e-06, "loss": 0.0, "step": 8058 }, { "epoch": 0.5193658568022169, "grad_norm": 0.0036148613287218584, "learning_rate": 5.3447905477980666e-06, "loss": 0.0, "step": 8059 }, { "epoch": 0.5194303022491461, "grad_norm": 0.012834000600551449, "learning_rate": 5.344074471894021e-06, "loss": 0.0, "step": 8060 }, { "epoch": 0.5194947476960753, "grad_norm": 0.00019800514969051393, "learning_rate": 5.343358395989975e-06, "loss": 0.0015, "step": 8061 }, { "epoch": 0.5195591931430045, "grad_norm": 7.261156807335496e-05, "learning_rate": 5.34264232008593e-06, "loss": 0.0, "step": 8062 }, { "epoch": 0.5196236385899337, "grad_norm": 0.00010452706061990569, "learning_rate": 5.341926244181885e-06, "loss": 0.0, "step": 8063 }, { "epoch": 0.5196880840368628, "grad_norm": 0.0015938797537969481, "learning_rate": 5.341210168277838e-06, "loss": 0.0, "step": 8064 }, { "epoch": 0.519752529483792, "grad_norm": 0.0006986058601608112, "learning_rate": 5.340494092373792e-06, "loss": 0.0, "step": 8065 }, { "epoch": 0.5198169749307211, "grad_norm": 0.00018862228638009165, "learning_rate": 5.339778016469747e-06, "loss": 0.0, "step": 8066 }, { "epoch": 0.5198814203776503, "grad_norm": 0.04569480241493098, "learning_rate": 5.339061940565701e-06, "loss": 0.0004, "step": 8067 }, { "epoch": 0.5199458658245795, "grad_norm": 0.00048435932877395973, "learning_rate": 5.338345864661654e-06, "loss": 0.0, "step": 8068 }, { "epoch": 0.5200103112715087, "grad_norm": 4.264877117970066e-05, "learning_rate": 5.337629788757609e-06, "loss": 0.0, "step": 8069 }, { "epoch": 0.5200747567184378, "grad_norm": 0.022674035570682753, "learning_rate": 5.336913712853563e-06, "loss": 0.0001, "step": 8070 }, { "epoch": 0.520139202165367, "grad_norm": 7.506351649127817e-05, "learning_rate": 5.336197636949517e-06, "loss": 0.0, "step": 8071 }, { "epoch": 0.5202036476122962, "grad_norm": 0.0002121658204572475, "learning_rate": 5.335481561045472e-06, "loss": 0.0, "step": 8072 }, { "epoch": 0.5202680930592254, "grad_norm": 0.0005612859992496004, "learning_rate": 5.334765485141425e-06, "loss": 0.0, "step": 8073 }, { "epoch": 0.5203325385061546, "grad_norm": 0.0005723835744671824, "learning_rate": 5.334049409237379e-06, "loss": 0.0, "step": 8074 }, { "epoch": 0.5203969839530838, "grad_norm": 4.74043528609633e-05, "learning_rate": 5.333333333333334e-06, "loss": 0.0, "step": 8075 }, { "epoch": 0.5204614294000129, "grad_norm": 2.987999870269243e-05, "learning_rate": 5.332617257429288e-06, "loss": 0.0, "step": 8076 }, { "epoch": 0.520525874846942, "grad_norm": 0.00460482292988574, "learning_rate": 5.331901181525242e-06, "loss": 0.0, "step": 8077 }, { "epoch": 0.5205903202938712, "grad_norm": 0.0010893569519421812, "learning_rate": 5.331185105621196e-06, "loss": 0.0015, "step": 8078 }, { "epoch": 0.5206547657408004, "grad_norm": 0.0019560104342554994, "learning_rate": 5.33046902971715e-06, "loss": 0.0, "step": 8079 }, { "epoch": 0.5207192111877296, "grad_norm": 0.00010963162534686023, "learning_rate": 5.329752953813104e-06, "loss": 0.0, "step": 8080 }, { "epoch": 0.5207836566346588, "grad_norm": 0.0014649577549688212, "learning_rate": 5.329036877909059e-06, "loss": 0.0, "step": 8081 }, { "epoch": 0.5208481020815879, "grad_norm": 0.0045492230178957975, "learning_rate": 5.328320802005013e-06, "loss": 0.0, "step": 8082 }, { "epoch": 0.5209125475285171, "grad_norm": 0.0012380836922672056, "learning_rate": 5.327604726100966e-06, "loss": 0.0, "step": 8083 }, { "epoch": 0.5209769929754463, "grad_norm": 0.006471011870845555, "learning_rate": 5.326888650196921e-06, "loss": 0.0, "step": 8084 }, { "epoch": 0.5210414384223755, "grad_norm": 0.0273098650513487, "learning_rate": 5.326172574292875e-06, "loss": 0.0001, "step": 8085 }, { "epoch": 0.5211058838693047, "grad_norm": 0.2248162519329055, "learning_rate": 5.32545649838883e-06, "loss": 0.0005, "step": 8086 }, { "epoch": 0.5211703293162339, "grad_norm": 0.0010722583432270054, "learning_rate": 5.324740422484784e-06, "loss": 0.0, "step": 8087 }, { "epoch": 0.5212347747631629, "grad_norm": 0.0011142265211501097, "learning_rate": 5.324024346580739e-06, "loss": 0.0, "step": 8088 }, { "epoch": 0.5212992202100921, "grad_norm": 0.11535493960735047, "learning_rate": 5.323308270676692e-06, "loss": 0.0003, "step": 8089 }, { "epoch": 0.5213636656570213, "grad_norm": 0.009380588396358299, "learning_rate": 5.3225921947726464e-06, "loss": 0.0, "step": 8090 }, { "epoch": 0.5214281111039505, "grad_norm": 0.002236862676547502, "learning_rate": 5.321876118868601e-06, "loss": 0.0, "step": 8091 }, { "epoch": 0.5214925565508797, "grad_norm": 0.0032438128252277815, "learning_rate": 5.321160042964555e-06, "loss": 0.0, "step": 8092 }, { "epoch": 0.5215570019978089, "grad_norm": 0.0006600547852672644, "learning_rate": 5.320443967060509e-06, "loss": 0.0, "step": 8093 }, { "epoch": 0.521621447444738, "grad_norm": 0.00015368027513390677, "learning_rate": 5.319727891156463e-06, "loss": 0.0, "step": 8094 }, { "epoch": 0.5216858928916672, "grad_norm": 0.02774108911226277, "learning_rate": 5.319011815252417e-06, "loss": 0.0002, "step": 8095 }, { "epoch": 0.5217503383385964, "grad_norm": 0.0006598481528583075, "learning_rate": 5.318295739348371e-06, "loss": 0.0, "step": 8096 }, { "epoch": 0.5218147837855256, "grad_norm": 0.0009686613851743172, "learning_rate": 5.317579663444326e-06, "loss": 0.0, "step": 8097 }, { "epoch": 0.5218792292324548, "grad_norm": 0.0003071446257228951, "learning_rate": 5.31686358754028e-06, "loss": 0.0, "step": 8098 }, { "epoch": 0.5219436746793839, "grad_norm": 0.0007522031735325496, "learning_rate": 5.3161475116362334e-06, "loss": 0.0, "step": 8099 }, { "epoch": 0.522008120126313, "grad_norm": 0.00565657191007196, "learning_rate": 5.315431435732188e-06, "loss": 0.0, "step": 8100 }, { "epoch": 0.5220725655732422, "grad_norm": 0.011783286177244336, "learning_rate": 5.314715359828142e-06, "loss": 0.0002, "step": 8101 }, { "epoch": 0.5221370110201714, "grad_norm": 0.0004662790406560697, "learning_rate": 5.313999283924096e-06, "loss": 0.0, "step": 8102 }, { "epoch": 0.5222014564671006, "grad_norm": 0.001986001404947262, "learning_rate": 5.313283208020051e-06, "loss": 0.0, "step": 8103 }, { "epoch": 0.5222659019140298, "grad_norm": 0.0005110061619616537, "learning_rate": 5.312567132116004e-06, "loss": 0.0, "step": 8104 }, { "epoch": 0.522330347360959, "grad_norm": 6.456255465957766e-05, "learning_rate": 5.311851056211958e-06, "loss": 0.0, "step": 8105 }, { "epoch": 0.5223947928078881, "grad_norm": 0.003485680539695944, "learning_rate": 5.311134980307913e-06, "loss": 0.0, "step": 8106 }, { "epoch": 0.5224592382548173, "grad_norm": 0.027200001693703183, "learning_rate": 5.310418904403867e-06, "loss": 0.0003, "step": 8107 }, { "epoch": 0.5225236837017465, "grad_norm": 0.00010678184588055318, "learning_rate": 5.309702828499821e-06, "loss": 0.0, "step": 8108 }, { "epoch": 0.5225881291486757, "grad_norm": 0.009661019632250483, "learning_rate": 5.3089867525957764e-06, "loss": 0.0, "step": 8109 }, { "epoch": 0.5226525745956048, "grad_norm": 0.16898934666960477, "learning_rate": 5.30827067669173e-06, "loss": 0.0021, "step": 8110 }, { "epoch": 0.522717020042534, "grad_norm": 0.648830777139313, "learning_rate": 5.307554600787684e-06, "loss": 0.0027, "step": 8111 }, { "epoch": 0.5227814654894631, "grad_norm": 2.3984561151492033e-05, "learning_rate": 5.3068385248836385e-06, "loss": 0.0, "step": 8112 }, { "epoch": 0.5228459109363923, "grad_norm": 0.1974333531613506, "learning_rate": 5.306122448979593e-06, "loss": 0.0011, "step": 8113 }, { "epoch": 0.5229103563833215, "grad_norm": 0.00015204341681974146, "learning_rate": 5.305406373075547e-06, "loss": 0.0, "step": 8114 }, { "epoch": 0.5229748018302507, "grad_norm": 0.0036347013021713263, "learning_rate": 5.3046902971715005e-06, "loss": 0.0, "step": 8115 }, { "epoch": 0.5230392472771799, "grad_norm": 0.0037800083078743346, "learning_rate": 5.303974221267455e-06, "loss": 0.0, "step": 8116 }, { "epoch": 0.5231036927241091, "grad_norm": 0.0013677929468042499, "learning_rate": 5.303258145363409e-06, "loss": 0.0, "step": 8117 }, { "epoch": 0.5231681381710382, "grad_norm": 0.024809124082378647, "learning_rate": 5.302542069459363e-06, "loss": 0.0002, "step": 8118 }, { "epoch": 0.5232325836179674, "grad_norm": 0.017704296459074436, "learning_rate": 5.301825993555318e-06, "loss": 0.0001, "step": 8119 }, { "epoch": 0.5232970290648966, "grad_norm": 0.008421566509266936, "learning_rate": 5.301109917651271e-06, "loss": 0.0, "step": 8120 }, { "epoch": 0.5233614745118257, "grad_norm": 0.00030410408926729963, "learning_rate": 5.3003938417472255e-06, "loss": 0.0, "step": 8121 }, { "epoch": 0.5234259199587549, "grad_norm": 0.031790182885038674, "learning_rate": 5.29967776584318e-06, "loss": 0.0, "step": 8122 }, { "epoch": 0.523490365405684, "grad_norm": 0.0006871531393244011, "learning_rate": 5.298961689939134e-06, "loss": 0.0, "step": 8123 }, { "epoch": 0.5235548108526132, "grad_norm": 0.004459484505413135, "learning_rate": 5.298245614035088e-06, "loss": 0.0, "step": 8124 }, { "epoch": 0.5236192562995424, "grad_norm": 0.0018307072091210933, "learning_rate": 5.297529538131042e-06, "loss": 0.0, "step": 8125 }, { "epoch": 0.5236837017464716, "grad_norm": 0.0038162593027634127, "learning_rate": 5.296813462226996e-06, "loss": 0.0, "step": 8126 }, { "epoch": 0.5237481471934008, "grad_norm": 7.791748082360972e-05, "learning_rate": 5.29609738632295e-06, "loss": 0.0, "step": 8127 }, { "epoch": 0.52381259264033, "grad_norm": 0.0016177164245403997, "learning_rate": 5.295381310418905e-06, "loss": 0.0, "step": 8128 }, { "epoch": 0.5238770380872592, "grad_norm": 0.018440304189078296, "learning_rate": 5.294665234514859e-06, "loss": 0.0001, "step": 8129 }, { "epoch": 0.5239414835341883, "grad_norm": 0.00045727140499551784, "learning_rate": 5.2939491586108125e-06, "loss": 0.0, "step": 8130 }, { "epoch": 0.5240059289811175, "grad_norm": 0.001961379100123223, "learning_rate": 5.293233082706767e-06, "loss": 0.0, "step": 8131 }, { "epoch": 0.5240703744280466, "grad_norm": 0.0017229937831158926, "learning_rate": 5.292517006802722e-06, "loss": 0.0, "step": 8132 }, { "epoch": 0.5241348198749758, "grad_norm": 0.0035674278089422567, "learning_rate": 5.291800930898676e-06, "loss": 0.0, "step": 8133 }, { "epoch": 0.524199265321905, "grad_norm": 0.05173934526094626, "learning_rate": 5.2910848549946305e-06, "loss": 0.0003, "step": 8134 }, { "epoch": 0.5242637107688342, "grad_norm": 0.0022714186955937123, "learning_rate": 5.290368779090585e-06, "loss": 0.0, "step": 8135 }, { "epoch": 0.5243281562157633, "grad_norm": 0.04544742654464294, "learning_rate": 5.289652703186538e-06, "loss": 0.0001, "step": 8136 }, { "epoch": 0.5243926016626925, "grad_norm": 0.0011263308024880082, "learning_rate": 5.2889366272824926e-06, "loss": 0.0, "step": 8137 }, { "epoch": 0.5244570471096217, "grad_norm": 0.0005061771864201109, "learning_rate": 5.288220551378447e-06, "loss": 0.0, "step": 8138 }, { "epoch": 0.5245214925565509, "grad_norm": 0.0009123932009742532, "learning_rate": 5.287504475474401e-06, "loss": 0.0, "step": 8139 }, { "epoch": 0.5245859380034801, "grad_norm": 0.001865122440905928, "learning_rate": 5.2867883995703554e-06, "loss": 0.0, "step": 8140 }, { "epoch": 0.5246503834504093, "grad_norm": 0.005351867999890206, "learning_rate": 5.286072323666309e-06, "loss": 0.0, "step": 8141 }, { "epoch": 0.5247148288973384, "grad_norm": 0.0012923710822036933, "learning_rate": 5.285356247762263e-06, "loss": 0.0, "step": 8142 }, { "epoch": 0.5247792743442676, "grad_norm": 0.07148435050672101, "learning_rate": 5.2846401718582175e-06, "loss": 0.0002, "step": 8143 }, { "epoch": 0.5248437197911967, "grad_norm": 0.0003850860287028189, "learning_rate": 5.283924095954172e-06, "loss": 0.0, "step": 8144 }, { "epoch": 0.5249081652381259, "grad_norm": 0.29063587886178854, "learning_rate": 5.283208020050126e-06, "loss": 0.0009, "step": 8145 }, { "epoch": 0.5249726106850551, "grad_norm": 0.0005251628566615367, "learning_rate": 5.2824919441460795e-06, "loss": 0.0, "step": 8146 }, { "epoch": 0.5250370561319843, "grad_norm": 0.0034821841120536135, "learning_rate": 5.281775868242034e-06, "loss": 0.0001, "step": 8147 }, { "epoch": 0.5251015015789134, "grad_norm": 0.030426072261242363, "learning_rate": 5.281059792337988e-06, "loss": 0.0, "step": 8148 }, { "epoch": 0.5251659470258426, "grad_norm": 0.05582442107006137, "learning_rate": 5.2803437164339424e-06, "loss": 0.0001, "step": 8149 }, { "epoch": 0.5252303924727718, "grad_norm": 0.009250913006427195, "learning_rate": 5.279627640529896e-06, "loss": 0.0, "step": 8150 }, { "epoch": 0.525294837919701, "grad_norm": 0.0034705167538564173, "learning_rate": 5.27891156462585e-06, "loss": 0.0, "step": 8151 }, { "epoch": 0.5253592833666302, "grad_norm": 0.0016301703975253937, "learning_rate": 5.2781954887218045e-06, "loss": 0.0, "step": 8152 }, { "epoch": 0.5254237288135594, "grad_norm": 0.004285375626315951, "learning_rate": 5.277479412817759e-06, "loss": 0.0, "step": 8153 }, { "epoch": 0.5254881742604885, "grad_norm": 0.0001443931223535041, "learning_rate": 5.276763336913713e-06, "loss": 0.0, "step": 8154 }, { "epoch": 0.5255526197074176, "grad_norm": 0.0012174254517221168, "learning_rate": 5.2760472610096665e-06, "loss": 0.0, "step": 8155 }, { "epoch": 0.5256170651543468, "grad_norm": 0.1638833555172645, "learning_rate": 5.2753311851056225e-06, "loss": 0.0018, "step": 8156 }, { "epoch": 0.525681510601276, "grad_norm": 0.011755288426830504, "learning_rate": 5.274615109201576e-06, "loss": 0.0, "step": 8157 }, { "epoch": 0.5257459560482052, "grad_norm": 0.0003608457318295168, "learning_rate": 5.27389903329753e-06, "loss": 0.0, "step": 8158 }, { "epoch": 0.5258104014951344, "grad_norm": 0.0014071326382772225, "learning_rate": 5.273182957393485e-06, "loss": 0.0, "step": 8159 }, { "epoch": 0.5258748469420635, "grad_norm": 0.0002052865912161992, "learning_rate": 5.272466881489439e-06, "loss": 0.0, "step": 8160 }, { "epoch": 0.5259392923889927, "grad_norm": 0.11002314063391534, "learning_rate": 5.271750805585393e-06, "loss": 0.0002, "step": 8161 }, { "epoch": 0.5260037378359219, "grad_norm": 0.1543719917652916, "learning_rate": 5.271034729681347e-06, "loss": 0.0016, "step": 8162 }, { "epoch": 0.5260681832828511, "grad_norm": 0.0013602805663139417, "learning_rate": 5.270318653777301e-06, "loss": 0.0, "step": 8163 }, { "epoch": 0.5261326287297803, "grad_norm": 0.028567334380291513, "learning_rate": 5.269602577873255e-06, "loss": 0.0, "step": 8164 }, { "epoch": 0.5261970741767095, "grad_norm": 0.030342511622000475, "learning_rate": 5.2688865019692095e-06, "loss": 0.0, "step": 8165 }, { "epoch": 0.5262615196236385, "grad_norm": 0.0024130707987652417, "learning_rate": 5.268170426065163e-06, "loss": 0.0, "step": 8166 }, { "epoch": 0.5263259650705677, "grad_norm": 0.0004052014762314221, "learning_rate": 5.267454350161117e-06, "loss": 0.0, "step": 8167 }, { "epoch": 0.5263904105174969, "grad_norm": 0.000613029688713524, "learning_rate": 5.266738274257072e-06, "loss": 0.0, "step": 8168 }, { "epoch": 0.5264548559644261, "grad_norm": 0.007337527303468706, "learning_rate": 5.266022198353026e-06, "loss": 0.0, "step": 8169 }, { "epoch": 0.5265193014113553, "grad_norm": 0.007629737369463775, "learning_rate": 5.26530612244898e-06, "loss": 0.0, "step": 8170 }, { "epoch": 0.5265837468582845, "grad_norm": 0.019203614179758664, "learning_rate": 5.264590046544934e-06, "loss": 0.0, "step": 8171 }, { "epoch": 0.5266481923052136, "grad_norm": 0.009774924112548477, "learning_rate": 5.263873970640888e-06, "loss": 0.0, "step": 8172 }, { "epoch": 0.5267126377521428, "grad_norm": 0.13492138472105183, "learning_rate": 5.263157894736842e-06, "loss": 0.0001, "step": 8173 }, { "epoch": 0.526777083199072, "grad_norm": 0.17494896310555966, "learning_rate": 5.2624418188327965e-06, "loss": 0.0026, "step": 8174 }, { "epoch": 0.5268415286460012, "grad_norm": 0.0030930372243162135, "learning_rate": 5.261725742928751e-06, "loss": 0.0, "step": 8175 }, { "epoch": 0.5269059740929304, "grad_norm": 0.00018265507632032822, "learning_rate": 5.261009667024704e-06, "loss": 0.0, "step": 8176 }, { "epoch": 0.5269704195398595, "grad_norm": 0.005233237911499148, "learning_rate": 5.2602935911206586e-06, "loss": 0.0, "step": 8177 }, { "epoch": 0.5270348649867886, "grad_norm": 0.005108504047076498, "learning_rate": 5.259577515216613e-06, "loss": 0.0, "step": 8178 }, { "epoch": 0.5270993104337178, "grad_norm": 0.001656930633442291, "learning_rate": 5.258861439312568e-06, "loss": 0.0, "step": 8179 }, { "epoch": 0.527163755880647, "grad_norm": 0.0015388414270932735, "learning_rate": 5.258145363408522e-06, "loss": 0.0, "step": 8180 }, { "epoch": 0.5272282013275762, "grad_norm": 0.001493593354986821, "learning_rate": 5.257429287504477e-06, "loss": 0.0, "step": 8181 }, { "epoch": 0.5272926467745054, "grad_norm": 0.0015624622198609356, "learning_rate": 5.25671321160043e-06, "loss": 0.0, "step": 8182 }, { "epoch": 0.5273570922214346, "grad_norm": 0.00023563587226363962, "learning_rate": 5.255997135696384e-06, "loss": 0.0, "step": 8183 }, { "epoch": 0.5274215376683637, "grad_norm": 0.0008716355655753914, "learning_rate": 5.255281059792339e-06, "loss": 0.0, "step": 8184 }, { "epoch": 0.5274859831152929, "grad_norm": 0.00041412078705236374, "learning_rate": 5.254564983888293e-06, "loss": 0.0, "step": 8185 }, { "epoch": 0.5275504285622221, "grad_norm": 0.02280689810042121, "learning_rate": 5.253848907984247e-06, "loss": 0.0001, "step": 8186 }, { "epoch": 0.5276148740091513, "grad_norm": 0.0002442367796725076, "learning_rate": 5.253132832080201e-06, "loss": 0.0, "step": 8187 }, { "epoch": 0.5276793194560804, "grad_norm": 0.0001824094471575746, "learning_rate": 5.252416756176155e-06, "loss": 0.0, "step": 8188 }, { "epoch": 0.5277437649030096, "grad_norm": 0.10367689975011432, "learning_rate": 5.251700680272109e-06, "loss": 0.0006, "step": 8189 }, { "epoch": 0.5278082103499387, "grad_norm": 0.04179084686455239, "learning_rate": 5.250984604368064e-06, "loss": 0.0003, "step": 8190 }, { "epoch": 0.5278726557968679, "grad_norm": 0.054257698556740556, "learning_rate": 5.250268528464018e-06, "loss": 0.0002, "step": 8191 }, { "epoch": 0.5279371012437971, "grad_norm": 0.00045334327345424616, "learning_rate": 5.249552452559971e-06, "loss": 0.0, "step": 8192 }, { "epoch": 0.5280015466907263, "grad_norm": 0.002689039349698105, "learning_rate": 5.248836376655926e-06, "loss": 0.0, "step": 8193 }, { "epoch": 0.5280659921376555, "grad_norm": 0.003820872434692222, "learning_rate": 5.24812030075188e-06, "loss": 0.0, "step": 8194 }, { "epoch": 0.5281304375845847, "grad_norm": 0.0031079245168158607, "learning_rate": 5.247404224847834e-06, "loss": 0.0, "step": 8195 }, { "epoch": 0.5281948830315139, "grad_norm": 0.008044136523780283, "learning_rate": 5.2466881489437886e-06, "loss": 0.0, "step": 8196 }, { "epoch": 0.528259328478443, "grad_norm": 0.0013573262889266815, "learning_rate": 5.245972073039742e-06, "loss": 0.0, "step": 8197 }, { "epoch": 0.5283237739253722, "grad_norm": 0.00013557121331184024, "learning_rate": 5.245255997135696e-06, "loss": 0.0, "step": 8198 }, { "epoch": 0.5283882193723013, "grad_norm": 0.006549067277275383, "learning_rate": 5.244539921231651e-06, "loss": 0.0, "step": 8199 }, { "epoch": 0.5284526648192305, "grad_norm": 0.00038068579052549697, "learning_rate": 5.243823845327605e-06, "loss": 0.0, "step": 8200 }, { "epoch": 0.5285171102661597, "grad_norm": 0.001630613111818217, "learning_rate": 5.243107769423559e-06, "loss": 0.0, "step": 8201 }, { "epoch": 0.5285815557130888, "grad_norm": 0.005277173181096403, "learning_rate": 5.242391693519514e-06, "loss": 0.0, "step": 8202 }, { "epoch": 0.528646001160018, "grad_norm": 0.0022489153954235027, "learning_rate": 5.241675617615468e-06, "loss": 0.0, "step": 8203 }, { "epoch": 0.5287104466069472, "grad_norm": 0.0007811213053585651, "learning_rate": 5.240959541711422e-06, "loss": 0.0, "step": 8204 }, { "epoch": 0.5287748920538764, "grad_norm": 0.15758120777290088, "learning_rate": 5.240243465807376e-06, "loss": 0.0008, "step": 8205 }, { "epoch": 0.5288393375008056, "grad_norm": 0.226937066591774, "learning_rate": 5.239527389903331e-06, "loss": 0.001, "step": 8206 }, { "epoch": 0.5289037829477348, "grad_norm": 0.0004585199950160562, "learning_rate": 5.238811313999285e-06, "loss": 0.0, "step": 8207 }, { "epoch": 0.528968228394664, "grad_norm": 0.007970610709894329, "learning_rate": 5.2380952380952384e-06, "loss": 0.0, "step": 8208 }, { "epoch": 0.5290326738415931, "grad_norm": 0.0008556513303001802, "learning_rate": 5.237379162191193e-06, "loss": 0.0, "step": 8209 }, { "epoch": 0.5290971192885222, "grad_norm": 0.0013647344791251014, "learning_rate": 5.236663086287147e-06, "loss": 0.0, "step": 8210 }, { "epoch": 0.5291615647354514, "grad_norm": 0.0008613043011201751, "learning_rate": 5.235947010383101e-06, "loss": 0.0, "step": 8211 }, { "epoch": 0.5292260101823806, "grad_norm": 0.0011533930435283846, "learning_rate": 5.235230934479056e-06, "loss": 0.0, "step": 8212 }, { "epoch": 0.5292904556293098, "grad_norm": 0.004177880975011757, "learning_rate": 5.234514858575009e-06, "loss": 0.0, "step": 8213 }, { "epoch": 0.529354901076239, "grad_norm": 8.555679965205073e-05, "learning_rate": 5.233798782670963e-06, "loss": 0.0, "step": 8214 }, { "epoch": 0.5294193465231681, "grad_norm": 0.06403724198652394, "learning_rate": 5.233082706766918e-06, "loss": 0.0002, "step": 8215 }, { "epoch": 0.5294837919700973, "grad_norm": 0.0004445335666127929, "learning_rate": 5.232366630862872e-06, "loss": 0.0, "step": 8216 }, { "epoch": 0.5295482374170265, "grad_norm": 0.0018955943196318668, "learning_rate": 5.231650554958826e-06, "loss": 0.0, "step": 8217 }, { "epoch": 0.5296126828639557, "grad_norm": 0.00044187092058860976, "learning_rate": 5.23093447905478e-06, "loss": 0.0, "step": 8218 }, { "epoch": 0.5296771283108849, "grad_norm": 0.015893673273748708, "learning_rate": 5.230218403150734e-06, "loss": 0.0001, "step": 8219 }, { "epoch": 0.529741573757814, "grad_norm": 0.0035433591799342505, "learning_rate": 5.229502327246688e-06, "loss": 0.0, "step": 8220 }, { "epoch": 0.5298060192047432, "grad_norm": 0.0010208689897191162, "learning_rate": 5.228786251342643e-06, "loss": 0.0, "step": 8221 }, { "epoch": 0.5298704646516723, "grad_norm": 7.476281600967891e-05, "learning_rate": 5.228070175438597e-06, "loss": 0.0, "step": 8222 }, { "epoch": 0.5299349100986015, "grad_norm": 0.03769369368956137, "learning_rate": 5.22735409953455e-06, "loss": 0.0002, "step": 8223 }, { "epoch": 0.5299993555455307, "grad_norm": 0.5087324466067144, "learning_rate": 5.226638023630505e-06, "loss": 0.001, "step": 8224 }, { "epoch": 0.5300638009924599, "grad_norm": 0.0004116131397795408, "learning_rate": 5.225921947726459e-06, "loss": 0.0, "step": 8225 }, { "epoch": 0.530128246439389, "grad_norm": 0.12127374146854736, "learning_rate": 5.225205871822414e-06, "loss": 0.0003, "step": 8226 }, { "epoch": 0.5301926918863182, "grad_norm": 0.43693211006198096, "learning_rate": 5.2244897959183684e-06, "loss": 0.0006, "step": 8227 }, { "epoch": 0.5302571373332474, "grad_norm": 0.0016617944626236237, "learning_rate": 5.223773720014323e-06, "loss": 0.0, "step": 8228 }, { "epoch": 0.5303215827801766, "grad_norm": 0.274345728826985, "learning_rate": 5.223057644110276e-06, "loss": 0.0006, "step": 8229 }, { "epoch": 0.5303860282271058, "grad_norm": 0.005449729470843885, "learning_rate": 5.2223415682062305e-06, "loss": 0.0, "step": 8230 }, { "epoch": 0.530450473674035, "grad_norm": 0.0013428693602989992, "learning_rate": 5.221625492302185e-06, "loss": 0.0, "step": 8231 }, { "epoch": 0.5305149191209642, "grad_norm": 0.025936283526849124, "learning_rate": 5.220909416398139e-06, "loss": 0.0, "step": 8232 }, { "epoch": 0.5305793645678932, "grad_norm": 0.015376020230364216, "learning_rate": 5.220193340494093e-06, "loss": 0.0001, "step": 8233 }, { "epoch": 0.5306438100148224, "grad_norm": 0.00035719175370790775, "learning_rate": 5.219477264590047e-06, "loss": 0.0, "step": 8234 }, { "epoch": 0.5307082554617516, "grad_norm": 0.6565876046945677, "learning_rate": 5.218761188686001e-06, "loss": 0.005, "step": 8235 }, { "epoch": 0.5307727009086808, "grad_norm": 0.011372042509931924, "learning_rate": 5.218045112781955e-06, "loss": 0.0, "step": 8236 }, { "epoch": 0.53083714635561, "grad_norm": 0.002884196527175304, "learning_rate": 5.21732903687791e-06, "loss": 0.0, "step": 8237 }, { "epoch": 0.5309015918025392, "grad_norm": 0.0003135085556914037, "learning_rate": 5.216612960973864e-06, "loss": 0.0, "step": 8238 }, { "epoch": 0.5309660372494683, "grad_norm": 0.01590281361846434, "learning_rate": 5.2158968850698175e-06, "loss": 0.0002, "step": 8239 }, { "epoch": 0.5310304826963975, "grad_norm": 0.011128835166307135, "learning_rate": 5.215180809165772e-06, "loss": 0.0, "step": 8240 }, { "epoch": 0.5310949281433267, "grad_norm": 0.08628985929142845, "learning_rate": 5.214464733261726e-06, "loss": 0.0004, "step": 8241 }, { "epoch": 0.5311593735902559, "grad_norm": 0.0002728034723986173, "learning_rate": 5.21374865735768e-06, "loss": 0.0, "step": 8242 }, { "epoch": 0.5312238190371851, "grad_norm": 0.0019110246639428413, "learning_rate": 5.213032581453634e-06, "loss": 0.0, "step": 8243 }, { "epoch": 0.5312882644841141, "grad_norm": 0.12502401836431717, "learning_rate": 5.212316505549588e-06, "loss": 0.0056, "step": 8244 }, { "epoch": 0.5313527099310433, "grad_norm": 0.0008021568203203074, "learning_rate": 5.211600429645542e-06, "loss": 0.0, "step": 8245 }, { "epoch": 0.5314171553779725, "grad_norm": 0.012489563526513548, "learning_rate": 5.210884353741497e-06, "loss": 0.0, "step": 8246 }, { "epoch": 0.5314816008249017, "grad_norm": 0.566265930684311, "learning_rate": 5.210168277837451e-06, "loss": 0.002, "step": 8247 }, { "epoch": 0.5315460462718309, "grad_norm": 0.16108968909072638, "learning_rate": 5.2094522019334045e-06, "loss": 0.0002, "step": 8248 }, { "epoch": 0.5316104917187601, "grad_norm": 0.0007321626886337238, "learning_rate": 5.2087361260293605e-06, "loss": 0.0, "step": 8249 }, { "epoch": 0.5316749371656893, "grad_norm": 0.10953813121065938, "learning_rate": 5.208020050125314e-06, "loss": 0.0001, "step": 8250 }, { "epoch": 0.5317393826126184, "grad_norm": 0.007629119868102849, "learning_rate": 5.207303974221268e-06, "loss": 0.0, "step": 8251 }, { "epoch": 0.5318038280595476, "grad_norm": 0.0022120117547238275, "learning_rate": 5.2065878983172225e-06, "loss": 0.0, "step": 8252 }, { "epoch": 0.5318682735064768, "grad_norm": 9.754262784845069e-05, "learning_rate": 5.205871822413177e-06, "loss": 0.0, "step": 8253 }, { "epoch": 0.531932718953406, "grad_norm": 0.03304436715002747, "learning_rate": 5.205155746509131e-06, "loss": 0.0, "step": 8254 }, { "epoch": 0.5319971644003351, "grad_norm": 0.002980140074258487, "learning_rate": 5.2044396706050846e-06, "loss": 0.0, "step": 8255 }, { "epoch": 0.5320616098472642, "grad_norm": 0.00030013782604176344, "learning_rate": 5.203723594701039e-06, "loss": 0.0, "step": 8256 }, { "epoch": 0.5321260552941934, "grad_norm": 0.0005443334783101526, "learning_rate": 5.203007518796993e-06, "loss": 0.0, "step": 8257 }, { "epoch": 0.5321905007411226, "grad_norm": 0.03163681132386296, "learning_rate": 5.2022914428929474e-06, "loss": 0.0001, "step": 8258 }, { "epoch": 0.5322549461880518, "grad_norm": 0.04303355504829882, "learning_rate": 5.201575366988901e-06, "loss": 0.0001, "step": 8259 }, { "epoch": 0.532319391634981, "grad_norm": 0.17966033896124106, "learning_rate": 5.200859291084855e-06, "loss": 0.0001, "step": 8260 }, { "epoch": 0.5323838370819102, "grad_norm": 0.001579268255010264, "learning_rate": 5.2001432151808095e-06, "loss": 0.0, "step": 8261 }, { "epoch": 0.5324482825288394, "grad_norm": 0.0009424939141334193, "learning_rate": 5.199427139276764e-06, "loss": 0.0, "step": 8262 }, { "epoch": 0.5325127279757685, "grad_norm": 0.0009968826784912033, "learning_rate": 5.198711063372718e-06, "loss": 0.0, "step": 8263 }, { "epoch": 0.5325771734226977, "grad_norm": 0.008419342363879062, "learning_rate": 5.1979949874686715e-06, "loss": 0.0001, "step": 8264 }, { "epoch": 0.5326416188696269, "grad_norm": 0.003526839644485888, "learning_rate": 5.197278911564626e-06, "loss": 0.0, "step": 8265 }, { "epoch": 0.532706064316556, "grad_norm": 0.0004590648824850025, "learning_rate": 5.19656283566058e-06, "loss": 0.0, "step": 8266 }, { "epoch": 0.5327705097634852, "grad_norm": 0.3312528124276014, "learning_rate": 5.1958467597565344e-06, "loss": 0.0009, "step": 8267 }, { "epoch": 0.5328349552104144, "grad_norm": 0.018709853089610762, "learning_rate": 5.195130683852489e-06, "loss": 0.0001, "step": 8268 }, { "epoch": 0.5328994006573435, "grad_norm": 0.014009747720029147, "learning_rate": 5.194414607948442e-06, "loss": 0.0, "step": 8269 }, { "epoch": 0.5329638461042727, "grad_norm": 0.0016731185805280674, "learning_rate": 5.1936985320443965e-06, "loss": 0.0, "step": 8270 }, { "epoch": 0.5330282915512019, "grad_norm": 0.0074413456102401035, "learning_rate": 5.192982456140351e-06, "loss": 0.0, "step": 8271 }, { "epoch": 0.5330927369981311, "grad_norm": 0.2735715265142258, "learning_rate": 5.192266380236306e-06, "loss": 0.0021, "step": 8272 }, { "epoch": 0.5331571824450603, "grad_norm": 0.07160542783589822, "learning_rate": 5.19155030433226e-06, "loss": 0.0008, "step": 8273 }, { "epoch": 0.5332216278919895, "grad_norm": 0.00013488290769220058, "learning_rate": 5.1908342284282145e-06, "loss": 0.0, "step": 8274 }, { "epoch": 0.5332860733389186, "grad_norm": 2.876226943198003e-05, "learning_rate": 5.190118152524168e-06, "loss": 0.0, "step": 8275 }, { "epoch": 0.5333505187858478, "grad_norm": 0.0012743140065523632, "learning_rate": 5.189402076620122e-06, "loss": 0.0, "step": 8276 }, { "epoch": 0.5334149642327769, "grad_norm": 0.004809027917468691, "learning_rate": 5.188686000716077e-06, "loss": 0.0, "step": 8277 }, { "epoch": 0.5334794096797061, "grad_norm": 0.003701628229133183, "learning_rate": 5.187969924812031e-06, "loss": 0.0, "step": 8278 }, { "epoch": 0.5335438551266353, "grad_norm": 0.00039285142383473234, "learning_rate": 5.187253848907985e-06, "loss": 0.0, "step": 8279 }, { "epoch": 0.5336083005735645, "grad_norm": 0.0047367452853752864, "learning_rate": 5.186537773003939e-06, "loss": 0.0, "step": 8280 }, { "epoch": 0.5336727460204936, "grad_norm": 0.003703470161721052, "learning_rate": 5.185821697099893e-06, "loss": 0.0, "step": 8281 }, { "epoch": 0.5337371914674228, "grad_norm": 4.95614077371797e-05, "learning_rate": 5.185105621195847e-06, "loss": 0.0, "step": 8282 }, { "epoch": 0.533801636914352, "grad_norm": 3.902805141580079e-05, "learning_rate": 5.1843895452918015e-06, "loss": 0.0, "step": 8283 }, { "epoch": 0.5338660823612812, "grad_norm": 0.2618037697781461, "learning_rate": 5.183673469387756e-06, "loss": 0.0004, "step": 8284 }, { "epoch": 0.5339305278082104, "grad_norm": 0.10652270259820498, "learning_rate": 5.182957393483709e-06, "loss": 0.0002, "step": 8285 }, { "epoch": 0.5339949732551396, "grad_norm": 0.05615630343200612, "learning_rate": 5.182241317579664e-06, "loss": 0.0001, "step": 8286 }, { "epoch": 0.5340594187020687, "grad_norm": 8.943660066176195e-05, "learning_rate": 5.181525241675618e-06, "loss": 0.0, "step": 8287 }, { "epoch": 0.5341238641489978, "grad_norm": 0.00048379422864961943, "learning_rate": 5.180809165771572e-06, "loss": 0.0, "step": 8288 }, { "epoch": 0.534188309595927, "grad_norm": 0.0003209484811010145, "learning_rate": 5.1800930898675265e-06, "loss": 0.0, "step": 8289 }, { "epoch": 0.5342527550428562, "grad_norm": 0.000557616408584314, "learning_rate": 5.17937701396348e-06, "loss": 0.0, "step": 8290 }, { "epoch": 0.5343172004897854, "grad_norm": 0.06915313841843637, "learning_rate": 5.178660938059434e-06, "loss": 0.0001, "step": 8291 }, { "epoch": 0.5343816459367146, "grad_norm": 7.130504738155417e-05, "learning_rate": 5.1779448621553885e-06, "loss": 0.0, "step": 8292 }, { "epoch": 0.5344460913836437, "grad_norm": 0.026762604545379487, "learning_rate": 5.177228786251343e-06, "loss": 0.0002, "step": 8293 }, { "epoch": 0.5345105368305729, "grad_norm": 0.1844287713085006, "learning_rate": 5.176512710347297e-06, "loss": 0.0007, "step": 8294 }, { "epoch": 0.5345749822775021, "grad_norm": 0.006867867960209021, "learning_rate": 5.1757966344432506e-06, "loss": 0.0, "step": 8295 }, { "epoch": 0.5346394277244313, "grad_norm": 0.03740969652855573, "learning_rate": 5.175080558539206e-06, "loss": 0.0002, "step": 8296 }, { "epoch": 0.5347038731713605, "grad_norm": 0.0006198577460679115, "learning_rate": 5.17436448263516e-06, "loss": 0.0, "step": 8297 }, { "epoch": 0.5347683186182897, "grad_norm": 0.0001554785987894876, "learning_rate": 5.173648406731114e-06, "loss": 0.0, "step": 8298 }, { "epoch": 0.5348327640652188, "grad_norm": 0.0020449257013498948, "learning_rate": 5.172932330827069e-06, "loss": 0.0, "step": 8299 }, { "epoch": 0.5348972095121479, "grad_norm": 9.068199803593524e-05, "learning_rate": 5.172216254923023e-06, "loss": 0.0, "step": 8300 }, { "epoch": 0.5349616549590771, "grad_norm": 0.0018346562331147103, "learning_rate": 5.171500179018976e-06, "loss": 0.0, "step": 8301 }, { "epoch": 0.5350261004060063, "grad_norm": 0.06669650386462536, "learning_rate": 5.170784103114931e-06, "loss": 0.0005, "step": 8302 }, { "epoch": 0.5350905458529355, "grad_norm": 3.8839867272744435, "learning_rate": 5.170068027210885e-06, "loss": 0.0224, "step": 8303 }, { "epoch": 0.5351549912998647, "grad_norm": 0.0004139125755270844, "learning_rate": 5.169351951306839e-06, "loss": 0.0, "step": 8304 }, { "epoch": 0.5352194367467938, "grad_norm": 0.23567786488833725, "learning_rate": 5.1686358754027936e-06, "loss": 0.0005, "step": 8305 }, { "epoch": 0.535283882193723, "grad_norm": 0.0032149324484725223, "learning_rate": 5.167919799498747e-06, "loss": 0.0, "step": 8306 }, { "epoch": 0.5353483276406522, "grad_norm": 0.001745209537508723, "learning_rate": 5.167203723594701e-06, "loss": 0.0, "step": 8307 }, { "epoch": 0.5354127730875814, "grad_norm": 0.0003493430009983679, "learning_rate": 5.166487647690656e-06, "loss": 0.0, "step": 8308 }, { "epoch": 0.5354772185345106, "grad_norm": 0.0006592212059325943, "learning_rate": 5.16577157178661e-06, "loss": 0.0, "step": 8309 }, { "epoch": 0.5355416639814398, "grad_norm": 0.009202076173033253, "learning_rate": 5.165055495882564e-06, "loss": 0.0001, "step": 8310 }, { "epoch": 0.5356061094283688, "grad_norm": 0.002701766323453557, "learning_rate": 5.164339419978518e-06, "loss": 0.0, "step": 8311 }, { "epoch": 0.535670554875298, "grad_norm": 0.0005170194706688815, "learning_rate": 5.163623344074472e-06, "loss": 0.0, "step": 8312 }, { "epoch": 0.5357350003222272, "grad_norm": 0.12185486281755308, "learning_rate": 5.162907268170426e-06, "loss": 0.0001, "step": 8313 }, { "epoch": 0.5357994457691564, "grad_norm": 0.00047911325761194646, "learning_rate": 5.1621911922663806e-06, "loss": 0.0, "step": 8314 }, { "epoch": 0.5358638912160856, "grad_norm": 0.0002798710818400487, "learning_rate": 5.161475116362335e-06, "loss": 0.0, "step": 8315 }, { "epoch": 0.5359283366630148, "grad_norm": 0.01226373081079961, "learning_rate": 5.160759040458288e-06, "loss": 0.0001, "step": 8316 }, { "epoch": 0.5359927821099439, "grad_norm": 0.00038180829833394726, "learning_rate": 5.160042964554243e-06, "loss": 0.0, "step": 8317 }, { "epoch": 0.5360572275568731, "grad_norm": 4.5729150975741886e-05, "learning_rate": 5.159326888650197e-06, "loss": 0.0, "step": 8318 }, { "epoch": 0.5361216730038023, "grad_norm": 0.7364370331298571, "learning_rate": 5.158610812746152e-06, "loss": 0.0017, "step": 8319 }, { "epoch": 0.5361861184507315, "grad_norm": 0.0003008430119444691, "learning_rate": 5.157894736842106e-06, "loss": 0.0, "step": 8320 }, { "epoch": 0.5362505638976607, "grad_norm": 0.0008920872326398447, "learning_rate": 5.157178660938061e-06, "loss": 0.0, "step": 8321 }, { "epoch": 0.5363150093445898, "grad_norm": 0.030279863544600275, "learning_rate": 5.156462585034014e-06, "loss": 0.0002, "step": 8322 }, { "epoch": 0.5363794547915189, "grad_norm": 0.00022730484292783038, "learning_rate": 5.155746509129968e-06, "loss": 0.0, "step": 8323 }, { "epoch": 0.5364439002384481, "grad_norm": 6.012959763087097e-05, "learning_rate": 5.155030433225923e-06, "loss": 0.0, "step": 8324 }, { "epoch": 0.5365083456853773, "grad_norm": 0.004307225892368384, "learning_rate": 5.154314357321877e-06, "loss": 0.0, "step": 8325 }, { "epoch": 0.5365727911323065, "grad_norm": 0.14500555619742644, "learning_rate": 5.153598281417831e-06, "loss": 0.0013, "step": 8326 }, { "epoch": 0.5366372365792357, "grad_norm": 0.003633286377291637, "learning_rate": 5.152882205513785e-06, "loss": 0.0, "step": 8327 }, { "epoch": 0.5367016820261649, "grad_norm": 0.0030499388817333785, "learning_rate": 5.152166129609739e-06, "loss": 0.0, "step": 8328 }, { "epoch": 0.536766127473094, "grad_norm": 0.003460639557092233, "learning_rate": 5.151450053705693e-06, "loss": 0.0, "step": 8329 }, { "epoch": 0.5368305729200232, "grad_norm": 0.0654827941475252, "learning_rate": 5.150733977801648e-06, "loss": 0.0002, "step": 8330 }, { "epoch": 0.5368950183669524, "grad_norm": 0.0006291733494983527, "learning_rate": 5.150017901897602e-06, "loss": 0.0, "step": 8331 }, { "epoch": 0.5369594638138816, "grad_norm": 0.004459620943301213, "learning_rate": 5.149301825993555e-06, "loss": 0.0001, "step": 8332 }, { "epoch": 0.5370239092608107, "grad_norm": 0.00040403846333492225, "learning_rate": 5.14858575008951e-06, "loss": 0.0, "step": 8333 }, { "epoch": 0.5370883547077399, "grad_norm": 0.0007446600423081939, "learning_rate": 5.147869674185464e-06, "loss": 0.0, "step": 8334 }, { "epoch": 0.537152800154669, "grad_norm": 0.0001650185458354627, "learning_rate": 5.147153598281418e-06, "loss": 0.0, "step": 8335 }, { "epoch": 0.5372172456015982, "grad_norm": 1.7263584494515336e-05, "learning_rate": 5.146437522377372e-06, "loss": 0.0, "step": 8336 }, { "epoch": 0.5372816910485274, "grad_norm": 0.002230091483141834, "learning_rate": 5.145721446473326e-06, "loss": 0.0, "step": 8337 }, { "epoch": 0.5373461364954566, "grad_norm": 0.0005651914951587413, "learning_rate": 5.14500537056928e-06, "loss": 0.0, "step": 8338 }, { "epoch": 0.5374105819423858, "grad_norm": 0.00022583757860816512, "learning_rate": 5.144289294665235e-06, "loss": 0.0, "step": 8339 }, { "epoch": 0.537475027389315, "grad_norm": 0.170381111795957, "learning_rate": 5.143573218761189e-06, "loss": 0.0004, "step": 8340 }, { "epoch": 0.5375394728362441, "grad_norm": 0.002303433150644348, "learning_rate": 5.142857142857142e-06, "loss": 0.0, "step": 8341 }, { "epoch": 0.5376039182831733, "grad_norm": 0.002125040189832157, "learning_rate": 5.142141066953097e-06, "loss": 0.0, "step": 8342 }, { "epoch": 0.5376683637301025, "grad_norm": 0.00013921987677024776, "learning_rate": 5.141424991049052e-06, "loss": 0.0, "step": 8343 }, { "epoch": 0.5377328091770316, "grad_norm": 0.00022435381485150807, "learning_rate": 5.140708915145006e-06, "loss": 0.0, "step": 8344 }, { "epoch": 0.5377972546239608, "grad_norm": 0.003466804477732005, "learning_rate": 5.1399928392409604e-06, "loss": 0.0, "step": 8345 }, { "epoch": 0.53786170007089, "grad_norm": 0.00026267158997037504, "learning_rate": 5.139276763336915e-06, "loss": 0.0, "step": 8346 }, { "epoch": 0.5379261455178191, "grad_norm": 0.0022633420166437274, "learning_rate": 5.138560687432869e-06, "loss": 0.0, "step": 8347 }, { "epoch": 0.5379905909647483, "grad_norm": 0.025317271810819834, "learning_rate": 5.1378446115288225e-06, "loss": 0.0001, "step": 8348 }, { "epoch": 0.5380550364116775, "grad_norm": 0.000673680858811099, "learning_rate": 5.137128535624777e-06, "loss": 0.0, "step": 8349 }, { "epoch": 0.5381194818586067, "grad_norm": 0.0018941198791382194, "learning_rate": 5.136412459720731e-06, "loss": 0.0, "step": 8350 }, { "epoch": 0.5381839273055359, "grad_norm": 0.5314760288276836, "learning_rate": 5.135696383816685e-06, "loss": 0.0032, "step": 8351 }, { "epoch": 0.5382483727524651, "grad_norm": 0.010573634830043211, "learning_rate": 5.134980307912639e-06, "loss": 0.0001, "step": 8352 }, { "epoch": 0.5383128181993942, "grad_norm": 0.00018203131654043764, "learning_rate": 5.134264232008593e-06, "loss": 0.0, "step": 8353 }, { "epoch": 0.5383772636463234, "grad_norm": 1.1062119213382617, "learning_rate": 5.133548156104547e-06, "loss": 0.0072, "step": 8354 }, { "epoch": 0.5384417090932525, "grad_norm": 0.010292150074225268, "learning_rate": 5.132832080200502e-06, "loss": 0.0, "step": 8355 }, { "epoch": 0.5385061545401817, "grad_norm": 0.011232534638379256, "learning_rate": 5.132116004296456e-06, "loss": 0.0001, "step": 8356 }, { "epoch": 0.5385705999871109, "grad_norm": 0.006481248957277151, "learning_rate": 5.1313999283924095e-06, "loss": 0.0, "step": 8357 }, { "epoch": 0.5386350454340401, "grad_norm": 0.0103300801049018, "learning_rate": 5.130683852488364e-06, "loss": 0.0, "step": 8358 }, { "epoch": 0.5386994908809692, "grad_norm": 0.0007205325030744387, "learning_rate": 5.129967776584318e-06, "loss": 0.0, "step": 8359 }, { "epoch": 0.5387639363278984, "grad_norm": 0.001971069018050957, "learning_rate": 5.129251700680272e-06, "loss": 0.0, "step": 8360 }, { "epoch": 0.5388283817748276, "grad_norm": 0.00025740076583384244, "learning_rate": 5.128535624776227e-06, "loss": 0.0, "step": 8361 }, { "epoch": 0.5388928272217568, "grad_norm": 0.004754505978913037, "learning_rate": 5.12781954887218e-06, "loss": 0.0001, "step": 8362 }, { "epoch": 0.538957272668686, "grad_norm": 6.622904868150729e-05, "learning_rate": 5.127103472968134e-06, "loss": 0.0, "step": 8363 }, { "epoch": 0.5390217181156152, "grad_norm": 0.3479271808147232, "learning_rate": 5.126387397064089e-06, "loss": 0.0217, "step": 8364 }, { "epoch": 0.5390861635625444, "grad_norm": 8.444964625094108e-05, "learning_rate": 5.125671321160043e-06, "loss": 0.0, "step": 8365 }, { "epoch": 0.5391506090094735, "grad_norm": 0.00021665471768633301, "learning_rate": 5.124955245255998e-06, "loss": 0.0, "step": 8366 }, { "epoch": 0.5392150544564026, "grad_norm": 0.00021705941240832152, "learning_rate": 5.1242391693519525e-06, "loss": 0.0, "step": 8367 }, { "epoch": 0.5392794999033318, "grad_norm": 0.05381334252700333, "learning_rate": 5.123523093447906e-06, "loss": 0.0002, "step": 8368 }, { "epoch": 0.539343945350261, "grad_norm": 0.0011084411287508335, "learning_rate": 5.12280701754386e-06, "loss": 0.0, "step": 8369 }, { "epoch": 0.5394083907971902, "grad_norm": 0.002692409573749917, "learning_rate": 5.1220909416398145e-06, "loss": 0.0, "step": 8370 }, { "epoch": 0.5394728362441193, "grad_norm": 0.0019404386497965766, "learning_rate": 5.121374865735769e-06, "loss": 0.0, "step": 8371 }, { "epoch": 0.5395372816910485, "grad_norm": 0.000830474063169443, "learning_rate": 5.120658789831723e-06, "loss": 0.0, "step": 8372 }, { "epoch": 0.5396017271379777, "grad_norm": 0.0001285680201385665, "learning_rate": 5.1199427139276766e-06, "loss": 0.0, "step": 8373 }, { "epoch": 0.5396661725849069, "grad_norm": 1.688643150650448e-05, "learning_rate": 5.119226638023631e-06, "loss": 0.0, "step": 8374 }, { "epoch": 0.5397306180318361, "grad_norm": 0.010633813313598334, "learning_rate": 5.118510562119585e-06, "loss": 0.0002, "step": 8375 }, { "epoch": 0.5397950634787653, "grad_norm": 0.2513697768569286, "learning_rate": 5.1177944862155394e-06, "loss": 0.0004, "step": 8376 }, { "epoch": 0.5398595089256945, "grad_norm": 0.011243210437085592, "learning_rate": 5.117078410311494e-06, "loss": 0.0, "step": 8377 }, { "epoch": 0.5399239543726235, "grad_norm": 0.0634902939249464, "learning_rate": 5.116362334407447e-06, "loss": 0.0001, "step": 8378 }, { "epoch": 0.5399883998195527, "grad_norm": 0.009778168936793704, "learning_rate": 5.1156462585034015e-06, "loss": 0.0001, "step": 8379 }, { "epoch": 0.5400528452664819, "grad_norm": 4.753302020719344e-05, "learning_rate": 5.114930182599356e-06, "loss": 0.0, "step": 8380 }, { "epoch": 0.5401172907134111, "grad_norm": 0.0010928865291910395, "learning_rate": 5.11421410669531e-06, "loss": 0.0, "step": 8381 }, { "epoch": 0.5401817361603403, "grad_norm": 6.587222646677347e-06, "learning_rate": 5.113498030791264e-06, "loss": 0.0, "step": 8382 }, { "epoch": 0.5402461816072694, "grad_norm": 0.0003763542620900478, "learning_rate": 5.112781954887218e-06, "loss": 0.0, "step": 8383 }, { "epoch": 0.5403106270541986, "grad_norm": 0.0005508190320432071, "learning_rate": 5.112065878983172e-06, "loss": 0.0, "step": 8384 }, { "epoch": 0.5403750725011278, "grad_norm": 0.0001657109248957889, "learning_rate": 5.1113498030791264e-06, "loss": 0.0, "step": 8385 }, { "epoch": 0.540439517948057, "grad_norm": 0.000585394327920387, "learning_rate": 5.110633727175081e-06, "loss": 0.0, "step": 8386 }, { "epoch": 0.5405039633949862, "grad_norm": 0.12383700047162453, "learning_rate": 5.109917651271035e-06, "loss": 0.0003, "step": 8387 }, { "epoch": 0.5405684088419154, "grad_norm": 0.0012521166326916959, "learning_rate": 5.1092015753669885e-06, "loss": 0.0, "step": 8388 }, { "epoch": 0.5406328542888444, "grad_norm": 0.024399225972084052, "learning_rate": 5.108485499462944e-06, "loss": 0.0002, "step": 8389 }, { "epoch": 0.5406972997357736, "grad_norm": 0.0009896365504723618, "learning_rate": 5.107769423558898e-06, "loss": 0.0, "step": 8390 }, { "epoch": 0.5407617451827028, "grad_norm": 0.41062065354460814, "learning_rate": 5.107053347654852e-06, "loss": 0.003, "step": 8391 }, { "epoch": 0.540826190629632, "grad_norm": 0.00020028325207377276, "learning_rate": 5.1063372717508065e-06, "loss": 0.0, "step": 8392 }, { "epoch": 0.5408906360765612, "grad_norm": 0.0021295931855767553, "learning_rate": 5.105621195846761e-06, "loss": 0.0, "step": 8393 }, { "epoch": 0.5409550815234904, "grad_norm": 0.003080648669282957, "learning_rate": 5.104905119942714e-06, "loss": 0.0, "step": 8394 }, { "epoch": 0.5410195269704196, "grad_norm": 0.037808258822707816, "learning_rate": 5.104189044038669e-06, "loss": 0.0001, "step": 8395 }, { "epoch": 0.5410839724173487, "grad_norm": 1.003356723819026, "learning_rate": 5.103472968134623e-06, "loss": 0.006, "step": 8396 }, { "epoch": 0.5411484178642779, "grad_norm": 1.2780228888800633, "learning_rate": 5.102756892230577e-06, "loss": 0.0034, "step": 8397 }, { "epoch": 0.5412128633112071, "grad_norm": 0.006901590997431245, "learning_rate": 5.1020408163265315e-06, "loss": 0.0, "step": 8398 }, { "epoch": 0.5412773087581363, "grad_norm": 0.020858812785563983, "learning_rate": 5.101324740422485e-06, "loss": 0.0001, "step": 8399 }, { "epoch": 0.5413417542050654, "grad_norm": 0.0006388117841880102, "learning_rate": 5.100608664518439e-06, "loss": 0.0, "step": 8400 }, { "epoch": 0.5414061996519945, "grad_norm": 0.09992779997527523, "learning_rate": 5.0998925886143935e-06, "loss": 0.0002, "step": 8401 }, { "epoch": 0.5414706450989237, "grad_norm": 0.01728350633489314, "learning_rate": 5.099176512710348e-06, "loss": 0.0, "step": 8402 }, { "epoch": 0.5415350905458529, "grad_norm": 0.010191441552771597, "learning_rate": 5.098460436806302e-06, "loss": 0.0001, "step": 8403 }, { "epoch": 0.5415995359927821, "grad_norm": 0.0006010007948885225, "learning_rate": 5.097744360902256e-06, "loss": 0.0, "step": 8404 }, { "epoch": 0.5416639814397113, "grad_norm": 0.002163450281916386, "learning_rate": 5.09702828499821e-06, "loss": 0.0, "step": 8405 }, { "epoch": 0.5417284268866405, "grad_norm": 0.2445328803815511, "learning_rate": 5.096312209094164e-06, "loss": 0.0004, "step": 8406 }, { "epoch": 0.5417928723335697, "grad_norm": 0.00222522598216728, "learning_rate": 5.0955961331901185e-06, "loss": 0.0, "step": 8407 }, { "epoch": 0.5418573177804988, "grad_norm": 0.016346940554171265, "learning_rate": 5.094880057286073e-06, "loss": 0.0, "step": 8408 }, { "epoch": 0.541921763227428, "grad_norm": 0.0017265815881555501, "learning_rate": 5.094163981382026e-06, "loss": 0.0, "step": 8409 }, { "epoch": 0.5419862086743572, "grad_norm": 0.002953750292921564, "learning_rate": 5.0934479054779805e-06, "loss": 0.0, "step": 8410 }, { "epoch": 0.5420506541212863, "grad_norm": 0.0017401369012034575, "learning_rate": 5.092731829573935e-06, "loss": 0.0, "step": 8411 }, { "epoch": 0.5421150995682155, "grad_norm": 0.17858228586602923, "learning_rate": 5.092015753669889e-06, "loss": 0.0002, "step": 8412 }, { "epoch": 0.5421795450151446, "grad_norm": 0.07280302949534724, "learning_rate": 5.091299677765844e-06, "loss": 0.0003, "step": 8413 }, { "epoch": 0.5422439904620738, "grad_norm": 0.02793595399615248, "learning_rate": 5.0905836018617986e-06, "loss": 0.0, "step": 8414 }, { "epoch": 0.542308435909003, "grad_norm": 0.07168111059143023, "learning_rate": 5.089867525957752e-06, "loss": 0.0002, "step": 8415 }, { "epoch": 0.5423728813559322, "grad_norm": 0.013141592770366813, "learning_rate": 5.089151450053706e-06, "loss": 0.0, "step": 8416 }, { "epoch": 0.5424373268028614, "grad_norm": 0.008050688347397276, "learning_rate": 5.088435374149661e-06, "loss": 0.0, "step": 8417 }, { "epoch": 0.5425017722497906, "grad_norm": 0.01882217632422738, "learning_rate": 5.087719298245615e-06, "loss": 0.0, "step": 8418 }, { "epoch": 0.5425662176967198, "grad_norm": 0.0018837017851688694, "learning_rate": 5.087003222341569e-06, "loss": 0.0, "step": 8419 }, { "epoch": 0.5426306631436489, "grad_norm": 0.0027659480170431224, "learning_rate": 5.086287146437523e-06, "loss": 0.0, "step": 8420 }, { "epoch": 0.5426951085905781, "grad_norm": 0.02249851271354615, "learning_rate": 5.085571070533477e-06, "loss": 0.0, "step": 8421 }, { "epoch": 0.5427595540375072, "grad_norm": 0.24873905024797605, "learning_rate": 5.084854994629431e-06, "loss": 0.0004, "step": 8422 }, { "epoch": 0.5428239994844364, "grad_norm": 0.0633032486789857, "learning_rate": 5.0841389187253856e-06, "loss": 0.0003, "step": 8423 }, { "epoch": 0.5428884449313656, "grad_norm": 0.00014077413343572848, "learning_rate": 5.08342284282134e-06, "loss": 0.0, "step": 8424 }, { "epoch": 0.5429528903782948, "grad_norm": 0.16562388707632744, "learning_rate": 5.082706766917293e-06, "loss": 0.0004, "step": 8425 }, { "epoch": 0.5430173358252239, "grad_norm": 0.020381830748196907, "learning_rate": 5.081990691013248e-06, "loss": 0.0001, "step": 8426 }, { "epoch": 0.5430817812721531, "grad_norm": 0.0020281902919562383, "learning_rate": 5.081274615109202e-06, "loss": 0.0, "step": 8427 }, { "epoch": 0.5431462267190823, "grad_norm": 0.0002616038828169522, "learning_rate": 5.080558539205156e-06, "loss": 0.0, "step": 8428 }, { "epoch": 0.5432106721660115, "grad_norm": 0.043275989112033196, "learning_rate": 5.0798424633011105e-06, "loss": 0.0002, "step": 8429 }, { "epoch": 0.5432751176129407, "grad_norm": 0.00038925996925701165, "learning_rate": 5.079126387397064e-06, "loss": 0.0, "step": 8430 }, { "epoch": 0.5433395630598699, "grad_norm": 0.1549475627115091, "learning_rate": 5.078410311493018e-06, "loss": 0.0007, "step": 8431 }, { "epoch": 0.543404008506799, "grad_norm": 0.22845921268417974, "learning_rate": 5.0776942355889726e-06, "loss": 0.0004, "step": 8432 }, { "epoch": 0.5434684539537281, "grad_norm": 0.02679208840814703, "learning_rate": 5.076978159684927e-06, "loss": 0.0001, "step": 8433 }, { "epoch": 0.5435328994006573, "grad_norm": 0.004714868407184482, "learning_rate": 5.07626208378088e-06, "loss": 0.0, "step": 8434 }, { "epoch": 0.5435973448475865, "grad_norm": 0.00257031806453596, "learning_rate": 5.075546007876835e-06, "loss": 0.0, "step": 8435 }, { "epoch": 0.5436617902945157, "grad_norm": 0.021800592707464768, "learning_rate": 5.07482993197279e-06, "loss": 0.0, "step": 8436 }, { "epoch": 0.5437262357414449, "grad_norm": 0.006127748959277537, "learning_rate": 5.074113856068744e-06, "loss": 0.0, "step": 8437 }, { "epoch": 0.543790681188374, "grad_norm": 0.6883689850433048, "learning_rate": 5.073397780164698e-06, "loss": 0.0046, "step": 8438 }, { "epoch": 0.5438551266353032, "grad_norm": 0.001150318426266875, "learning_rate": 5.072681704260653e-06, "loss": 0.0, "step": 8439 }, { "epoch": 0.5439195720822324, "grad_norm": 0.002072532651337089, "learning_rate": 5.071965628356607e-06, "loss": 0.0, "step": 8440 }, { "epoch": 0.5439840175291616, "grad_norm": 0.0002500489370783692, "learning_rate": 5.07124955245256e-06, "loss": 0.0, "step": 8441 }, { "epoch": 0.5440484629760908, "grad_norm": 0.3168147475990133, "learning_rate": 5.070533476548515e-06, "loss": 0.0007, "step": 8442 }, { "epoch": 0.54411290842302, "grad_norm": 0.8963605674738248, "learning_rate": 5.069817400644469e-06, "loss": 0.0054, "step": 8443 }, { "epoch": 0.5441773538699491, "grad_norm": 0.001800277014813923, "learning_rate": 5.069101324740423e-06, "loss": 0.0, "step": 8444 }, { "epoch": 0.5442417993168782, "grad_norm": 0.010620852971074788, "learning_rate": 5.068385248836378e-06, "loss": 0.0, "step": 8445 }, { "epoch": 0.5443062447638074, "grad_norm": 0.012435870960374875, "learning_rate": 5.067669172932331e-06, "loss": 0.0, "step": 8446 }, { "epoch": 0.5443706902107366, "grad_norm": 0.016274537573299074, "learning_rate": 5.066953097028285e-06, "loss": 0.0002, "step": 8447 }, { "epoch": 0.5444351356576658, "grad_norm": 0.046486270889920786, "learning_rate": 5.06623702112424e-06, "loss": 0.0, "step": 8448 }, { "epoch": 0.544499581104595, "grad_norm": 0.2916972351517615, "learning_rate": 5.065520945220194e-06, "loss": 0.0006, "step": 8449 }, { "epoch": 0.5445640265515241, "grad_norm": 0.006940782184560781, "learning_rate": 5.064804869316147e-06, "loss": 0.0, "step": 8450 }, { "epoch": 0.5446284719984533, "grad_norm": 0.016796576003806782, "learning_rate": 5.064088793412102e-06, "loss": 0.0, "step": 8451 }, { "epoch": 0.5446929174453825, "grad_norm": 0.1291454725284565, "learning_rate": 5.063372717508056e-06, "loss": 0.0004, "step": 8452 }, { "epoch": 0.5447573628923117, "grad_norm": 0.21752034007255436, "learning_rate": 5.06265664160401e-06, "loss": 0.0037, "step": 8453 }, { "epoch": 0.5448218083392409, "grad_norm": 0.21748016693135402, "learning_rate": 5.061940565699965e-06, "loss": 0.0007, "step": 8454 }, { "epoch": 0.5448862537861701, "grad_norm": 0.013149752274337866, "learning_rate": 5.061224489795918e-06, "loss": 0.0, "step": 8455 }, { "epoch": 0.5449506992330991, "grad_norm": 0.00546176216623649, "learning_rate": 5.060508413891872e-06, "loss": 0.0, "step": 8456 }, { "epoch": 0.5450151446800283, "grad_norm": 0.1679995530738449, "learning_rate": 5.059792337987827e-06, "loss": 0.0003, "step": 8457 }, { "epoch": 0.5450795901269575, "grad_norm": 0.005963703038636505, "learning_rate": 5.059076262083781e-06, "loss": 0.0, "step": 8458 }, { "epoch": 0.5451440355738867, "grad_norm": 0.0013005704425181387, "learning_rate": 5.058360186179736e-06, "loss": 0.0, "step": 8459 }, { "epoch": 0.5452084810208159, "grad_norm": 0.28935846435973755, "learning_rate": 5.05764411027569e-06, "loss": 0.0004, "step": 8460 }, { "epoch": 0.5452729264677451, "grad_norm": 0.05144240661139117, "learning_rate": 5.056928034371645e-06, "loss": 0.0001, "step": 8461 }, { "epoch": 0.5453373719146742, "grad_norm": 0.25995588863752056, "learning_rate": 5.056211958467598e-06, "loss": 0.0014, "step": 8462 }, { "epoch": 0.5454018173616034, "grad_norm": 0.0034649733590266948, "learning_rate": 5.0554958825635524e-06, "loss": 0.0, "step": 8463 }, { "epoch": 0.5454662628085326, "grad_norm": 0.00736018183046089, "learning_rate": 5.054779806659507e-06, "loss": 0.0, "step": 8464 }, { "epoch": 0.5455307082554618, "grad_norm": 0.20396441547188474, "learning_rate": 5.054063730755461e-06, "loss": 0.0009, "step": 8465 }, { "epoch": 0.545595153702391, "grad_norm": 0.0818718599215992, "learning_rate": 5.0533476548514145e-06, "loss": 0.0001, "step": 8466 }, { "epoch": 0.54565959914932, "grad_norm": 0.571786751738957, "learning_rate": 5.052631578947369e-06, "loss": 0.0005, "step": 8467 }, { "epoch": 0.5457240445962492, "grad_norm": 0.03189593553225645, "learning_rate": 5.051915503043323e-06, "loss": 0.0, "step": 8468 }, { "epoch": 0.5457884900431784, "grad_norm": 0.2562977042803086, "learning_rate": 5.051199427139277e-06, "loss": 0.0055, "step": 8469 }, { "epoch": 0.5458529354901076, "grad_norm": 0.1811014294086341, "learning_rate": 5.050483351235232e-06, "loss": 0.0001, "step": 8470 }, { "epoch": 0.5459173809370368, "grad_norm": 0.00438817026625853, "learning_rate": 5.049767275331185e-06, "loss": 0.0, "step": 8471 }, { "epoch": 0.545981826383966, "grad_norm": 0.05164839767589543, "learning_rate": 5.049051199427139e-06, "loss": 0.0001, "step": 8472 }, { "epoch": 0.5460462718308952, "grad_norm": 0.0001833333554516947, "learning_rate": 5.048335123523094e-06, "loss": 0.0, "step": 8473 }, { "epoch": 0.5461107172778243, "grad_norm": 0.014345555426445568, "learning_rate": 5.047619047619048e-06, "loss": 0.0, "step": 8474 }, { "epoch": 0.5461751627247535, "grad_norm": 0.0011238233457995053, "learning_rate": 5.046902971715002e-06, "loss": 0.0, "step": 8475 }, { "epoch": 0.5462396081716827, "grad_norm": 1.2545085185504081, "learning_rate": 5.046186895810956e-06, "loss": 0.0033, "step": 8476 }, { "epoch": 0.5463040536186119, "grad_norm": 0.5972288261339238, "learning_rate": 5.04547081990691e-06, "loss": 0.0017, "step": 8477 }, { "epoch": 0.546368499065541, "grad_norm": 0.00844234305207491, "learning_rate": 5.044754744002864e-06, "loss": 0.0, "step": 8478 }, { "epoch": 0.5464329445124702, "grad_norm": 0.0011451975272744536, "learning_rate": 5.044038668098819e-06, "loss": 0.0, "step": 8479 }, { "epoch": 0.5464973899593993, "grad_norm": 0.05387772975290664, "learning_rate": 5.043322592194773e-06, "loss": 0.0002, "step": 8480 }, { "epoch": 0.5465618354063285, "grad_norm": 0.0005193757903388635, "learning_rate": 5.042606516290726e-06, "loss": 0.0, "step": 8481 }, { "epoch": 0.5466262808532577, "grad_norm": 5.494419434045538e-05, "learning_rate": 5.041890440386681e-06, "loss": 0.0, "step": 8482 }, { "epoch": 0.5466907263001869, "grad_norm": 0.027016486452575494, "learning_rate": 5.041174364482636e-06, "loss": 0.0, "step": 8483 }, { "epoch": 0.5467551717471161, "grad_norm": 0.0043791513808389085, "learning_rate": 5.04045828857859e-06, "loss": 0.0, "step": 8484 }, { "epoch": 0.5468196171940453, "grad_norm": 0.29107433725311255, "learning_rate": 5.0397422126745445e-06, "loss": 0.0023, "step": 8485 }, { "epoch": 0.5468840626409744, "grad_norm": 0.1543428437047449, "learning_rate": 5.039026136770499e-06, "loss": 0.001, "step": 8486 }, { "epoch": 0.5469485080879036, "grad_norm": 0.008881782261700416, "learning_rate": 5.038310060866452e-06, "loss": 0.0, "step": 8487 }, { "epoch": 0.5470129535348328, "grad_norm": 0.06710262718182129, "learning_rate": 5.0375939849624065e-06, "loss": 0.0003, "step": 8488 }, { "epoch": 0.5470773989817619, "grad_norm": 0.04351480667082749, "learning_rate": 5.036877909058361e-06, "loss": 0.0016, "step": 8489 }, { "epoch": 0.5471418444286911, "grad_norm": 0.0027709001929434106, "learning_rate": 5.036161833154315e-06, "loss": 0.0, "step": 8490 }, { "epoch": 0.5472062898756203, "grad_norm": 0.008246434111247947, "learning_rate": 5.035445757250269e-06, "loss": 0.0, "step": 8491 }, { "epoch": 0.5472707353225494, "grad_norm": 0.001855429937951199, "learning_rate": 5.034729681346223e-06, "loss": 0.0, "step": 8492 }, { "epoch": 0.5473351807694786, "grad_norm": 0.054930165588919964, "learning_rate": 5.034013605442177e-06, "loss": 0.0002, "step": 8493 }, { "epoch": 0.5473996262164078, "grad_norm": 0.005980705986750074, "learning_rate": 5.0332975295381314e-06, "loss": 0.0, "step": 8494 }, { "epoch": 0.547464071663337, "grad_norm": 0.0031414896183599274, "learning_rate": 5.032581453634086e-06, "loss": 0.0, "step": 8495 }, { "epoch": 0.5475285171102662, "grad_norm": 0.0005091821035342204, "learning_rate": 5.03186537773004e-06, "loss": 0.0, "step": 8496 }, { "epoch": 0.5475929625571954, "grad_norm": 0.0017150736231015301, "learning_rate": 5.0311493018259935e-06, "loss": 0.0, "step": 8497 }, { "epoch": 0.5476574080041245, "grad_norm": 0.19840486551000913, "learning_rate": 5.030433225921948e-06, "loss": 0.0002, "step": 8498 }, { "epoch": 0.5477218534510537, "grad_norm": 1.3321290190046273, "learning_rate": 5.029717150017902e-06, "loss": 0.0099, "step": 8499 }, { "epoch": 0.5477862988979828, "grad_norm": 0.00012402391058209097, "learning_rate": 5.029001074113856e-06, "loss": 0.0, "step": 8500 }, { "epoch": 0.547850744344912, "grad_norm": 0.1594086270292522, "learning_rate": 5.028284998209811e-06, "loss": 0.0033, "step": 8501 }, { "epoch": 0.5479151897918412, "grad_norm": 0.005608211860773984, "learning_rate": 5.027568922305764e-06, "loss": 0.0, "step": 8502 }, { "epoch": 0.5479796352387704, "grad_norm": 0.000615421787577941, "learning_rate": 5.0268528464017184e-06, "loss": 0.0, "step": 8503 }, { "epoch": 0.5480440806856995, "grad_norm": 0.002077126853143437, "learning_rate": 5.026136770497673e-06, "loss": 0.0, "step": 8504 }, { "epoch": 0.5481085261326287, "grad_norm": 0.06163211903730178, "learning_rate": 5.025420694593627e-06, "loss": 0.0036, "step": 8505 }, { "epoch": 0.5481729715795579, "grad_norm": 0.016333306029010678, "learning_rate": 5.024704618689582e-06, "loss": 0.0, "step": 8506 }, { "epoch": 0.5482374170264871, "grad_norm": 0.002153469672369918, "learning_rate": 5.0239885427855365e-06, "loss": 0.0, "step": 8507 }, { "epoch": 0.5483018624734163, "grad_norm": 0.1651638620042547, "learning_rate": 5.02327246688149e-06, "loss": 0.0011, "step": 8508 }, { "epoch": 0.5483663079203455, "grad_norm": 0.00636054384750868, "learning_rate": 5.022556390977444e-06, "loss": 0.0001, "step": 8509 }, { "epoch": 0.5484307533672746, "grad_norm": 0.0033881972217375076, "learning_rate": 5.0218403150733985e-06, "loss": 0.0, "step": 8510 }, { "epoch": 0.5484951988142037, "grad_norm": 0.0018993068083641498, "learning_rate": 5.021124239169353e-06, "loss": 0.0, "step": 8511 }, { "epoch": 0.5485596442611329, "grad_norm": 0.005748847183468727, "learning_rate": 5.020408163265307e-06, "loss": 0.0, "step": 8512 }, { "epoch": 0.5486240897080621, "grad_norm": 0.27499401671229495, "learning_rate": 5.019692087361261e-06, "loss": 0.0009, "step": 8513 }, { "epoch": 0.5486885351549913, "grad_norm": 0.017018584289137592, "learning_rate": 5.018976011457215e-06, "loss": 0.0, "step": 8514 }, { "epoch": 0.5487529806019205, "grad_norm": 0.0014632281008861163, "learning_rate": 5.018259935553169e-06, "loss": 0.0, "step": 8515 }, { "epoch": 0.5488174260488496, "grad_norm": 0.012847125648462757, "learning_rate": 5.0175438596491235e-06, "loss": 0.0, "step": 8516 }, { "epoch": 0.5488818714957788, "grad_norm": 0.007008510181476973, "learning_rate": 5.016827783745078e-06, "loss": 0.0, "step": 8517 }, { "epoch": 0.548946316942708, "grad_norm": 0.11917408488960375, "learning_rate": 5.016111707841031e-06, "loss": 0.0003, "step": 8518 }, { "epoch": 0.5490107623896372, "grad_norm": 0.007272636048923761, "learning_rate": 5.0153956319369855e-06, "loss": 0.0001, "step": 8519 }, { "epoch": 0.5490752078365664, "grad_norm": 0.004351227458944417, "learning_rate": 5.01467955603294e-06, "loss": 0.0, "step": 8520 }, { "epoch": 0.5491396532834956, "grad_norm": 0.19051117742905466, "learning_rate": 5.013963480128894e-06, "loss": 0.0034, "step": 8521 }, { "epoch": 0.5492040987304248, "grad_norm": 0.00026021408293215944, "learning_rate": 5.0132474042248484e-06, "loss": 0.0, "step": 8522 }, { "epoch": 0.5492685441773538, "grad_norm": 0.002791641585007276, "learning_rate": 5.012531328320802e-06, "loss": 0.0, "step": 8523 }, { "epoch": 0.549332989624283, "grad_norm": 0.005355082463114203, "learning_rate": 5.011815252416756e-06, "loss": 0.0, "step": 8524 }, { "epoch": 0.5493974350712122, "grad_norm": 0.005519173063785302, "learning_rate": 5.0110991765127105e-06, "loss": 0.0, "step": 8525 }, { "epoch": 0.5494618805181414, "grad_norm": 0.0020975079614027133, "learning_rate": 5.010383100608665e-06, "loss": 0.0, "step": 8526 }, { "epoch": 0.5495263259650706, "grad_norm": 0.015470825862644465, "learning_rate": 5.009667024704618e-06, "loss": 0.0001, "step": 8527 }, { "epoch": 0.5495907714119997, "grad_norm": 0.0014627428871791936, "learning_rate": 5.0089509488005725e-06, "loss": 0.0, "step": 8528 }, { "epoch": 0.5496552168589289, "grad_norm": 0.012332215954442869, "learning_rate": 5.008234872896528e-06, "loss": 0.0001, "step": 8529 }, { "epoch": 0.5497196623058581, "grad_norm": 0.000407497928952683, "learning_rate": 5.007518796992482e-06, "loss": 0.0, "step": 8530 }, { "epoch": 0.5497841077527873, "grad_norm": 0.0013019234173674507, "learning_rate": 5.006802721088436e-06, "loss": 0.0, "step": 8531 }, { "epoch": 0.5498485531997165, "grad_norm": 0.0013845580062636382, "learning_rate": 5.0060866451843906e-06, "loss": 0.0, "step": 8532 }, { "epoch": 0.5499129986466457, "grad_norm": 0.2245196370524226, "learning_rate": 5.005370569280345e-06, "loss": 0.0005, "step": 8533 }, { "epoch": 0.5499774440935747, "grad_norm": 8.993623459536706e-05, "learning_rate": 5.004654493376298e-06, "loss": 0.0, "step": 8534 }, { "epoch": 0.5500418895405039, "grad_norm": 0.3626066527828885, "learning_rate": 5.003938417472253e-06, "loss": 0.0022, "step": 8535 }, { "epoch": 0.5501063349874331, "grad_norm": 0.020626980673355562, "learning_rate": 5.003222341568207e-06, "loss": 0.0, "step": 8536 }, { "epoch": 0.5501707804343623, "grad_norm": 0.10646967652793571, "learning_rate": 5.002506265664161e-06, "loss": 0.0004, "step": 8537 }, { "epoch": 0.5502352258812915, "grad_norm": 0.09317456412157121, "learning_rate": 5.0017901897601155e-06, "loss": 0.0002, "step": 8538 }, { "epoch": 0.5502996713282207, "grad_norm": 0.036085198902033974, "learning_rate": 5.001074113856069e-06, "loss": 0.0002, "step": 8539 }, { "epoch": 0.5503641167751498, "grad_norm": 0.0037343965925228746, "learning_rate": 5.000358037952023e-06, "loss": 0.0, "step": 8540 }, { "epoch": 0.550428562222079, "grad_norm": 0.009326869675734522, "learning_rate": 4.9996419620479776e-06, "loss": 0.0, "step": 8541 }, { "epoch": 0.5504930076690082, "grad_norm": 0.021823792039814445, "learning_rate": 4.998925886143932e-06, "loss": 0.0001, "step": 8542 }, { "epoch": 0.5505574531159374, "grad_norm": 0.0001219656029408, "learning_rate": 4.998209810239885e-06, "loss": 0.0, "step": 8543 }, { "epoch": 0.5506218985628666, "grad_norm": 0.03387806160686626, "learning_rate": 4.99749373433584e-06, "loss": 0.0, "step": 8544 }, { "epoch": 0.5506863440097957, "grad_norm": 0.00012018551306236546, "learning_rate": 4.996777658431794e-06, "loss": 0.0, "step": 8545 }, { "epoch": 0.5507507894567248, "grad_norm": 0.007050738607783566, "learning_rate": 4.996061582527748e-06, "loss": 0.0, "step": 8546 }, { "epoch": 0.550815234903654, "grad_norm": 0.0021130126048716142, "learning_rate": 4.9953455066237025e-06, "loss": 0.0, "step": 8547 }, { "epoch": 0.5508796803505832, "grad_norm": 0.0006315345436932797, "learning_rate": 4.994629430719657e-06, "loss": 0.0, "step": 8548 }, { "epoch": 0.5509441257975124, "grad_norm": 0.0007102576706969377, "learning_rate": 4.993913354815611e-06, "loss": 0.0, "step": 8549 }, { "epoch": 0.5510085712444416, "grad_norm": 0.001955949521665762, "learning_rate": 4.993197278911565e-06, "loss": 0.0, "step": 8550 }, { "epoch": 0.5510730166913708, "grad_norm": 0.009190210993093906, "learning_rate": 4.992481203007519e-06, "loss": 0.0, "step": 8551 }, { "epoch": 0.5511374621383, "grad_norm": 0.401591291768431, "learning_rate": 4.991765127103473e-06, "loss": 0.0007, "step": 8552 }, { "epoch": 0.5512019075852291, "grad_norm": 0.0005988339569598564, "learning_rate": 4.9910490511994274e-06, "loss": 0.0, "step": 8553 }, { "epoch": 0.5512663530321583, "grad_norm": 0.001051486590964813, "learning_rate": 4.990332975295382e-06, "loss": 0.0, "step": 8554 }, { "epoch": 0.5513307984790875, "grad_norm": 0.006394037454805913, "learning_rate": 4.989616899391336e-06, "loss": 0.0, "step": 8555 }, { "epoch": 0.5513952439260166, "grad_norm": 5.165007726001237e-05, "learning_rate": 4.9889008234872895e-06, "loss": 0.0, "step": 8556 }, { "epoch": 0.5514596893729458, "grad_norm": 0.002047369514226012, "learning_rate": 4.988184747583244e-06, "loss": 0.0, "step": 8557 }, { "epoch": 0.551524134819875, "grad_norm": 0.0012906349742674791, "learning_rate": 4.987468671679198e-06, "loss": 0.0, "step": 8558 }, { "epoch": 0.5515885802668041, "grad_norm": 0.0029618502007701924, "learning_rate": 4.986752595775152e-06, "loss": 0.0, "step": 8559 }, { "epoch": 0.5516530257137333, "grad_norm": 0.01238850996303165, "learning_rate": 4.986036519871107e-06, "loss": 0.0, "step": 8560 }, { "epoch": 0.5517174711606625, "grad_norm": 0.0009714262530897783, "learning_rate": 4.985320443967061e-06, "loss": 0.0, "step": 8561 }, { "epoch": 0.5517819166075917, "grad_norm": 0.0010781651410204844, "learning_rate": 4.984604368063015e-06, "loss": 0.0, "step": 8562 }, { "epoch": 0.5518463620545209, "grad_norm": 0.15714035869658133, "learning_rate": 4.98388829215897e-06, "loss": 0.0004, "step": 8563 }, { "epoch": 0.55191080750145, "grad_norm": 0.0034370084976843578, "learning_rate": 4.983172216254923e-06, "loss": 0.0, "step": 8564 }, { "epoch": 0.5519752529483792, "grad_norm": 0.0002537463466807135, "learning_rate": 4.982456140350877e-06, "loss": 0.0, "step": 8565 }, { "epoch": 0.5520396983953084, "grad_norm": 0.0035772505393363426, "learning_rate": 4.981740064446832e-06, "loss": 0.0, "step": 8566 }, { "epoch": 0.5521041438422375, "grad_norm": 0.0007178845574760114, "learning_rate": 4.981023988542786e-06, "loss": 0.0, "step": 8567 }, { "epoch": 0.5521685892891667, "grad_norm": 0.005066510748939021, "learning_rate": 4.98030791263874e-06, "loss": 0.0, "step": 8568 }, { "epoch": 0.5522330347360959, "grad_norm": 0.0005125547267131619, "learning_rate": 4.979591836734694e-06, "loss": 0.0, "step": 8569 }, { "epoch": 0.552297480183025, "grad_norm": 0.003153537373562369, "learning_rate": 4.978875760830649e-06, "loss": 0.0, "step": 8570 }, { "epoch": 0.5523619256299542, "grad_norm": 0.006392792439787252, "learning_rate": 4.978159684926603e-06, "loss": 0.0, "step": 8571 }, { "epoch": 0.5524263710768834, "grad_norm": 4.826716838034971, "learning_rate": 4.977443609022557e-06, "loss": 0.0123, "step": 8572 }, { "epoch": 0.5524908165238126, "grad_norm": 9.228853949010394e-05, "learning_rate": 4.976727533118511e-06, "loss": 0.0, "step": 8573 }, { "epoch": 0.5525552619707418, "grad_norm": 0.007953029032488415, "learning_rate": 4.976011457214465e-06, "loss": 0.0, "step": 8574 }, { "epoch": 0.552619707417671, "grad_norm": 0.0009689809464016709, "learning_rate": 4.9752953813104195e-06, "loss": 0.0, "step": 8575 }, { "epoch": 0.5526841528646002, "grad_norm": 0.0024152789138099672, "learning_rate": 4.974579305406374e-06, "loss": 0.0, "step": 8576 }, { "epoch": 0.5527485983115293, "grad_norm": 9.91049513405085e-05, "learning_rate": 4.973863229502327e-06, "loss": 0.0, "step": 8577 }, { "epoch": 0.5528130437584584, "grad_norm": 0.025710516224901427, "learning_rate": 4.9731471535982815e-06, "loss": 0.0001, "step": 8578 }, { "epoch": 0.5528774892053876, "grad_norm": 0.0009076876494428708, "learning_rate": 4.972431077694236e-06, "loss": 0.0, "step": 8579 }, { "epoch": 0.5529419346523168, "grad_norm": 0.3465730529980872, "learning_rate": 4.97171500179019e-06, "loss": 0.0018, "step": 8580 }, { "epoch": 0.553006380099246, "grad_norm": 0.0037399123513236405, "learning_rate": 4.9709989258861444e-06, "loss": 0.0, "step": 8581 }, { "epoch": 0.5530708255461751, "grad_norm": 0.0002229589558730772, "learning_rate": 4.970282849982099e-06, "loss": 0.0, "step": 8582 }, { "epoch": 0.5531352709931043, "grad_norm": 0.004994821684799948, "learning_rate": 4.969566774078053e-06, "loss": 0.0, "step": 8583 }, { "epoch": 0.5531997164400335, "grad_norm": 0.0001422551604455913, "learning_rate": 4.968850698174007e-06, "loss": 0.0, "step": 8584 }, { "epoch": 0.5532641618869627, "grad_norm": 3.6293142231062747, "learning_rate": 4.968134622269961e-06, "loss": 0.0243, "step": 8585 }, { "epoch": 0.5533286073338919, "grad_norm": 0.0006342448217312984, "learning_rate": 4.967418546365915e-06, "loss": 0.0, "step": 8586 }, { "epoch": 0.5533930527808211, "grad_norm": 0.011555769561746025, "learning_rate": 4.966702470461869e-06, "loss": 0.0, "step": 8587 }, { "epoch": 0.5534574982277503, "grad_norm": 0.01095507667123741, "learning_rate": 4.965986394557824e-06, "loss": 0.0001, "step": 8588 }, { "epoch": 0.5535219436746793, "grad_norm": 1.259846859798293e-05, "learning_rate": 4.965270318653778e-06, "loss": 0.0, "step": 8589 }, { "epoch": 0.5535863891216085, "grad_norm": 0.005156511472907512, "learning_rate": 4.964554242749731e-06, "loss": 0.0, "step": 8590 }, { "epoch": 0.5536508345685377, "grad_norm": 0.2383249586696943, "learning_rate": 4.963838166845686e-06, "loss": 0.0009, "step": 8591 }, { "epoch": 0.5537152800154669, "grad_norm": 0.012284478435519698, "learning_rate": 4.96312209094164e-06, "loss": 0.0, "step": 8592 }, { "epoch": 0.5537797254623961, "grad_norm": 0.0055611878877656114, "learning_rate": 4.962406015037594e-06, "loss": 0.0, "step": 8593 }, { "epoch": 0.5538441709093253, "grad_norm": 0.32462081338167953, "learning_rate": 4.961689939133549e-06, "loss": 0.0023, "step": 8594 }, { "epoch": 0.5539086163562544, "grad_norm": 0.0036361403139943778, "learning_rate": 4.960973863229503e-06, "loss": 0.0, "step": 8595 }, { "epoch": 0.5539730618031836, "grad_norm": 0.01589423451383375, "learning_rate": 4.960257787325457e-06, "loss": 0.0, "step": 8596 }, { "epoch": 0.5540375072501128, "grad_norm": 0.0011489116491658889, "learning_rate": 4.9595417114214115e-06, "loss": 0.0, "step": 8597 }, { "epoch": 0.554101952697042, "grad_norm": 0.0010378343495381565, "learning_rate": 4.958825635517365e-06, "loss": 0.0, "step": 8598 }, { "epoch": 0.5541663981439712, "grad_norm": 0.00014715845418476452, "learning_rate": 4.958109559613319e-06, "loss": 0.0, "step": 8599 }, { "epoch": 0.5542308435909004, "grad_norm": 0.0020139445294210146, "learning_rate": 4.9573934837092736e-06, "loss": 0.0, "step": 8600 }, { "epoch": 0.5542952890378294, "grad_norm": 0.0007146691062408835, "learning_rate": 4.956677407805228e-06, "loss": 0.0, "step": 8601 }, { "epoch": 0.5543597344847586, "grad_norm": 0.006505887901441304, "learning_rate": 4.955961331901182e-06, "loss": 0.0, "step": 8602 }, { "epoch": 0.5544241799316878, "grad_norm": 0.009405139009455533, "learning_rate": 4.955245255997136e-06, "loss": 0.0, "step": 8603 }, { "epoch": 0.554488625378617, "grad_norm": 0.003237377086330774, "learning_rate": 4.95452918009309e-06, "loss": 0.0, "step": 8604 }, { "epoch": 0.5545530708255462, "grad_norm": 0.0031621215893174203, "learning_rate": 4.953813104189045e-06, "loss": 0.0, "step": 8605 }, { "epoch": 0.5546175162724754, "grad_norm": 0.19976517593339282, "learning_rate": 4.9530970282849985e-06, "loss": 0.0004, "step": 8606 }, { "epoch": 0.5546819617194045, "grad_norm": 0.0014662636392100076, "learning_rate": 4.952380952380953e-06, "loss": 0.0, "step": 8607 }, { "epoch": 0.5547464071663337, "grad_norm": 0.3341552899371035, "learning_rate": 4.951664876476907e-06, "loss": 0.0034, "step": 8608 }, { "epoch": 0.5548108526132629, "grad_norm": 0.004519190199484597, "learning_rate": 4.950948800572861e-06, "loss": 0.0, "step": 8609 }, { "epoch": 0.5548752980601921, "grad_norm": 0.25093081164918085, "learning_rate": 4.950232724668816e-06, "loss": 0.0005, "step": 8610 }, { "epoch": 0.5549397435071213, "grad_norm": 0.005922211232974903, "learning_rate": 4.949516648764769e-06, "loss": 0.0, "step": 8611 }, { "epoch": 0.5550041889540503, "grad_norm": 0.0003468587612752949, "learning_rate": 4.9488005728607234e-06, "loss": 0.0, "step": 8612 }, { "epoch": 0.5550686344009795, "grad_norm": 0.14501370633433025, "learning_rate": 4.948084496956678e-06, "loss": 0.0002, "step": 8613 }, { "epoch": 0.5551330798479087, "grad_norm": 0.1744257960372159, "learning_rate": 4.947368421052632e-06, "loss": 0.0004, "step": 8614 }, { "epoch": 0.5551975252948379, "grad_norm": 0.0036943899184547248, "learning_rate": 4.946652345148586e-06, "loss": 0.0, "step": 8615 }, { "epoch": 0.5552619707417671, "grad_norm": 0.0005746585162564398, "learning_rate": 4.94593626924454e-06, "loss": 0.0, "step": 8616 }, { "epoch": 0.5553264161886963, "grad_norm": 0.0054436829334644915, "learning_rate": 4.945220193340495e-06, "loss": 0.0, "step": 8617 }, { "epoch": 0.5553908616356255, "grad_norm": 0.00019123731125635397, "learning_rate": 4.944504117436449e-06, "loss": 0.0, "step": 8618 }, { "epoch": 0.5554553070825546, "grad_norm": 0.31377869303838934, "learning_rate": 4.943788041532403e-06, "loss": 0.0007, "step": 8619 }, { "epoch": 0.5555197525294838, "grad_norm": 0.009279498678199789, "learning_rate": 4.943071965628357e-06, "loss": 0.0, "step": 8620 }, { "epoch": 0.555584197976413, "grad_norm": 0.0013721914906434604, "learning_rate": 4.942355889724311e-06, "loss": 0.0, "step": 8621 }, { "epoch": 0.5556486434233422, "grad_norm": 0.00026775923044403474, "learning_rate": 4.941639813820266e-06, "loss": 0.0, "step": 8622 }, { "epoch": 0.5557130888702713, "grad_norm": 0.22156785673329588, "learning_rate": 4.94092373791622e-06, "loss": 0.0007, "step": 8623 }, { "epoch": 0.5557775343172005, "grad_norm": 0.01439473978710645, "learning_rate": 4.940207662012173e-06, "loss": 0.0, "step": 8624 }, { "epoch": 0.5558419797641296, "grad_norm": 0.0032194399396147095, "learning_rate": 4.939491586108128e-06, "loss": 0.0, "step": 8625 }, { "epoch": 0.5559064252110588, "grad_norm": 0.00022568911306749794, "learning_rate": 4.938775510204082e-06, "loss": 0.0, "step": 8626 }, { "epoch": 0.555970870657988, "grad_norm": 0.02743558809261413, "learning_rate": 4.938059434300036e-06, "loss": 0.0001, "step": 8627 }, { "epoch": 0.5560353161049172, "grad_norm": 0.03291318148000441, "learning_rate": 4.93734335839599e-06, "loss": 0.0003, "step": 8628 }, { "epoch": 0.5560997615518464, "grad_norm": 0.26339233556561864, "learning_rate": 4.936627282491945e-06, "loss": 0.0016, "step": 8629 }, { "epoch": 0.5561642069987756, "grad_norm": 0.03465554861101963, "learning_rate": 4.935911206587899e-06, "loss": 0.0001, "step": 8630 }, { "epoch": 0.5562286524457047, "grad_norm": 0.0010887964390989104, "learning_rate": 4.9351951306838534e-06, "loss": 0.0, "step": 8631 }, { "epoch": 0.5562930978926339, "grad_norm": 0.0033196397485056733, "learning_rate": 4.934479054779807e-06, "loss": 0.0, "step": 8632 }, { "epoch": 0.5563575433395631, "grad_norm": 0.021989216789134153, "learning_rate": 4.933762978875761e-06, "loss": 0.0002, "step": 8633 }, { "epoch": 0.5564219887864922, "grad_norm": 1.53336188593831, "learning_rate": 4.9330469029717155e-06, "loss": 0.003, "step": 8634 }, { "epoch": 0.5564864342334214, "grad_norm": 0.03507442853934759, "learning_rate": 4.93233082706767e-06, "loss": 0.0001, "step": 8635 }, { "epoch": 0.5565508796803506, "grad_norm": 0.671429575159169, "learning_rate": 4.931614751163623e-06, "loss": 0.0026, "step": 8636 }, { "epoch": 0.5566153251272797, "grad_norm": 0.0023686506544780763, "learning_rate": 4.9308986752595775e-06, "loss": 0.0, "step": 8637 }, { "epoch": 0.5566797705742089, "grad_norm": 0.0010374830972492118, "learning_rate": 4.930182599355532e-06, "loss": 0.0, "step": 8638 }, { "epoch": 0.5567442160211381, "grad_norm": 0.019772198207453226, "learning_rate": 4.929466523451486e-06, "loss": 0.0002, "step": 8639 }, { "epoch": 0.5568086614680673, "grad_norm": 0.006079940266471607, "learning_rate": 4.9287504475474404e-06, "loss": 0.0, "step": 8640 }, { "epoch": 0.5568731069149965, "grad_norm": 0.022085923738554603, "learning_rate": 4.928034371643395e-06, "loss": 0.0001, "step": 8641 }, { "epoch": 0.5569375523619257, "grad_norm": 3.3427854198404874, "learning_rate": 4.927318295739349e-06, "loss": 0.0662, "step": 8642 }, { "epoch": 0.5570019978088548, "grad_norm": 0.01589109570954559, "learning_rate": 4.926602219835303e-06, "loss": 0.0, "step": 8643 }, { "epoch": 0.557066443255784, "grad_norm": 0.0008106748129718368, "learning_rate": 4.925886143931257e-06, "loss": 0.0, "step": 8644 }, { "epoch": 0.5571308887027131, "grad_norm": 0.06000058737916828, "learning_rate": 4.925170068027211e-06, "loss": 0.0038, "step": 8645 }, { "epoch": 0.5571953341496423, "grad_norm": 0.0015374185399924462, "learning_rate": 4.924453992123165e-06, "loss": 0.0, "step": 8646 }, { "epoch": 0.5572597795965715, "grad_norm": 0.001674477621343504, "learning_rate": 4.92373791621912e-06, "loss": 0.0, "step": 8647 }, { "epoch": 0.5573242250435007, "grad_norm": 0.0012146313544239115, "learning_rate": 4.923021840315074e-06, "loss": 0.0, "step": 8648 }, { "epoch": 0.5573886704904298, "grad_norm": 0.012782928730319724, "learning_rate": 4.922305764411027e-06, "loss": 0.0001, "step": 8649 }, { "epoch": 0.557453115937359, "grad_norm": 0.07260694958982807, "learning_rate": 4.921589688506982e-06, "loss": 0.001, "step": 8650 }, { "epoch": 0.5575175613842882, "grad_norm": 0.004229980046474155, "learning_rate": 4.920873612602936e-06, "loss": 0.0, "step": 8651 }, { "epoch": 0.5575820068312174, "grad_norm": 0.09238504624841795, "learning_rate": 4.92015753669889e-06, "loss": 0.0002, "step": 8652 }, { "epoch": 0.5576464522781466, "grad_norm": 0.019205153148170562, "learning_rate": 4.919441460794845e-06, "loss": 0.0001, "step": 8653 }, { "epoch": 0.5577108977250758, "grad_norm": 0.004936052066062885, "learning_rate": 4.918725384890799e-06, "loss": 0.0, "step": 8654 }, { "epoch": 0.557775343172005, "grad_norm": 0.006712840825438057, "learning_rate": 4.918009308986753e-06, "loss": 0.0, "step": 8655 }, { "epoch": 0.557839788618934, "grad_norm": 0.002762241064491735, "learning_rate": 4.9172932330827075e-06, "loss": 0.0, "step": 8656 }, { "epoch": 0.5579042340658632, "grad_norm": 0.01698880984180648, "learning_rate": 4.916577157178661e-06, "loss": 0.0001, "step": 8657 }, { "epoch": 0.5579686795127924, "grad_norm": 0.00016716416716050216, "learning_rate": 4.915861081274615e-06, "loss": 0.0, "step": 8658 }, { "epoch": 0.5580331249597216, "grad_norm": 0.07974039068577432, "learning_rate": 4.9151450053705696e-06, "loss": 0.0002, "step": 8659 }, { "epoch": 0.5580975704066508, "grad_norm": 0.013269370565432554, "learning_rate": 4.914428929466524e-06, "loss": 0.0, "step": 8660 }, { "epoch": 0.5581620158535799, "grad_norm": 0.005166149279363144, "learning_rate": 4.913712853562478e-06, "loss": 0.0, "step": 8661 }, { "epoch": 0.5582264613005091, "grad_norm": 0.003897579659758297, "learning_rate": 4.912996777658432e-06, "loss": 0.0, "step": 8662 }, { "epoch": 0.5582909067474383, "grad_norm": 0.006220268919483848, "learning_rate": 4.912280701754386e-06, "loss": 0.0, "step": 8663 }, { "epoch": 0.5583553521943675, "grad_norm": 0.005305744753739531, "learning_rate": 4.911564625850341e-06, "loss": 0.0, "step": 8664 }, { "epoch": 0.5584197976412967, "grad_norm": 0.0005758029329325327, "learning_rate": 4.9108485499462945e-06, "loss": 0.0, "step": 8665 }, { "epoch": 0.5584842430882259, "grad_norm": 0.000827279042110068, "learning_rate": 4.910132474042249e-06, "loss": 0.0, "step": 8666 }, { "epoch": 0.558548688535155, "grad_norm": 0.004102911737880749, "learning_rate": 4.909416398138203e-06, "loss": 0.0, "step": 8667 }, { "epoch": 0.5586131339820841, "grad_norm": 0.0004710363920028894, "learning_rate": 4.908700322234157e-06, "loss": 0.0, "step": 8668 }, { "epoch": 0.5586775794290133, "grad_norm": 0.00015958688110151233, "learning_rate": 4.907984246330112e-06, "loss": 0.0, "step": 8669 }, { "epoch": 0.5587420248759425, "grad_norm": 0.02376513779259615, "learning_rate": 4.907268170426065e-06, "loss": 0.0, "step": 8670 }, { "epoch": 0.5588064703228717, "grad_norm": 0.002388357422968757, "learning_rate": 4.9065520945220194e-06, "loss": 0.0, "step": 8671 }, { "epoch": 0.5588709157698009, "grad_norm": 0.0016862129693133454, "learning_rate": 4.905836018617974e-06, "loss": 0.0, "step": 8672 }, { "epoch": 0.55893536121673, "grad_norm": 0.004936221946703141, "learning_rate": 4.905119942713928e-06, "loss": 0.0, "step": 8673 }, { "epoch": 0.5589998066636592, "grad_norm": 0.0015385476593219008, "learning_rate": 4.904403866809882e-06, "loss": 0.0, "step": 8674 }, { "epoch": 0.5590642521105884, "grad_norm": 0.09754154621517012, "learning_rate": 4.903687790905837e-06, "loss": 0.0013, "step": 8675 }, { "epoch": 0.5591286975575176, "grad_norm": 0.0025453102097556697, "learning_rate": 4.902971715001791e-06, "loss": 0.0, "step": 8676 }, { "epoch": 0.5591931430044468, "grad_norm": 0.000572278693487006, "learning_rate": 4.902255639097745e-06, "loss": 0.0, "step": 8677 }, { "epoch": 0.559257588451376, "grad_norm": 3.171789459428063, "learning_rate": 4.901539563193699e-06, "loss": 0.0115, "step": 8678 }, { "epoch": 0.559322033898305, "grad_norm": 0.005036475146354532, "learning_rate": 4.900823487289653e-06, "loss": 0.0, "step": 8679 }, { "epoch": 0.5593864793452342, "grad_norm": 0.039560586102517295, "learning_rate": 4.900107411385607e-06, "loss": 0.0001, "step": 8680 }, { "epoch": 0.5594509247921634, "grad_norm": 0.1343451450139708, "learning_rate": 4.899391335481562e-06, "loss": 0.0006, "step": 8681 }, { "epoch": 0.5595153702390926, "grad_norm": 0.012012192767179632, "learning_rate": 4.898675259577516e-06, "loss": 0.0001, "step": 8682 }, { "epoch": 0.5595798156860218, "grad_norm": 0.002232516437988932, "learning_rate": 4.897959183673469e-06, "loss": 0.0, "step": 8683 }, { "epoch": 0.559644261132951, "grad_norm": 0.0005160028869192366, "learning_rate": 4.897243107769424e-06, "loss": 0.0, "step": 8684 }, { "epoch": 0.5597087065798801, "grad_norm": 0.17146186593991028, "learning_rate": 4.896527031865378e-06, "loss": 0.001, "step": 8685 }, { "epoch": 0.5597731520268093, "grad_norm": 0.24822877236891366, "learning_rate": 4.895810955961332e-06, "loss": 0.0006, "step": 8686 }, { "epoch": 0.5598375974737385, "grad_norm": 0.039314318138136045, "learning_rate": 4.8950948800572865e-06, "loss": 0.0004, "step": 8687 }, { "epoch": 0.5599020429206677, "grad_norm": 0.8952721272098672, "learning_rate": 4.894378804153241e-06, "loss": 0.0025, "step": 8688 }, { "epoch": 0.5599664883675969, "grad_norm": 0.000244431402831098, "learning_rate": 4.893662728249195e-06, "loss": 0.0, "step": 8689 }, { "epoch": 0.560030933814526, "grad_norm": 0.003176688002738185, "learning_rate": 4.8929466523451494e-06, "loss": 0.0, "step": 8690 }, { "epoch": 0.5600953792614551, "grad_norm": 0.0003265274312593406, "learning_rate": 4.892230576441103e-06, "loss": 0.0, "step": 8691 }, { "epoch": 0.5601598247083843, "grad_norm": 0.0016430742264626347, "learning_rate": 4.891514500537057e-06, "loss": 0.0, "step": 8692 }, { "epoch": 0.5602242701553135, "grad_norm": 0.0009334472852909937, "learning_rate": 4.8907984246330115e-06, "loss": 0.0, "step": 8693 }, { "epoch": 0.5602887156022427, "grad_norm": 7.442905227648027e-05, "learning_rate": 4.890082348728966e-06, "loss": 0.0, "step": 8694 }, { "epoch": 0.5603531610491719, "grad_norm": 0.16791826420947753, "learning_rate": 4.88936627282492e-06, "loss": 0.0007, "step": 8695 }, { "epoch": 0.5604176064961011, "grad_norm": 0.3082393107668559, "learning_rate": 4.8886501969208735e-06, "loss": 0.0007, "step": 8696 }, { "epoch": 0.5604820519430302, "grad_norm": 0.00016087311292047117, "learning_rate": 4.887934121016828e-06, "loss": 0.0, "step": 8697 }, { "epoch": 0.5605464973899594, "grad_norm": 0.16002245291919298, "learning_rate": 4.887218045112782e-06, "loss": 0.0008, "step": 8698 }, { "epoch": 0.5606109428368886, "grad_norm": 0.004248675914345726, "learning_rate": 4.8865019692087364e-06, "loss": 0.0, "step": 8699 }, { "epoch": 0.5606753882838178, "grad_norm": 0.006593991119702283, "learning_rate": 4.885785893304691e-06, "loss": 0.0, "step": 8700 }, { "epoch": 0.5607398337307469, "grad_norm": 0.32584103920711627, "learning_rate": 4.885069817400645e-06, "loss": 0.0014, "step": 8701 }, { "epoch": 0.5608042791776761, "grad_norm": 0.0002263789195140541, "learning_rate": 4.884353741496599e-06, "loss": 0.0, "step": 8702 }, { "epoch": 0.5608687246246052, "grad_norm": 0.7640283245408404, "learning_rate": 4.883637665592554e-06, "loss": 0.0059, "step": 8703 }, { "epoch": 0.5609331700715344, "grad_norm": 7.212601342396701e-05, "learning_rate": 4.882921589688507e-06, "loss": 0.0, "step": 8704 }, { "epoch": 0.5609976155184636, "grad_norm": 0.010067607272151306, "learning_rate": 4.882205513784461e-06, "loss": 0.0, "step": 8705 }, { "epoch": 0.5610620609653928, "grad_norm": 0.010785156772366373, "learning_rate": 4.881489437880416e-06, "loss": 0.0001, "step": 8706 }, { "epoch": 0.561126506412322, "grad_norm": 0.020743954743965017, "learning_rate": 4.88077336197637e-06, "loss": 0.0001, "step": 8707 }, { "epoch": 0.5611909518592512, "grad_norm": 0.01784634922520059, "learning_rate": 4.880057286072324e-06, "loss": 0.0, "step": 8708 }, { "epoch": 0.5612553973061803, "grad_norm": 0.030889615161481417, "learning_rate": 4.879341210168278e-06, "loss": 0.0001, "step": 8709 }, { "epoch": 0.5613198427531095, "grad_norm": 0.08717037831813229, "learning_rate": 4.878625134264233e-06, "loss": 0.0001, "step": 8710 }, { "epoch": 0.5613842882000387, "grad_norm": 4.0069996870915824e-05, "learning_rate": 4.877909058360187e-06, "loss": 0.0, "step": 8711 }, { "epoch": 0.5614487336469678, "grad_norm": 0.21334305206683088, "learning_rate": 4.877192982456141e-06, "loss": 0.0004, "step": 8712 }, { "epoch": 0.561513179093897, "grad_norm": 0.0009662374374246946, "learning_rate": 4.876476906552095e-06, "loss": 0.0, "step": 8713 }, { "epoch": 0.5615776245408262, "grad_norm": 0.001994360683181997, "learning_rate": 4.875760830648049e-06, "loss": 0.0, "step": 8714 }, { "epoch": 0.5616420699877553, "grad_norm": 0.0007313893970778114, "learning_rate": 4.8750447547440035e-06, "loss": 0.0, "step": 8715 }, { "epoch": 0.5617065154346845, "grad_norm": 0.12489726270253314, "learning_rate": 4.874328678839958e-06, "loss": 0.0003, "step": 8716 }, { "epoch": 0.5617709608816137, "grad_norm": 0.023161239719397465, "learning_rate": 4.873612602935911e-06, "loss": 0.0, "step": 8717 }, { "epoch": 0.5618354063285429, "grad_norm": 0.12134647530902526, "learning_rate": 4.8728965270318656e-06, "loss": 0.0004, "step": 8718 }, { "epoch": 0.5618998517754721, "grad_norm": 0.0005077160448647168, "learning_rate": 4.87218045112782e-06, "loss": 0.0, "step": 8719 }, { "epoch": 0.5619642972224013, "grad_norm": 0.4305684855102253, "learning_rate": 4.871464375223774e-06, "loss": 0.0031, "step": 8720 }, { "epoch": 0.5620287426693304, "grad_norm": 0.03279392093936876, "learning_rate": 4.8707482993197285e-06, "loss": 0.0001, "step": 8721 }, { "epoch": 0.5620931881162596, "grad_norm": 0.012182210483527876, "learning_rate": 4.870032223415683e-06, "loss": 0.0001, "step": 8722 }, { "epoch": 0.5621576335631887, "grad_norm": 0.01315964680711356, "learning_rate": 4.869316147511637e-06, "loss": 0.0, "step": 8723 }, { "epoch": 0.5622220790101179, "grad_norm": 0.011839825426168998, "learning_rate": 4.868600071607591e-06, "loss": 0.0, "step": 8724 }, { "epoch": 0.5622865244570471, "grad_norm": 0.003285467399204521, "learning_rate": 4.867883995703545e-06, "loss": 0.0, "step": 8725 }, { "epoch": 0.5623509699039763, "grad_norm": 0.008998222780521375, "learning_rate": 4.867167919799499e-06, "loss": 0.0, "step": 8726 }, { "epoch": 0.5624154153509054, "grad_norm": 0.006015495470261791, "learning_rate": 4.866451843895453e-06, "loss": 0.0, "step": 8727 }, { "epoch": 0.5624798607978346, "grad_norm": 0.0036835391157751737, "learning_rate": 4.865735767991408e-06, "loss": 0.0, "step": 8728 }, { "epoch": 0.5625443062447638, "grad_norm": 0.000797614702290517, "learning_rate": 4.865019692087362e-06, "loss": 0.0, "step": 8729 }, { "epoch": 0.562608751691693, "grad_norm": 0.002094505358365688, "learning_rate": 4.8643036161833154e-06, "loss": 0.0, "step": 8730 }, { "epoch": 0.5626731971386222, "grad_norm": 0.0011268998344081452, "learning_rate": 4.86358754027927e-06, "loss": 0.0, "step": 8731 }, { "epoch": 0.5627376425855514, "grad_norm": 0.0005851215754860948, "learning_rate": 4.862871464375224e-06, "loss": 0.0, "step": 8732 }, { "epoch": 0.5628020880324806, "grad_norm": 0.002780093783662651, "learning_rate": 4.862155388471178e-06, "loss": 0.0, "step": 8733 }, { "epoch": 0.5628665334794096, "grad_norm": 0.0036051815660962795, "learning_rate": 4.861439312567133e-06, "loss": 0.0, "step": 8734 }, { "epoch": 0.5629309789263388, "grad_norm": 0.17071126224493394, "learning_rate": 4.860723236663087e-06, "loss": 0.0007, "step": 8735 }, { "epoch": 0.562995424373268, "grad_norm": 0.0009409020897952011, "learning_rate": 4.860007160759041e-06, "loss": 0.0, "step": 8736 }, { "epoch": 0.5630598698201972, "grad_norm": 0.017214624380895726, "learning_rate": 4.8592910848549955e-06, "loss": 0.0002, "step": 8737 }, { "epoch": 0.5631243152671264, "grad_norm": 0.0001412999669650116, "learning_rate": 4.858575008950949e-06, "loss": 0.0, "step": 8738 }, { "epoch": 0.5631887607140555, "grad_norm": 0.5667308534939416, "learning_rate": 4.857858933046903e-06, "loss": 0.0017, "step": 8739 }, { "epoch": 0.5632532061609847, "grad_norm": 0.0006867199061076417, "learning_rate": 4.857142857142858e-06, "loss": 0.0, "step": 8740 }, { "epoch": 0.5633176516079139, "grad_norm": 0.0001116138075197791, "learning_rate": 4.856426781238812e-06, "loss": 0.0, "step": 8741 }, { "epoch": 0.5633820970548431, "grad_norm": 0.23073332358003387, "learning_rate": 4.855710705334765e-06, "loss": 0.0003, "step": 8742 }, { "epoch": 0.5634465425017723, "grad_norm": 0.0007989920972693515, "learning_rate": 4.85499462943072e-06, "loss": 0.0, "step": 8743 }, { "epoch": 0.5635109879487015, "grad_norm": 0.001161135735589687, "learning_rate": 4.854278553526674e-06, "loss": 0.0, "step": 8744 }, { "epoch": 0.5635754333956307, "grad_norm": 0.019831510081458233, "learning_rate": 4.853562477622629e-06, "loss": 0.0001, "step": 8745 }, { "epoch": 0.5636398788425597, "grad_norm": 0.0007333301662223194, "learning_rate": 4.8528464017185825e-06, "loss": 0.0, "step": 8746 }, { "epoch": 0.5637043242894889, "grad_norm": 0.0004206232000712696, "learning_rate": 4.852130325814537e-06, "loss": 0.0, "step": 8747 }, { "epoch": 0.5637687697364181, "grad_norm": 0.00020189099769625215, "learning_rate": 4.851414249910491e-06, "loss": 0.0, "step": 8748 }, { "epoch": 0.5638332151833473, "grad_norm": 0.23797480623700412, "learning_rate": 4.8506981740064454e-06, "loss": 0.0018, "step": 8749 }, { "epoch": 0.5638976606302765, "grad_norm": 0.002936163898924804, "learning_rate": 4.849982098102399e-06, "loss": 0.0, "step": 8750 }, { "epoch": 0.5639621060772056, "grad_norm": 0.001416732870031649, "learning_rate": 4.849266022198353e-06, "loss": 0.0, "step": 8751 }, { "epoch": 0.5640265515241348, "grad_norm": 0.0005018175610769106, "learning_rate": 4.8485499462943075e-06, "loss": 0.0, "step": 8752 }, { "epoch": 0.564090996971064, "grad_norm": 0.20247256512586775, "learning_rate": 4.847833870390262e-06, "loss": 0.004, "step": 8753 }, { "epoch": 0.5641554424179932, "grad_norm": 0.004722470804156485, "learning_rate": 4.847117794486216e-06, "loss": 0.0001, "step": 8754 }, { "epoch": 0.5642198878649224, "grad_norm": 0.0189753211178013, "learning_rate": 4.8464017185821695e-06, "loss": 0.0, "step": 8755 }, { "epoch": 0.5642843333118516, "grad_norm": 0.1526467964911563, "learning_rate": 4.845685642678124e-06, "loss": 0.0005, "step": 8756 }, { "epoch": 0.5643487787587806, "grad_norm": 0.002193407182671908, "learning_rate": 4.844969566774079e-06, "loss": 0.0, "step": 8757 }, { "epoch": 0.5644132242057098, "grad_norm": 0.0024595912356725554, "learning_rate": 4.8442534908700324e-06, "loss": 0.0, "step": 8758 }, { "epoch": 0.564477669652639, "grad_norm": 0.16334430991807178, "learning_rate": 4.843537414965987e-06, "loss": 0.0001, "step": 8759 }, { "epoch": 0.5645421150995682, "grad_norm": 6.383292916162537e-05, "learning_rate": 4.842821339061941e-06, "loss": 0.0, "step": 8760 }, { "epoch": 0.5646065605464974, "grad_norm": 0.0006328202426825101, "learning_rate": 4.842105263157895e-06, "loss": 0.0, "step": 8761 }, { "epoch": 0.5646710059934266, "grad_norm": 0.0036656038926473366, "learning_rate": 4.84138918725385e-06, "loss": 0.0, "step": 8762 }, { "epoch": 0.5647354514403558, "grad_norm": 0.00020467357590308498, "learning_rate": 4.840673111349803e-06, "loss": 0.0, "step": 8763 }, { "epoch": 0.5647998968872849, "grad_norm": 0.00020467357590308498, "learning_rate": 4.840673111349803e-06, "loss": 0.0068, "step": 8764 }, { "epoch": 0.5648643423342141, "grad_norm": 0.3235758531595586, "learning_rate": 4.839957035445757e-06, "loss": 0.0026, "step": 8765 }, { "epoch": 0.5649287877811433, "grad_norm": 0.004006022682251447, "learning_rate": 4.839240959541712e-06, "loss": 0.0, "step": 8766 }, { "epoch": 0.5649932332280725, "grad_norm": 0.004125043265111191, "learning_rate": 4.838524883637666e-06, "loss": 0.0, "step": 8767 }, { "epoch": 0.5650576786750016, "grad_norm": 0.5681165458293576, "learning_rate": 4.83780880773362e-06, "loss": 0.003, "step": 8768 }, { "epoch": 0.5651221241219307, "grad_norm": 0.007982689949264836, "learning_rate": 4.837092731829574e-06, "loss": 0.0, "step": 8769 }, { "epoch": 0.5651865695688599, "grad_norm": 0.006878737484773677, "learning_rate": 4.836376655925529e-06, "loss": 0.0001, "step": 8770 }, { "epoch": 0.5652510150157891, "grad_norm": 0.0037331182480409924, "learning_rate": 4.835660580021483e-06, "loss": 0.0, "step": 8771 }, { "epoch": 0.5653154604627183, "grad_norm": 0.2563085617045643, "learning_rate": 4.834944504117437e-06, "loss": 0.0004, "step": 8772 }, { "epoch": 0.5653799059096475, "grad_norm": 0.009190612379956239, "learning_rate": 4.834228428213391e-06, "loss": 0.0001, "step": 8773 }, { "epoch": 0.5654443513565767, "grad_norm": 0.006375923284434245, "learning_rate": 4.833512352309345e-06, "loss": 0.0, "step": 8774 }, { "epoch": 0.5655087968035059, "grad_norm": 0.796118919865634, "learning_rate": 4.8327962764052995e-06, "loss": 0.0015, "step": 8775 }, { "epoch": 0.565573242250435, "grad_norm": 0.00342534803484227, "learning_rate": 4.832080200501254e-06, "loss": 0.0, "step": 8776 }, { "epoch": 0.5656376876973642, "grad_norm": 0.0312884269577183, "learning_rate": 4.831364124597207e-06, "loss": 0.0016, "step": 8777 }, { "epoch": 0.5657021331442934, "grad_norm": 0.051568192290613435, "learning_rate": 4.8306480486931616e-06, "loss": 0.0005, "step": 8778 }, { "epoch": 0.5657665785912225, "grad_norm": 0.0001771488648946319, "learning_rate": 4.829931972789116e-06, "loss": 0.0, "step": 8779 }, { "epoch": 0.5658310240381517, "grad_norm": 0.005487712499418206, "learning_rate": 4.82921589688507e-06, "loss": 0.0001, "step": 8780 }, { "epoch": 0.5658954694850808, "grad_norm": 0.001248246749537538, "learning_rate": 4.8284998209810245e-06, "loss": 0.0, "step": 8781 }, { "epoch": 0.56595991493201, "grad_norm": 0.009656013818239738, "learning_rate": 4.827783745076979e-06, "loss": 0.0, "step": 8782 }, { "epoch": 0.5660243603789392, "grad_norm": 0.001758210468587272, "learning_rate": 4.827067669172933e-06, "loss": 0.0, "step": 8783 }, { "epoch": 0.5660888058258684, "grad_norm": 0.3298186687581501, "learning_rate": 4.826351593268887e-06, "loss": 0.0013, "step": 8784 }, { "epoch": 0.5661532512727976, "grad_norm": 1.0338854780988707, "learning_rate": 4.825635517364841e-06, "loss": 0.0159, "step": 8785 }, { "epoch": 0.5662176967197268, "grad_norm": 0.013471690213689116, "learning_rate": 4.824919441460795e-06, "loss": 0.0, "step": 8786 }, { "epoch": 0.566282142166656, "grad_norm": 0.00011633145196054886, "learning_rate": 4.824203365556749e-06, "loss": 0.0, "step": 8787 }, { "epoch": 0.5663465876135851, "grad_norm": 0.005370732892713071, "learning_rate": 4.823487289652704e-06, "loss": 0.0, "step": 8788 }, { "epoch": 0.5664110330605143, "grad_norm": 1.9052808205184348, "learning_rate": 4.822771213748658e-06, "loss": 0.0155, "step": 8789 }, { "epoch": 0.5664754785074434, "grad_norm": 0.0016993295556823635, "learning_rate": 4.8220551378446114e-06, "loss": 0.0, "step": 8790 }, { "epoch": 0.5665399239543726, "grad_norm": 0.21898209656732134, "learning_rate": 4.821339061940566e-06, "loss": 0.0008, "step": 8791 }, { "epoch": 0.5666043694013018, "grad_norm": 0.0029171804599055747, "learning_rate": 4.82062298603652e-06, "loss": 0.0, "step": 8792 }, { "epoch": 0.566668814848231, "grad_norm": 0.0011682263683127836, "learning_rate": 4.819906910132474e-06, "loss": 0.0, "step": 8793 }, { "epoch": 0.5667332602951601, "grad_norm": 0.02120118758689934, "learning_rate": 4.819190834228429e-06, "loss": 0.0002, "step": 8794 }, { "epoch": 0.5667977057420893, "grad_norm": 0.016868041783098704, "learning_rate": 4.818474758324383e-06, "loss": 0.0001, "step": 8795 }, { "epoch": 0.5668621511890185, "grad_norm": 0.0024044416183440355, "learning_rate": 4.817758682420337e-06, "loss": 0.0, "step": 8796 }, { "epoch": 0.5669265966359477, "grad_norm": 1.069360351283807, "learning_rate": 4.8170426065162915e-06, "loss": 0.0072, "step": 8797 }, { "epoch": 0.5669910420828769, "grad_norm": 0.012245140194462465, "learning_rate": 4.816326530612245e-06, "loss": 0.0, "step": 8798 }, { "epoch": 0.5670554875298061, "grad_norm": 0.00043831334825429955, "learning_rate": 4.815610454708199e-06, "loss": 0.0, "step": 8799 }, { "epoch": 0.5671199329767352, "grad_norm": 0.0032662984748576328, "learning_rate": 4.814894378804154e-06, "loss": 0.0, "step": 8800 }, { "epoch": 0.5671843784236643, "grad_norm": 0.06767947089195707, "learning_rate": 4.814178302900108e-06, "loss": 0.0, "step": 8801 }, { "epoch": 0.5672488238705935, "grad_norm": 0.040585778805621656, "learning_rate": 4.813462226996062e-06, "loss": 0.0001, "step": 8802 }, { "epoch": 0.5673132693175227, "grad_norm": 0.0005241659708540031, "learning_rate": 4.812746151092016e-06, "loss": 0.0, "step": 8803 }, { "epoch": 0.5673777147644519, "grad_norm": 0.06185363157920903, "learning_rate": 4.81203007518797e-06, "loss": 0.0002, "step": 8804 }, { "epoch": 0.567442160211381, "grad_norm": 0.012243238039889377, "learning_rate": 4.811313999283925e-06, "loss": 0.0, "step": 8805 }, { "epoch": 0.5675066056583102, "grad_norm": 0.04625774070281256, "learning_rate": 4.8105979233798785e-06, "loss": 0.0003, "step": 8806 }, { "epoch": 0.5675710511052394, "grad_norm": 0.00486547121355555, "learning_rate": 4.809881847475833e-06, "loss": 0.0, "step": 8807 }, { "epoch": 0.5676354965521686, "grad_norm": 0.0404367972194789, "learning_rate": 4.809165771571787e-06, "loss": 0.0001, "step": 8808 }, { "epoch": 0.5676999419990978, "grad_norm": 0.21987166805627087, "learning_rate": 4.8084496956677414e-06, "loss": 0.0002, "step": 8809 }, { "epoch": 0.567764387446027, "grad_norm": 0.00850806290664799, "learning_rate": 4.807733619763696e-06, "loss": 0.0, "step": 8810 }, { "epoch": 0.5678288328929562, "grad_norm": 0.046005889230199014, "learning_rate": 4.807017543859649e-06, "loss": 0.0002, "step": 8811 }, { "epoch": 0.5678932783398852, "grad_norm": 0.33100387723458397, "learning_rate": 4.8063014679556035e-06, "loss": 0.0003, "step": 8812 }, { "epoch": 0.5679577237868144, "grad_norm": 0.11855455379698503, "learning_rate": 4.805585392051558e-06, "loss": 0.0001, "step": 8813 }, { "epoch": 0.5680221692337436, "grad_norm": 0.016615890357752884, "learning_rate": 4.804869316147512e-06, "loss": 0.0, "step": 8814 }, { "epoch": 0.5680866146806728, "grad_norm": 0.669933129930625, "learning_rate": 4.804153240243466e-06, "loss": 0.0017, "step": 8815 }, { "epoch": 0.568151060127602, "grad_norm": 0.3022767576768994, "learning_rate": 4.803437164339421e-06, "loss": 0.0009, "step": 8816 }, { "epoch": 0.5682155055745312, "grad_norm": 0.09943305098401042, "learning_rate": 4.802721088435375e-06, "loss": 0.0001, "step": 8817 }, { "epoch": 0.5682799510214603, "grad_norm": 0.0142782273969829, "learning_rate": 4.802005012531329e-06, "loss": 0.0, "step": 8818 }, { "epoch": 0.5683443964683895, "grad_norm": 0.23304219371967497, "learning_rate": 4.801288936627283e-06, "loss": 0.0008, "step": 8819 }, { "epoch": 0.5684088419153187, "grad_norm": 0.02275632003333956, "learning_rate": 4.800572860723237e-06, "loss": 0.0, "step": 8820 }, { "epoch": 0.5684732873622479, "grad_norm": 0.01564516688304527, "learning_rate": 4.799856784819191e-06, "loss": 0.0, "step": 8821 }, { "epoch": 0.5685377328091771, "grad_norm": 0.001371723968120945, "learning_rate": 4.799140708915146e-06, "loss": 0.0, "step": 8822 }, { "epoch": 0.5686021782561063, "grad_norm": 0.012505581892835328, "learning_rate": 4.7984246330111e-06, "loss": 0.0, "step": 8823 }, { "epoch": 0.5686666237030353, "grad_norm": 0.008130933934212818, "learning_rate": 4.797708557107053e-06, "loss": 0.0, "step": 8824 }, { "epoch": 0.5687310691499645, "grad_norm": 0.030915509771977535, "learning_rate": 4.796992481203008e-06, "loss": 0.0001, "step": 8825 }, { "epoch": 0.5687955145968937, "grad_norm": 0.8509190779648602, "learning_rate": 4.796276405298962e-06, "loss": 0.0059, "step": 8826 }, { "epoch": 0.5688599600438229, "grad_norm": 3.1909198740549973, "learning_rate": 4.795560329394916e-06, "loss": 0.0556, "step": 8827 }, { "epoch": 0.5689244054907521, "grad_norm": 0.23147505557006737, "learning_rate": 4.7948442534908706e-06, "loss": 0.0005, "step": 8828 }, { "epoch": 0.5689888509376813, "grad_norm": 0.027221213537766676, "learning_rate": 4.794128177586825e-06, "loss": 0.0002, "step": 8829 }, { "epoch": 0.5690532963846104, "grad_norm": 0.8506112691652553, "learning_rate": 4.793412101682779e-06, "loss": 0.003, "step": 8830 }, { "epoch": 0.5691177418315396, "grad_norm": 0.5891452973962943, "learning_rate": 4.7926960257787335e-06, "loss": 0.0032, "step": 8831 }, { "epoch": 0.5691821872784688, "grad_norm": 0.006008513606025011, "learning_rate": 4.791979949874687e-06, "loss": 0.0001, "step": 8832 }, { "epoch": 0.569246632725398, "grad_norm": 0.0008348159052347049, "learning_rate": 4.791263873970641e-06, "loss": 0.0, "step": 8833 }, { "epoch": 0.5693110781723272, "grad_norm": 0.019444771138941552, "learning_rate": 4.7905477980665955e-06, "loss": 0.0, "step": 8834 }, { "epoch": 0.5693755236192563, "grad_norm": 0.06374701691057286, "learning_rate": 4.78983172216255e-06, "loss": 0.0002, "step": 8835 }, { "epoch": 0.5694399690661854, "grad_norm": 0.004703231092509179, "learning_rate": 4.789115646258503e-06, "loss": 0.0001, "step": 8836 }, { "epoch": 0.5695044145131146, "grad_norm": 0.001044352791871368, "learning_rate": 4.7883995703544576e-06, "loss": 0.0, "step": 8837 }, { "epoch": 0.5695688599600438, "grad_norm": 0.0020537983650882334, "learning_rate": 4.787683494450412e-06, "loss": 0.0, "step": 8838 }, { "epoch": 0.569633305406973, "grad_norm": 0.00620996218740712, "learning_rate": 4.786967418546366e-06, "loss": 0.0, "step": 8839 }, { "epoch": 0.5696977508539022, "grad_norm": 0.005034392248727269, "learning_rate": 4.7862513426423205e-06, "loss": 0.0, "step": 8840 }, { "epoch": 0.5697621963008314, "grad_norm": 0.004523870500418789, "learning_rate": 4.785535266738275e-06, "loss": 0.0, "step": 8841 }, { "epoch": 0.5698266417477605, "grad_norm": 0.04456405299394487, "learning_rate": 4.784819190834229e-06, "loss": 0.0005, "step": 8842 }, { "epoch": 0.5698910871946897, "grad_norm": 5.05333046130126e-05, "learning_rate": 4.784103114930183e-06, "loss": 0.0, "step": 8843 }, { "epoch": 0.5699555326416189, "grad_norm": 0.000751981987644549, "learning_rate": 4.783387039026137e-06, "loss": 0.0, "step": 8844 }, { "epoch": 0.5700199780885481, "grad_norm": 0.00312416821242081, "learning_rate": 4.782670963122091e-06, "loss": 0.0, "step": 8845 }, { "epoch": 0.5700844235354772, "grad_norm": 0.0016447265891001916, "learning_rate": 4.781954887218045e-06, "loss": 0.0, "step": 8846 }, { "epoch": 0.5701488689824064, "grad_norm": 0.005598044842178874, "learning_rate": 4.781238811314e-06, "loss": 0.0001, "step": 8847 }, { "epoch": 0.5702133144293355, "grad_norm": 0.01762638982733986, "learning_rate": 4.780522735409954e-06, "loss": 0.0, "step": 8848 }, { "epoch": 0.5702777598762647, "grad_norm": 0.010449044572078174, "learning_rate": 4.7798066595059074e-06, "loss": 0.0, "step": 8849 }, { "epoch": 0.5703422053231939, "grad_norm": 0.0002665962713856165, "learning_rate": 4.779090583601862e-06, "loss": 0.0, "step": 8850 }, { "epoch": 0.5704066507701231, "grad_norm": 0.06352423931488883, "learning_rate": 4.778374507697817e-06, "loss": 0.0007, "step": 8851 }, { "epoch": 0.5704710962170523, "grad_norm": 0.38984084577376893, "learning_rate": 4.77765843179377e-06, "loss": 0.0017, "step": 8852 }, { "epoch": 0.5705355416639815, "grad_norm": 0.3330555543209313, "learning_rate": 4.776942355889725e-06, "loss": 0.0025, "step": 8853 }, { "epoch": 0.5705999871109106, "grad_norm": 0.0028222350733292994, "learning_rate": 4.776226279985679e-06, "loss": 0.0, "step": 8854 }, { "epoch": 0.5706644325578398, "grad_norm": 0.001275800740573039, "learning_rate": 4.775510204081633e-06, "loss": 0.0, "step": 8855 }, { "epoch": 0.570728878004769, "grad_norm": 0.0010790753775213522, "learning_rate": 4.7747941281775875e-06, "loss": 0.0, "step": 8856 }, { "epoch": 0.5707933234516981, "grad_norm": 0.04619489207028863, "learning_rate": 4.774078052273541e-06, "loss": 0.0, "step": 8857 }, { "epoch": 0.5708577688986273, "grad_norm": 0.020743763233173337, "learning_rate": 4.773361976369495e-06, "loss": 0.0001, "step": 8858 }, { "epoch": 0.5709222143455565, "grad_norm": 0.0006698125467998965, "learning_rate": 4.77264590046545e-06, "loss": 0.0, "step": 8859 }, { "epoch": 0.5709866597924856, "grad_norm": 0.0014080113945894896, "learning_rate": 4.771929824561404e-06, "loss": 0.0, "step": 8860 }, { "epoch": 0.5710511052394148, "grad_norm": 0.27785739883434873, "learning_rate": 4.771213748657358e-06, "loss": 0.0003, "step": 8861 }, { "epoch": 0.571115550686344, "grad_norm": 0.0027483904368733616, "learning_rate": 4.770497672753312e-06, "loss": 0.0, "step": 8862 }, { "epoch": 0.5711799961332732, "grad_norm": 0.5726181350659572, "learning_rate": 4.769781596849267e-06, "loss": 0.002, "step": 8863 }, { "epoch": 0.5712444415802024, "grad_norm": 0.00208850930236962, "learning_rate": 4.769065520945221e-06, "loss": 0.0, "step": 8864 }, { "epoch": 0.5713088870271316, "grad_norm": 0.0028141424681147045, "learning_rate": 4.7683494450411745e-06, "loss": 0.0, "step": 8865 }, { "epoch": 0.5713733324740607, "grad_norm": 0.05520224991306442, "learning_rate": 4.767633369137129e-06, "loss": 0.0001, "step": 8866 }, { "epoch": 0.5714377779209899, "grad_norm": 0.0017825190273745063, "learning_rate": 4.766917293233083e-06, "loss": 0.0, "step": 8867 }, { "epoch": 0.571502223367919, "grad_norm": 0.2973295170699638, "learning_rate": 4.7662012173290374e-06, "loss": 0.0032, "step": 8868 }, { "epoch": 0.5715666688148482, "grad_norm": 0.004041302913021656, "learning_rate": 4.765485141424992e-06, "loss": 0.0, "step": 8869 }, { "epoch": 0.5716311142617774, "grad_norm": 0.003423076298219085, "learning_rate": 4.764769065520945e-06, "loss": 0.0, "step": 8870 }, { "epoch": 0.5716955597087066, "grad_norm": 0.001335184445791643, "learning_rate": 4.7640529896168995e-06, "loss": 0.0, "step": 8871 }, { "epoch": 0.5717600051556357, "grad_norm": 0.006451075758302448, "learning_rate": 4.763336913712854e-06, "loss": 0.0, "step": 8872 }, { "epoch": 0.5718244506025649, "grad_norm": 0.2841419575082844, "learning_rate": 4.762620837808808e-06, "loss": 0.0005, "step": 8873 }, { "epoch": 0.5718888960494941, "grad_norm": 0.0009039202676003914, "learning_rate": 4.761904761904762e-06, "loss": 0.0, "step": 8874 }, { "epoch": 0.5719533414964233, "grad_norm": 0.0006223950228870037, "learning_rate": 4.761188686000717e-06, "loss": 0.0, "step": 8875 }, { "epoch": 0.5720177869433525, "grad_norm": 0.0020699808052104343, "learning_rate": 4.760472610096671e-06, "loss": 0.0, "step": 8876 }, { "epoch": 0.5720822323902817, "grad_norm": 0.33732607430765, "learning_rate": 4.759756534192625e-06, "loss": 0.0015, "step": 8877 }, { "epoch": 0.5721466778372108, "grad_norm": 0.005303730736312988, "learning_rate": 4.759040458288579e-06, "loss": 0.0, "step": 8878 }, { "epoch": 0.5722111232841399, "grad_norm": 0.012692532555947952, "learning_rate": 4.758324382384533e-06, "loss": 0.0001, "step": 8879 }, { "epoch": 0.5722755687310691, "grad_norm": 0.00019626527068705832, "learning_rate": 4.757608306480487e-06, "loss": 0.0, "step": 8880 }, { "epoch": 0.5723400141779983, "grad_norm": 0.002095381014958743, "learning_rate": 4.756892230576442e-06, "loss": 0.0, "step": 8881 }, { "epoch": 0.5724044596249275, "grad_norm": 0.1159781279800337, "learning_rate": 4.756176154672396e-06, "loss": 0.0001, "step": 8882 }, { "epoch": 0.5724689050718567, "grad_norm": 0.01648991749511099, "learning_rate": 4.755460078768349e-06, "loss": 0.0, "step": 8883 }, { "epoch": 0.5725333505187858, "grad_norm": 0.20105396444792106, "learning_rate": 4.754744002864304e-06, "loss": 0.0002, "step": 8884 }, { "epoch": 0.572597795965715, "grad_norm": 0.17178626370759914, "learning_rate": 4.754027926960258e-06, "loss": 0.0011, "step": 8885 }, { "epoch": 0.5726622414126442, "grad_norm": 0.010614898100775799, "learning_rate": 4.753311851056212e-06, "loss": 0.0001, "step": 8886 }, { "epoch": 0.5727266868595734, "grad_norm": 0.001217281913593414, "learning_rate": 4.7525957751521666e-06, "loss": 0.0, "step": 8887 }, { "epoch": 0.5727911323065026, "grad_norm": 0.0057027341480303745, "learning_rate": 4.751879699248121e-06, "loss": 0.0, "step": 8888 }, { "epoch": 0.5728555777534318, "grad_norm": 0.8818495232939141, "learning_rate": 4.751163623344075e-06, "loss": 0.001, "step": 8889 }, { "epoch": 0.5729200232003608, "grad_norm": 0.00742720014691036, "learning_rate": 4.7504475474400295e-06, "loss": 0.0001, "step": 8890 }, { "epoch": 0.57298446864729, "grad_norm": 0.027254013778446516, "learning_rate": 4.749731471535983e-06, "loss": 0.0, "step": 8891 }, { "epoch": 0.5730489140942192, "grad_norm": 0.03194781425699038, "learning_rate": 4.749015395631937e-06, "loss": 0.0, "step": 8892 }, { "epoch": 0.5731133595411484, "grad_norm": 0.0026582619085988384, "learning_rate": 4.7482993197278915e-06, "loss": 0.0, "step": 8893 }, { "epoch": 0.5731778049880776, "grad_norm": 0.127752705813193, "learning_rate": 4.747583243823846e-06, "loss": 0.0001, "step": 8894 }, { "epoch": 0.5732422504350068, "grad_norm": 0.002056805451891573, "learning_rate": 4.7468671679198e-06, "loss": 0.0, "step": 8895 }, { "epoch": 0.573306695881936, "grad_norm": 0.004686356561184799, "learning_rate": 4.7461510920157536e-06, "loss": 0.0, "step": 8896 }, { "epoch": 0.5733711413288651, "grad_norm": 0.0038974480489338153, "learning_rate": 4.745435016111708e-06, "loss": 0.0, "step": 8897 }, { "epoch": 0.5734355867757943, "grad_norm": 0.1602997195389966, "learning_rate": 4.744718940207663e-06, "loss": 0.0011, "step": 8898 }, { "epoch": 0.5735000322227235, "grad_norm": 0.0026400329446987615, "learning_rate": 4.7440028643036165e-06, "loss": 0.0, "step": 8899 }, { "epoch": 0.5735644776696527, "grad_norm": 0.003769746465072043, "learning_rate": 4.743286788399571e-06, "loss": 0.0, "step": 8900 }, { "epoch": 0.5736289231165819, "grad_norm": 0.010710992003722329, "learning_rate": 4.742570712495525e-06, "loss": 0.0, "step": 8901 }, { "epoch": 0.5736933685635109, "grad_norm": 0.0006582057125751989, "learning_rate": 4.741854636591479e-06, "loss": 0.0, "step": 8902 }, { "epoch": 0.5737578140104401, "grad_norm": 0.0024639063269701047, "learning_rate": 4.741138560687434e-06, "loss": 0.0, "step": 8903 }, { "epoch": 0.5738222594573693, "grad_norm": 0.8917694769321234, "learning_rate": 4.740422484783387e-06, "loss": 0.0007, "step": 8904 }, { "epoch": 0.5738867049042985, "grad_norm": 0.0032197677652261165, "learning_rate": 4.739706408879341e-06, "loss": 0.0, "step": 8905 }, { "epoch": 0.5739511503512277, "grad_norm": 0.22933696610797213, "learning_rate": 4.738990332975296e-06, "loss": 0.001, "step": 8906 }, { "epoch": 0.5740155957981569, "grad_norm": 0.06729571615328032, "learning_rate": 4.73827425707125e-06, "loss": 0.0002, "step": 8907 }, { "epoch": 0.574080041245086, "grad_norm": 0.19443921359345376, "learning_rate": 4.737558181167204e-06, "loss": 0.0006, "step": 8908 }, { "epoch": 0.5741444866920152, "grad_norm": 0.007514377513820292, "learning_rate": 4.736842105263158e-06, "loss": 0.0, "step": 8909 }, { "epoch": 0.5742089321389444, "grad_norm": 0.00025992034944802154, "learning_rate": 4.736126029359113e-06, "loss": 0.0, "step": 8910 }, { "epoch": 0.5742733775858736, "grad_norm": 0.0007463775014227858, "learning_rate": 4.735409953455067e-06, "loss": 0.0, "step": 8911 }, { "epoch": 0.5743378230328028, "grad_norm": 0.0008458128520093252, "learning_rate": 4.734693877551021e-06, "loss": 0.0, "step": 8912 }, { "epoch": 0.5744022684797319, "grad_norm": 0.02946843736749207, "learning_rate": 4.733977801646975e-06, "loss": 0.0001, "step": 8913 }, { "epoch": 0.574466713926661, "grad_norm": 0.0015844954372983143, "learning_rate": 4.733261725742929e-06, "loss": 0.0, "step": 8914 }, { "epoch": 0.5745311593735902, "grad_norm": 0.0006650693208472371, "learning_rate": 4.7325456498388835e-06, "loss": 0.0, "step": 8915 }, { "epoch": 0.5745956048205194, "grad_norm": 0.00024925103154753225, "learning_rate": 4.731829573934838e-06, "loss": 0.0, "step": 8916 }, { "epoch": 0.5746600502674486, "grad_norm": 0.0006270557073641862, "learning_rate": 4.731113498030791e-06, "loss": 0.0, "step": 8917 }, { "epoch": 0.5747244957143778, "grad_norm": 0.000898050790659976, "learning_rate": 4.730397422126746e-06, "loss": 0.0015, "step": 8918 }, { "epoch": 0.574788941161307, "grad_norm": 0.0010283628729795369, "learning_rate": 4.7296813462227e-06, "loss": 0.0, "step": 8919 }, { "epoch": 0.5748533866082361, "grad_norm": 0.014245992428902015, "learning_rate": 4.728965270318654e-06, "loss": 0.0, "step": 8920 }, { "epoch": 0.5749178320551653, "grad_norm": 0.004830281237210356, "learning_rate": 4.7282491944146085e-06, "loss": 0.0, "step": 8921 }, { "epoch": 0.5749822775020945, "grad_norm": 0.02278564499924832, "learning_rate": 4.727533118510563e-06, "loss": 0.0, "step": 8922 }, { "epoch": 0.5750467229490237, "grad_norm": 0.0002993158678736612, "learning_rate": 4.726817042606517e-06, "loss": 0.0, "step": 8923 }, { "epoch": 0.5751111683959528, "grad_norm": 0.05430118317287579, "learning_rate": 4.726100966702471e-06, "loss": 0.0001, "step": 8924 }, { "epoch": 0.575175613842882, "grad_norm": 0.0014384505586056613, "learning_rate": 4.725384890798425e-06, "loss": 0.0, "step": 8925 }, { "epoch": 0.5752400592898111, "grad_norm": 0.5587276218293904, "learning_rate": 4.724668814894379e-06, "loss": 0.0008, "step": 8926 }, { "epoch": 0.5753045047367403, "grad_norm": 0.009065701688840216, "learning_rate": 4.7239527389903334e-06, "loss": 0.0, "step": 8927 }, { "epoch": 0.5753689501836695, "grad_norm": 0.04705428814279142, "learning_rate": 4.723236663086288e-06, "loss": 0.0005, "step": 8928 }, { "epoch": 0.5754333956305987, "grad_norm": 0.009048597003513747, "learning_rate": 4.722520587182241e-06, "loss": 0.0, "step": 8929 }, { "epoch": 0.5754978410775279, "grad_norm": 0.00033590933556921425, "learning_rate": 4.7218045112781955e-06, "loss": 0.0, "step": 8930 }, { "epoch": 0.5755622865244571, "grad_norm": 0.002176598827963511, "learning_rate": 4.72108843537415e-06, "loss": 0.0, "step": 8931 }, { "epoch": 0.5756267319713863, "grad_norm": 0.00026799353923762494, "learning_rate": 4.720372359470104e-06, "loss": 0.0, "step": 8932 }, { "epoch": 0.5756911774183154, "grad_norm": 0.03386055598732095, "learning_rate": 4.719656283566058e-06, "loss": 0.0001, "step": 8933 }, { "epoch": 0.5757556228652446, "grad_norm": 0.0008778612228956532, "learning_rate": 4.718940207662013e-06, "loss": 0.0, "step": 8934 }, { "epoch": 0.5758200683121737, "grad_norm": 0.0007188580320088237, "learning_rate": 4.718224131757967e-06, "loss": 0.0, "step": 8935 }, { "epoch": 0.5758845137591029, "grad_norm": 0.022633746857609334, "learning_rate": 4.717508055853921e-06, "loss": 0.0001, "step": 8936 }, { "epoch": 0.5759489592060321, "grad_norm": 0.0006597787799743524, "learning_rate": 4.716791979949875e-06, "loss": 0.0, "step": 8937 }, { "epoch": 0.5760134046529612, "grad_norm": 0.017968162787472524, "learning_rate": 4.716075904045829e-06, "loss": 0.0002, "step": 8938 }, { "epoch": 0.5760778500998904, "grad_norm": 0.003721095668540121, "learning_rate": 4.715359828141783e-06, "loss": 0.0, "step": 8939 }, { "epoch": 0.5761422955468196, "grad_norm": 0.1334403917347702, "learning_rate": 4.714643752237738e-06, "loss": 0.0002, "step": 8940 }, { "epoch": 0.5762067409937488, "grad_norm": 0.049999945890129134, "learning_rate": 4.713927676333692e-06, "loss": 0.0005, "step": 8941 }, { "epoch": 0.576271186440678, "grad_norm": 0.04438637225912091, "learning_rate": 4.713211600429645e-06, "loss": 0.0001, "step": 8942 }, { "epoch": 0.5763356318876072, "grad_norm": 0.004939305130353833, "learning_rate": 4.7124955245256e-06, "loss": 0.0, "step": 8943 }, { "epoch": 0.5764000773345364, "grad_norm": 0.010423595187445532, "learning_rate": 4.711779448621554e-06, "loss": 0.0001, "step": 8944 }, { "epoch": 0.5764645227814655, "grad_norm": 0.30387026664487055, "learning_rate": 4.711063372717508e-06, "loss": 0.0003, "step": 8945 }, { "epoch": 0.5765289682283946, "grad_norm": 0.002739366849377702, "learning_rate": 4.7103472968134626e-06, "loss": 0.0, "step": 8946 }, { "epoch": 0.5765934136753238, "grad_norm": 0.21060028318841664, "learning_rate": 4.709631220909417e-06, "loss": 0.0042, "step": 8947 }, { "epoch": 0.576657859122253, "grad_norm": 0.0018765208304858886, "learning_rate": 4.708915145005371e-06, "loss": 0.0, "step": 8948 }, { "epoch": 0.5767223045691822, "grad_norm": 0.0007770944081688407, "learning_rate": 4.7081990691013255e-06, "loss": 0.0, "step": 8949 }, { "epoch": 0.5767867500161113, "grad_norm": 0.002056512481472225, "learning_rate": 4.707482993197279e-06, "loss": 0.0, "step": 8950 }, { "epoch": 0.5768511954630405, "grad_norm": 0.0034826728857168528, "learning_rate": 4.706766917293233e-06, "loss": 0.0, "step": 8951 }, { "epoch": 0.5769156409099697, "grad_norm": 0.0003830165904485001, "learning_rate": 4.7060508413891875e-06, "loss": 0.0, "step": 8952 }, { "epoch": 0.5769800863568989, "grad_norm": 0.043901532936228796, "learning_rate": 4.705334765485142e-06, "loss": 0.0001, "step": 8953 }, { "epoch": 0.5770445318038281, "grad_norm": 0.003480280335613063, "learning_rate": 4.704618689581096e-06, "loss": 0.0, "step": 8954 }, { "epoch": 0.5771089772507573, "grad_norm": 0.09117785697310841, "learning_rate": 4.7039026136770496e-06, "loss": 0.0001, "step": 8955 }, { "epoch": 0.5771734226976865, "grad_norm": 0.0001285398936270668, "learning_rate": 4.703186537773004e-06, "loss": 0.0, "step": 8956 }, { "epoch": 0.5772378681446155, "grad_norm": 0.0008863763737434807, "learning_rate": 4.702470461868959e-06, "loss": 0.0, "step": 8957 }, { "epoch": 0.5773023135915447, "grad_norm": 0.38130637127350175, "learning_rate": 4.7017543859649125e-06, "loss": 0.0016, "step": 8958 }, { "epoch": 0.5773667590384739, "grad_norm": 0.008510612682685685, "learning_rate": 4.701038310060867e-06, "loss": 0.0, "step": 8959 }, { "epoch": 0.5774312044854031, "grad_norm": 0.009348145843828558, "learning_rate": 4.700322234156821e-06, "loss": 0.0001, "step": 8960 }, { "epoch": 0.5774956499323323, "grad_norm": 0.006941331467167952, "learning_rate": 4.699606158252775e-06, "loss": 0.0, "step": 8961 }, { "epoch": 0.5775600953792615, "grad_norm": 0.0013218582692286602, "learning_rate": 4.69889008234873e-06, "loss": 0.0, "step": 8962 }, { "epoch": 0.5776245408261906, "grad_norm": 0.0005961525107431034, "learning_rate": 4.698174006444683e-06, "loss": 0.0, "step": 8963 }, { "epoch": 0.5776889862731198, "grad_norm": 0.5694010257321127, "learning_rate": 4.697457930540637e-06, "loss": 0.0026, "step": 8964 }, { "epoch": 0.577753431720049, "grad_norm": 0.0002869682815712005, "learning_rate": 4.696741854636592e-06, "loss": 0.0, "step": 8965 }, { "epoch": 0.5778178771669782, "grad_norm": 0.006472222447300945, "learning_rate": 4.696025778732546e-06, "loss": 0.0, "step": 8966 }, { "epoch": 0.5778823226139074, "grad_norm": 0.05890671080692376, "learning_rate": 4.6953097028285e-06, "loss": 0.0001, "step": 8967 }, { "epoch": 0.5779467680608364, "grad_norm": 0.0015918262072770278, "learning_rate": 4.694593626924455e-06, "loss": 0.0, "step": 8968 }, { "epoch": 0.5780112135077656, "grad_norm": 0.0012047950646157887, "learning_rate": 4.693877551020409e-06, "loss": 0.0, "step": 8969 }, { "epoch": 0.5780756589546948, "grad_norm": 0.0001741613348393895, "learning_rate": 4.693161475116363e-06, "loss": 0.0, "step": 8970 }, { "epoch": 0.578140104401624, "grad_norm": 9.804336294408929e-05, "learning_rate": 4.692445399212317e-06, "loss": 0.0, "step": 8971 }, { "epoch": 0.5782045498485532, "grad_norm": 0.0002919895338205372, "learning_rate": 4.691729323308271e-06, "loss": 0.0, "step": 8972 }, { "epoch": 0.5782689952954824, "grad_norm": 0.33817765277223377, "learning_rate": 4.691013247404225e-06, "loss": 0.001, "step": 8973 }, { "epoch": 0.5783334407424116, "grad_norm": 0.0005003706800080808, "learning_rate": 4.6902971715001795e-06, "loss": 0.0, "step": 8974 }, { "epoch": 0.5783978861893407, "grad_norm": 0.01697098235105734, "learning_rate": 4.689581095596134e-06, "loss": 0.0, "step": 8975 }, { "epoch": 0.5784623316362699, "grad_norm": 0.01526346802499094, "learning_rate": 4.688865019692087e-06, "loss": 0.0001, "step": 8976 }, { "epoch": 0.5785267770831991, "grad_norm": 0.00011173870781894097, "learning_rate": 4.688148943788042e-06, "loss": 0.0, "step": 8977 }, { "epoch": 0.5785912225301283, "grad_norm": 0.19004203268711622, "learning_rate": 4.687432867883996e-06, "loss": 0.0005, "step": 8978 }, { "epoch": 0.5786556679770575, "grad_norm": 6.244763941013428e-05, "learning_rate": 4.68671679197995e-06, "loss": 0.0, "step": 8979 }, { "epoch": 0.5787201134239865, "grad_norm": 9.843372332926753e-05, "learning_rate": 4.6860007160759045e-06, "loss": 0.0, "step": 8980 }, { "epoch": 0.5787845588709157, "grad_norm": 0.16569536958932962, "learning_rate": 4.685284640171859e-06, "loss": 0.0004, "step": 8981 }, { "epoch": 0.5788490043178449, "grad_norm": 0.0005437621685816029, "learning_rate": 4.684568564267813e-06, "loss": 0.0, "step": 8982 }, { "epoch": 0.5789134497647741, "grad_norm": 0.08751011709068818, "learning_rate": 4.683852488363767e-06, "loss": 0.0047, "step": 8983 }, { "epoch": 0.5789778952117033, "grad_norm": 0.0361179598361197, "learning_rate": 4.683136412459721e-06, "loss": 0.0002, "step": 8984 }, { "epoch": 0.5790423406586325, "grad_norm": 0.0009243908624880609, "learning_rate": 4.682420336555675e-06, "loss": 0.0, "step": 8985 }, { "epoch": 0.5791067861055617, "grad_norm": 0.00028813802643748844, "learning_rate": 4.6817042606516294e-06, "loss": 0.0, "step": 8986 }, { "epoch": 0.5791712315524908, "grad_norm": 0.0014871085523719119, "learning_rate": 4.680988184747584e-06, "loss": 0.0, "step": 8987 }, { "epoch": 0.57923567699942, "grad_norm": 0.0011678639568634536, "learning_rate": 4.680272108843538e-06, "loss": 0.0, "step": 8988 }, { "epoch": 0.5793001224463492, "grad_norm": 0.022646964211210575, "learning_rate": 4.6795560329394915e-06, "loss": 0.0, "step": 8989 }, { "epoch": 0.5793645678932784, "grad_norm": 6.162092306559458e-05, "learning_rate": 4.678839957035446e-06, "loss": 0.0, "step": 8990 }, { "epoch": 0.5794290133402075, "grad_norm": 0.02449790624310379, "learning_rate": 4.6781238811314e-06, "loss": 0.0, "step": 8991 }, { "epoch": 0.5794934587871367, "grad_norm": 0.12489244930395811, "learning_rate": 4.677407805227354e-06, "loss": 0.0001, "step": 8992 }, { "epoch": 0.5795579042340658, "grad_norm": 0.012767780350679178, "learning_rate": 4.676691729323309e-06, "loss": 0.0001, "step": 8993 }, { "epoch": 0.579622349680995, "grad_norm": 0.009526083138262352, "learning_rate": 4.675975653419263e-06, "loss": 0.0, "step": 8994 }, { "epoch": 0.5796867951279242, "grad_norm": 0.021985102148627596, "learning_rate": 4.675259577515217e-06, "loss": 0.0002, "step": 8995 }, { "epoch": 0.5797512405748534, "grad_norm": 0.0004470049212301294, "learning_rate": 4.6745435016111716e-06, "loss": 0.0, "step": 8996 }, { "epoch": 0.5798156860217826, "grad_norm": 0.0004562537429128238, "learning_rate": 4.673827425707125e-06, "loss": 0.0, "step": 8997 }, { "epoch": 0.5798801314687118, "grad_norm": 0.0009392800325101437, "learning_rate": 4.673111349803079e-06, "loss": 0.0, "step": 8998 }, { "epoch": 0.5799445769156409, "grad_norm": 0.008200601472724514, "learning_rate": 4.672395273899034e-06, "loss": 0.0, "step": 8999 }, { "epoch": 0.5800090223625701, "grad_norm": 0.00019648518137536374, "learning_rate": 4.671679197994988e-06, "loss": 0.0, "step": 9000 }, { "epoch": 0.5800734678094993, "grad_norm": 0.001726909860329572, "learning_rate": 4.670963122090942e-06, "loss": 0.0, "step": 9001 }, { "epoch": 0.5801379132564284, "grad_norm": 7.409479499881678e-05, "learning_rate": 4.670247046186896e-06, "loss": 0.0, "step": 9002 }, { "epoch": 0.5802023587033576, "grad_norm": 0.0003329872254956827, "learning_rate": 4.669530970282851e-06, "loss": 0.0, "step": 9003 }, { "epoch": 0.5802668041502868, "grad_norm": 0.0010982760062732727, "learning_rate": 4.668814894378805e-06, "loss": 0.0, "step": 9004 }, { "epoch": 0.5803312495972159, "grad_norm": 0.0035950464528479, "learning_rate": 4.6680988184747586e-06, "loss": 0.0, "step": 9005 }, { "epoch": 0.5803956950441451, "grad_norm": 0.021047836132908507, "learning_rate": 4.667382742570713e-06, "loss": 0.0002, "step": 9006 }, { "epoch": 0.5804601404910743, "grad_norm": 0.0011425220805333551, "learning_rate": 4.666666666666667e-06, "loss": 0.0, "step": 9007 }, { "epoch": 0.5805245859380035, "grad_norm": 0.0008882349360519453, "learning_rate": 4.6659505907626215e-06, "loss": 0.0, "step": 9008 }, { "epoch": 0.5805890313849327, "grad_norm": 0.0002802394128841394, "learning_rate": 4.665234514858576e-06, "loss": 0.0, "step": 9009 }, { "epoch": 0.5806534768318619, "grad_norm": 0.0009635131074359399, "learning_rate": 4.664518438954529e-06, "loss": 0.0, "step": 9010 }, { "epoch": 0.580717922278791, "grad_norm": 0.0009012143762942716, "learning_rate": 4.6638023630504835e-06, "loss": 0.0, "step": 9011 }, { "epoch": 0.5807823677257202, "grad_norm": 0.006541857974311703, "learning_rate": 4.663086287146438e-06, "loss": 0.0, "step": 9012 }, { "epoch": 0.5808468131726493, "grad_norm": 0.06217128394273282, "learning_rate": 4.662370211242392e-06, "loss": 0.0002, "step": 9013 }, { "epoch": 0.5809112586195785, "grad_norm": 0.01966216794474465, "learning_rate": 4.661654135338346e-06, "loss": 0.0, "step": 9014 }, { "epoch": 0.5809757040665077, "grad_norm": 0.0007292552460633633, "learning_rate": 4.660938059434301e-06, "loss": 0.0, "step": 9015 }, { "epoch": 0.5810401495134369, "grad_norm": 0.047384355713321426, "learning_rate": 4.660221983530255e-06, "loss": 0.0001, "step": 9016 }, { "epoch": 0.581104594960366, "grad_norm": 0.004729452570084791, "learning_rate": 4.659505907626209e-06, "loss": 0.0, "step": 9017 }, { "epoch": 0.5811690404072952, "grad_norm": 0.0005329612298227006, "learning_rate": 4.658789831722163e-06, "loss": 0.0, "step": 9018 }, { "epoch": 0.5812334858542244, "grad_norm": 0.09264701461117533, "learning_rate": 4.658073755818117e-06, "loss": 0.001, "step": 9019 }, { "epoch": 0.5812979313011536, "grad_norm": 0.0002257829058000882, "learning_rate": 4.657357679914071e-06, "loss": 0.0, "step": 9020 }, { "epoch": 0.5813623767480828, "grad_norm": 0.010905818191302687, "learning_rate": 4.656641604010026e-06, "loss": 0.0, "step": 9021 }, { "epoch": 0.581426822195012, "grad_norm": 0.005600216922704845, "learning_rate": 4.65592552810598e-06, "loss": 0.0, "step": 9022 }, { "epoch": 0.5814912676419411, "grad_norm": 0.031750284272755426, "learning_rate": 4.655209452201933e-06, "loss": 0.0001, "step": 9023 }, { "epoch": 0.5815557130888702, "grad_norm": 0.0077528691803133536, "learning_rate": 4.654493376297888e-06, "loss": 0.0, "step": 9024 }, { "epoch": 0.5816201585357994, "grad_norm": 0.04543225714500262, "learning_rate": 4.653777300393842e-06, "loss": 0.0001, "step": 9025 }, { "epoch": 0.5816846039827286, "grad_norm": 0.013056369682744871, "learning_rate": 4.653061224489796e-06, "loss": 0.0001, "step": 9026 }, { "epoch": 0.5817490494296578, "grad_norm": 0.17819283640927924, "learning_rate": 4.652345148585751e-06, "loss": 0.0003, "step": 9027 }, { "epoch": 0.581813494876587, "grad_norm": 0.08917160086074113, "learning_rate": 4.651629072681705e-06, "loss": 0.0003, "step": 9028 }, { "epoch": 0.5818779403235161, "grad_norm": 0.006331248638778778, "learning_rate": 4.650912996777659e-06, "loss": 0.0, "step": 9029 }, { "epoch": 0.5819423857704453, "grad_norm": 0.002775633504862486, "learning_rate": 4.6501969208736135e-06, "loss": 0.0, "step": 9030 }, { "epoch": 0.5820068312173745, "grad_norm": 6.992336507404066e-05, "learning_rate": 4.649480844969567e-06, "loss": 0.0, "step": 9031 }, { "epoch": 0.5820712766643037, "grad_norm": 0.00017943615738269465, "learning_rate": 4.648764769065521e-06, "loss": 0.0, "step": 9032 }, { "epoch": 0.5821357221112329, "grad_norm": 0.0058394424584071415, "learning_rate": 4.6480486931614755e-06, "loss": 0.0, "step": 9033 }, { "epoch": 0.5822001675581621, "grad_norm": 0.050562310960973855, "learning_rate": 4.64733261725743e-06, "loss": 0.0002, "step": 9034 }, { "epoch": 0.5822646130050911, "grad_norm": 0.8657970450295585, "learning_rate": 4.646616541353383e-06, "loss": 0.0037, "step": 9035 }, { "epoch": 0.5823290584520203, "grad_norm": 0.00211204704726517, "learning_rate": 4.645900465449338e-06, "loss": 0.0, "step": 9036 }, { "epoch": 0.5823935038989495, "grad_norm": 0.004920183719435545, "learning_rate": 4.645184389545292e-06, "loss": 0.0, "step": 9037 }, { "epoch": 0.5824579493458787, "grad_norm": 0.00663491009308444, "learning_rate": 4.644468313641247e-06, "loss": 0.0, "step": 9038 }, { "epoch": 0.5825223947928079, "grad_norm": 0.0022066924291144698, "learning_rate": 4.6437522377372005e-06, "loss": 0.0, "step": 9039 }, { "epoch": 0.5825868402397371, "grad_norm": 0.0022171824438060417, "learning_rate": 4.643036161833155e-06, "loss": 0.0, "step": 9040 }, { "epoch": 0.5826512856866662, "grad_norm": 0.00032441297467396377, "learning_rate": 4.642320085929109e-06, "loss": 0.0, "step": 9041 }, { "epoch": 0.5827157311335954, "grad_norm": 0.030769737840866715, "learning_rate": 4.641604010025063e-06, "loss": 0.0, "step": 9042 }, { "epoch": 0.5827801765805246, "grad_norm": 0.0019483906462393567, "learning_rate": 4.640887934121017e-06, "loss": 0.0, "step": 9043 }, { "epoch": 0.5828446220274538, "grad_norm": 0.00029194905052751225, "learning_rate": 4.640171858216971e-06, "loss": 0.0, "step": 9044 }, { "epoch": 0.582909067474383, "grad_norm": 0.0002021999263205449, "learning_rate": 4.6394557823129254e-06, "loss": 0.0, "step": 9045 }, { "epoch": 0.5829735129213122, "grad_norm": 0.0014373682518094646, "learning_rate": 4.63873970640888e-06, "loss": 0.0, "step": 9046 }, { "epoch": 0.5830379583682412, "grad_norm": 0.059762924731719465, "learning_rate": 4.638023630504834e-06, "loss": 0.0001, "step": 9047 }, { "epoch": 0.5831024038151704, "grad_norm": 0.4903177897093207, "learning_rate": 4.6373075546007875e-06, "loss": 0.0041, "step": 9048 }, { "epoch": 0.5831668492620996, "grad_norm": 0.0014177705494520779, "learning_rate": 4.636591478696742e-06, "loss": 0.0, "step": 9049 }, { "epoch": 0.5832312947090288, "grad_norm": 0.2566934777913586, "learning_rate": 4.635875402792697e-06, "loss": 0.0021, "step": 9050 }, { "epoch": 0.583295740155958, "grad_norm": 0.018140133354513432, "learning_rate": 4.63515932688865e-06, "loss": 0.0001, "step": 9051 }, { "epoch": 0.5833601856028872, "grad_norm": 0.00788615014582874, "learning_rate": 4.634443250984605e-06, "loss": 0.0, "step": 9052 }, { "epoch": 0.5834246310498163, "grad_norm": 0.01840864843113238, "learning_rate": 4.633727175080559e-06, "loss": 0.0001, "step": 9053 }, { "epoch": 0.5834890764967455, "grad_norm": 0.001822052435323416, "learning_rate": 4.633011099176513e-06, "loss": 0.0, "step": 9054 }, { "epoch": 0.5835535219436747, "grad_norm": 0.017397195278423098, "learning_rate": 4.6322950232724676e-06, "loss": 0.0, "step": 9055 }, { "epoch": 0.5836179673906039, "grad_norm": 0.6295143409970986, "learning_rate": 4.631578947368421e-06, "loss": 0.0036, "step": 9056 }, { "epoch": 0.5836824128375331, "grad_norm": 0.004145135076455736, "learning_rate": 4.630862871464375e-06, "loss": 0.0, "step": 9057 }, { "epoch": 0.5837468582844622, "grad_norm": 0.01922043763141991, "learning_rate": 4.63014679556033e-06, "loss": 0.0001, "step": 9058 }, { "epoch": 0.5838113037313913, "grad_norm": 0.21877145661980707, "learning_rate": 4.629430719656284e-06, "loss": 0.0003, "step": 9059 }, { "epoch": 0.5838757491783205, "grad_norm": 0.06723857036500273, "learning_rate": 4.628714643752238e-06, "loss": 0.0001, "step": 9060 }, { "epoch": 0.5839401946252497, "grad_norm": 0.037172463083185225, "learning_rate": 4.627998567848192e-06, "loss": 0.0, "step": 9061 }, { "epoch": 0.5840046400721789, "grad_norm": 0.0024169997396676244, "learning_rate": 4.627282491944147e-06, "loss": 0.0, "step": 9062 }, { "epoch": 0.5840690855191081, "grad_norm": 0.06419768070761167, "learning_rate": 4.626566416040101e-06, "loss": 0.0, "step": 9063 }, { "epoch": 0.5841335309660373, "grad_norm": 0.01399367865517338, "learning_rate": 4.6258503401360546e-06, "loss": 0.0, "step": 9064 }, { "epoch": 0.5841979764129664, "grad_norm": 0.000952353755274673, "learning_rate": 4.625134264232009e-06, "loss": 0.0, "step": 9065 }, { "epoch": 0.5842624218598956, "grad_norm": 0.0015975798325585998, "learning_rate": 4.624418188327963e-06, "loss": 0.0, "step": 9066 }, { "epoch": 0.5843268673068248, "grad_norm": 0.0026882272384257705, "learning_rate": 4.6237021124239175e-06, "loss": 0.0, "step": 9067 }, { "epoch": 0.584391312753754, "grad_norm": 0.001748996446030553, "learning_rate": 4.622986036519872e-06, "loss": 0.0, "step": 9068 }, { "epoch": 0.5844557582006831, "grad_norm": 0.18220017498674793, "learning_rate": 4.622269960615825e-06, "loss": 0.0017, "step": 9069 }, { "epoch": 0.5845202036476123, "grad_norm": 0.009680095160558684, "learning_rate": 4.6215538847117795e-06, "loss": 0.0, "step": 9070 }, { "epoch": 0.5845846490945414, "grad_norm": 1.2011762913568456, "learning_rate": 4.620837808807734e-06, "loss": 0.0104, "step": 9071 }, { "epoch": 0.5846490945414706, "grad_norm": 0.009852353560086855, "learning_rate": 4.620121732903688e-06, "loss": 0.0, "step": 9072 }, { "epoch": 0.5847135399883998, "grad_norm": 0.0034802751090491258, "learning_rate": 4.619405656999642e-06, "loss": 0.0, "step": 9073 }, { "epoch": 0.584777985435329, "grad_norm": 0.02462778481389003, "learning_rate": 4.618689581095597e-06, "loss": 0.0, "step": 9074 }, { "epoch": 0.5848424308822582, "grad_norm": 0.07292170024709481, "learning_rate": 4.617973505191551e-06, "loss": 0.0001, "step": 9075 }, { "epoch": 0.5849068763291874, "grad_norm": 0.01276836844157095, "learning_rate": 4.617257429287505e-06, "loss": 0.0001, "step": 9076 }, { "epoch": 0.5849713217761165, "grad_norm": 0.0059313510178414935, "learning_rate": 4.616541353383459e-06, "loss": 0.0, "step": 9077 }, { "epoch": 0.5850357672230457, "grad_norm": 0.0050054168634305505, "learning_rate": 4.615825277479413e-06, "loss": 0.0, "step": 9078 }, { "epoch": 0.5851002126699749, "grad_norm": 0.004330369079694027, "learning_rate": 4.615109201575367e-06, "loss": 0.0, "step": 9079 }, { "epoch": 0.585164658116904, "grad_norm": 0.0014310698214295584, "learning_rate": 4.614393125671322e-06, "loss": 0.0, "step": 9080 }, { "epoch": 0.5852291035638332, "grad_norm": 0.09305590243071263, "learning_rate": 4.613677049767276e-06, "loss": 0.0003, "step": 9081 }, { "epoch": 0.5852935490107624, "grad_norm": 0.003215462836571615, "learning_rate": 4.612960973863229e-06, "loss": 0.0, "step": 9082 }, { "epoch": 0.5853579944576915, "grad_norm": 1.087399822312692, "learning_rate": 4.612244897959184e-06, "loss": 0.0105, "step": 9083 }, { "epoch": 0.5854224399046207, "grad_norm": 0.7281104794474664, "learning_rate": 4.611528822055138e-06, "loss": 0.0062, "step": 9084 }, { "epoch": 0.5854868853515499, "grad_norm": 0.00043356516939963686, "learning_rate": 4.610812746151092e-06, "loss": 0.0, "step": 9085 }, { "epoch": 0.5855513307984791, "grad_norm": 0.00033685958252393, "learning_rate": 4.610096670247047e-06, "loss": 0.0, "step": 9086 }, { "epoch": 0.5856157762454083, "grad_norm": 0.002318961319503037, "learning_rate": 4.609380594343001e-06, "loss": 0.0, "step": 9087 }, { "epoch": 0.5856802216923375, "grad_norm": 0.005592278210797931, "learning_rate": 4.608664518438955e-06, "loss": 0.0, "step": 9088 }, { "epoch": 0.5857446671392667, "grad_norm": 0.0243394892930314, "learning_rate": 4.6079484425349095e-06, "loss": 0.0, "step": 9089 }, { "epoch": 0.5858091125861958, "grad_norm": 0.11083153764968887, "learning_rate": 4.607232366630863e-06, "loss": 0.0001, "step": 9090 }, { "epoch": 0.5858735580331249, "grad_norm": 0.0007002490856994532, "learning_rate": 4.606516290726817e-06, "loss": 0.0, "step": 9091 }, { "epoch": 0.5859380034800541, "grad_norm": 0.0017745563231980528, "learning_rate": 4.6058002148227715e-06, "loss": 0.0, "step": 9092 }, { "epoch": 0.5860024489269833, "grad_norm": 0.005978146684738015, "learning_rate": 4.605084138918726e-06, "loss": 0.0, "step": 9093 }, { "epoch": 0.5860668943739125, "grad_norm": 0.05288187124635033, "learning_rate": 4.60436806301468e-06, "loss": 0.0001, "step": 9094 }, { "epoch": 0.5861313398208416, "grad_norm": 0.016551244789661204, "learning_rate": 4.603651987110634e-06, "loss": 0.0002, "step": 9095 }, { "epoch": 0.5861957852677708, "grad_norm": 0.044669416309915146, "learning_rate": 4.602935911206588e-06, "loss": 0.0001, "step": 9096 }, { "epoch": 0.5862602307147, "grad_norm": 0.2125491982896501, "learning_rate": 4.602219835302543e-06, "loss": 0.0002, "step": 9097 }, { "epoch": 0.5863246761616292, "grad_norm": 0.49376460609078615, "learning_rate": 4.6015037593984965e-06, "loss": 0.001, "step": 9098 }, { "epoch": 0.5863891216085584, "grad_norm": 0.016966827455400634, "learning_rate": 4.600787683494451e-06, "loss": 0.0, "step": 9099 }, { "epoch": 0.5864535670554876, "grad_norm": 0.02803030621426667, "learning_rate": 4.600071607590405e-06, "loss": 0.0, "step": 9100 }, { "epoch": 0.5865180125024168, "grad_norm": 0.00504899862022767, "learning_rate": 4.599355531686359e-06, "loss": 0.0, "step": 9101 }, { "epoch": 0.5865824579493458, "grad_norm": 5.1629753872988315, "learning_rate": 4.598639455782314e-06, "loss": 0.0183, "step": 9102 }, { "epoch": 0.586646903396275, "grad_norm": 0.0011573463084333388, "learning_rate": 4.597923379878267e-06, "loss": 0.0, "step": 9103 }, { "epoch": 0.5867113488432042, "grad_norm": 0.009361562436643706, "learning_rate": 4.5972073039742214e-06, "loss": 0.0, "step": 9104 }, { "epoch": 0.5867757942901334, "grad_norm": 0.027741470990613967, "learning_rate": 4.596491228070176e-06, "loss": 0.0, "step": 9105 }, { "epoch": 0.5868402397370626, "grad_norm": 0.00832458852908692, "learning_rate": 4.59577515216613e-06, "loss": 0.0, "step": 9106 }, { "epoch": 0.5869046851839917, "grad_norm": 0.001290437778751822, "learning_rate": 4.595059076262084e-06, "loss": 0.0, "step": 9107 }, { "epoch": 0.5869691306309209, "grad_norm": 0.015002341091636952, "learning_rate": 4.594343000358039e-06, "loss": 0.0, "step": 9108 }, { "epoch": 0.5870335760778501, "grad_norm": 0.004408846988619795, "learning_rate": 4.593626924453993e-06, "loss": 0.0, "step": 9109 }, { "epoch": 0.5870980215247793, "grad_norm": 0.039488889693771616, "learning_rate": 4.592910848549947e-06, "loss": 0.0, "step": 9110 }, { "epoch": 0.5871624669717085, "grad_norm": 0.026583034234040636, "learning_rate": 4.592194772645901e-06, "loss": 0.0003, "step": 9111 }, { "epoch": 0.5872269124186377, "grad_norm": 0.000849249254509577, "learning_rate": 4.591478696741855e-06, "loss": 0.0, "step": 9112 }, { "epoch": 0.5872913578655667, "grad_norm": 0.004329321968183173, "learning_rate": 4.590762620837809e-06, "loss": 0.0, "step": 9113 }, { "epoch": 0.5873558033124959, "grad_norm": 0.04470970489953232, "learning_rate": 4.5900465449337636e-06, "loss": 0.0, "step": 9114 }, { "epoch": 0.5874202487594251, "grad_norm": 0.004520625298732878, "learning_rate": 4.589330469029718e-06, "loss": 0.0, "step": 9115 }, { "epoch": 0.5874846942063543, "grad_norm": 0.00766393875770787, "learning_rate": 4.588614393125671e-06, "loss": 0.0, "step": 9116 }, { "epoch": 0.5875491396532835, "grad_norm": 0.002323407439271178, "learning_rate": 4.587898317221626e-06, "loss": 0.0, "step": 9117 }, { "epoch": 0.5876135851002127, "grad_norm": 0.06852094765522453, "learning_rate": 4.58718224131758e-06, "loss": 0.0001, "step": 9118 }, { "epoch": 0.5876780305471418, "grad_norm": 0.004350183486519918, "learning_rate": 4.586466165413534e-06, "loss": 0.0001, "step": 9119 }, { "epoch": 0.587742475994071, "grad_norm": 0.03648216598243733, "learning_rate": 4.5857500895094885e-06, "loss": 0.0001, "step": 9120 }, { "epoch": 0.5878069214410002, "grad_norm": 0.31417986446623686, "learning_rate": 4.585034013605443e-06, "loss": 0.0014, "step": 9121 }, { "epoch": 0.5878713668879294, "grad_norm": 0.0414216516748464, "learning_rate": 4.584317937701397e-06, "loss": 0.0001, "step": 9122 }, { "epoch": 0.5879358123348586, "grad_norm": 0.05218019741684195, "learning_rate": 4.583601861797351e-06, "loss": 0.0001, "step": 9123 }, { "epoch": 0.5880002577817878, "grad_norm": 0.00462391310842688, "learning_rate": 4.582885785893305e-06, "loss": 0.0, "step": 9124 }, { "epoch": 0.5880647032287168, "grad_norm": 0.020700351989345004, "learning_rate": 4.582169709989259e-06, "loss": 0.0001, "step": 9125 }, { "epoch": 0.588129148675646, "grad_norm": 0.0029324404378681776, "learning_rate": 4.5814536340852135e-06, "loss": 0.0, "step": 9126 }, { "epoch": 0.5881935941225752, "grad_norm": 0.00026724816470433214, "learning_rate": 4.580737558181168e-06, "loss": 0.0, "step": 9127 }, { "epoch": 0.5882580395695044, "grad_norm": 0.00047139101971960793, "learning_rate": 4.580021482277121e-06, "loss": 0.0, "step": 9128 }, { "epoch": 0.5883224850164336, "grad_norm": 0.001048866333167167, "learning_rate": 4.5793054063730755e-06, "loss": 0.0, "step": 9129 }, { "epoch": 0.5883869304633628, "grad_norm": 0.00039698803690931634, "learning_rate": 4.57858933046903e-06, "loss": 0.0, "step": 9130 }, { "epoch": 0.588451375910292, "grad_norm": 0.006695436789118914, "learning_rate": 4.577873254564984e-06, "loss": 0.0001, "step": 9131 }, { "epoch": 0.5885158213572211, "grad_norm": 0.0019064073027897633, "learning_rate": 4.577157178660938e-06, "loss": 0.0, "step": 9132 }, { "epoch": 0.5885802668041503, "grad_norm": 0.0002214748274238106, "learning_rate": 4.576441102756893e-06, "loss": 0.0, "step": 9133 }, { "epoch": 0.5886447122510795, "grad_norm": 0.1768014864419196, "learning_rate": 4.575725026852847e-06, "loss": 0.0032, "step": 9134 }, { "epoch": 0.5887091576980087, "grad_norm": 0.00627439198323087, "learning_rate": 4.575008950948801e-06, "loss": 0.0, "step": 9135 }, { "epoch": 0.5887736031449378, "grad_norm": 0.0019090848411585798, "learning_rate": 4.574292875044755e-06, "loss": 0.0, "step": 9136 }, { "epoch": 0.588838048591867, "grad_norm": 0.023734394041223905, "learning_rate": 4.573576799140709e-06, "loss": 0.0001, "step": 9137 }, { "epoch": 0.5889024940387961, "grad_norm": 0.0012559579821498448, "learning_rate": 4.572860723236663e-06, "loss": 0.0, "step": 9138 }, { "epoch": 0.5889669394857253, "grad_norm": 0.005693433432522691, "learning_rate": 4.572144647332618e-06, "loss": 0.0001, "step": 9139 }, { "epoch": 0.5890313849326545, "grad_norm": 0.009353012652150373, "learning_rate": 4.571428571428572e-06, "loss": 0.0, "step": 9140 }, { "epoch": 0.5890958303795837, "grad_norm": 0.029658588021311515, "learning_rate": 4.570712495524525e-06, "loss": 0.0001, "step": 9141 }, { "epoch": 0.5891602758265129, "grad_norm": 0.003302594123403248, "learning_rate": 4.56999641962048e-06, "loss": 0.0, "step": 9142 }, { "epoch": 0.589224721273442, "grad_norm": 0.001705376414646427, "learning_rate": 4.569280343716435e-06, "loss": 0.0, "step": 9143 }, { "epoch": 0.5892891667203712, "grad_norm": 0.18220126885195806, "learning_rate": 4.568564267812388e-06, "loss": 0.0004, "step": 9144 }, { "epoch": 0.5893536121673004, "grad_norm": 0.0019109073460819275, "learning_rate": 4.567848191908343e-06, "loss": 0.0, "step": 9145 }, { "epoch": 0.5894180576142296, "grad_norm": 0.005024864271140364, "learning_rate": 4.567132116004297e-06, "loss": 0.0, "step": 9146 }, { "epoch": 0.5894825030611587, "grad_norm": 0.00905942092399999, "learning_rate": 4.566416040100251e-06, "loss": 0.0, "step": 9147 }, { "epoch": 0.5895469485080879, "grad_norm": 0.005881856218996886, "learning_rate": 4.5656999641962055e-06, "loss": 0.0, "step": 9148 }, { "epoch": 0.589611393955017, "grad_norm": 0.0008304460678939578, "learning_rate": 4.564983888292159e-06, "loss": 0.0, "step": 9149 }, { "epoch": 0.5896758394019462, "grad_norm": 0.007501626420824486, "learning_rate": 4.564267812388113e-06, "loss": 0.0, "step": 9150 }, { "epoch": 0.5897402848488754, "grad_norm": 0.0012510811332456175, "learning_rate": 4.5635517364840675e-06, "loss": 0.0, "step": 9151 }, { "epoch": 0.5898047302958046, "grad_norm": 0.0746794568702863, "learning_rate": 4.562835660580022e-06, "loss": 0.001, "step": 9152 }, { "epoch": 0.5898691757427338, "grad_norm": 0.26361614627359486, "learning_rate": 4.562119584675976e-06, "loss": 0.0021, "step": 9153 }, { "epoch": 0.589933621189663, "grad_norm": 0.0002156212009346355, "learning_rate": 4.56140350877193e-06, "loss": 0.0, "step": 9154 }, { "epoch": 0.5899980666365922, "grad_norm": 0.0015590836780957843, "learning_rate": 4.560687432867885e-06, "loss": 0.0, "step": 9155 }, { "epoch": 0.5900625120835213, "grad_norm": 0.004308015541784625, "learning_rate": 4.559971356963839e-06, "loss": 0.0, "step": 9156 }, { "epoch": 0.5901269575304505, "grad_norm": 0.0717703984591805, "learning_rate": 4.5592552810597925e-06, "loss": 0.0003, "step": 9157 }, { "epoch": 0.5901914029773796, "grad_norm": 0.0016221557899336572, "learning_rate": 4.558539205155747e-06, "loss": 0.0, "step": 9158 }, { "epoch": 0.5902558484243088, "grad_norm": 0.02843239715369182, "learning_rate": 4.557823129251701e-06, "loss": 0.0001, "step": 9159 }, { "epoch": 0.590320293871238, "grad_norm": 0.08906306174586306, "learning_rate": 4.557107053347655e-06, "loss": 0.0002, "step": 9160 }, { "epoch": 0.5903847393181672, "grad_norm": 0.041065458269552234, "learning_rate": 4.55639097744361e-06, "loss": 0.0001, "step": 9161 }, { "epoch": 0.5904491847650963, "grad_norm": 0.0008033243988639701, "learning_rate": 4.555674901539563e-06, "loss": 0.0, "step": 9162 }, { "epoch": 0.5905136302120255, "grad_norm": 0.05046193202663287, "learning_rate": 4.5549588256355174e-06, "loss": 0.0, "step": 9163 }, { "epoch": 0.5905780756589547, "grad_norm": 0.00023855639736130923, "learning_rate": 4.554242749731472e-06, "loss": 0.0, "step": 9164 }, { "epoch": 0.5906425211058839, "grad_norm": 0.0006409486529008727, "learning_rate": 4.553526673827426e-06, "loss": 0.0, "step": 9165 }, { "epoch": 0.5907069665528131, "grad_norm": 0.00037044303511188413, "learning_rate": 4.55281059792338e-06, "loss": 0.0, "step": 9166 }, { "epoch": 0.5907714119997423, "grad_norm": 0.012088132812897083, "learning_rate": 4.552094522019335e-06, "loss": 0.0, "step": 9167 }, { "epoch": 0.5908358574466714, "grad_norm": 0.0016999619869035114, "learning_rate": 4.551378446115289e-06, "loss": 0.0, "step": 9168 }, { "epoch": 0.5909003028936005, "grad_norm": 0.38509600634394847, "learning_rate": 4.550662370211243e-06, "loss": 0.0014, "step": 9169 }, { "epoch": 0.5909647483405297, "grad_norm": 0.0015224179117437268, "learning_rate": 4.549946294307197e-06, "loss": 0.0, "step": 9170 }, { "epoch": 0.5910291937874589, "grad_norm": 0.0011083900503956407, "learning_rate": 4.549230218403151e-06, "loss": 0.0, "step": 9171 }, { "epoch": 0.5910936392343881, "grad_norm": 0.00030231327758239914, "learning_rate": 4.548514142499105e-06, "loss": 0.0, "step": 9172 }, { "epoch": 0.5911580846813173, "grad_norm": 0.17115920743812696, "learning_rate": 4.5477980665950596e-06, "loss": 0.0002, "step": 9173 }, { "epoch": 0.5912225301282464, "grad_norm": 0.0030166942177998085, "learning_rate": 4.547081990691014e-06, "loss": 0.0, "step": 9174 }, { "epoch": 0.5912869755751756, "grad_norm": 0.013242115384337326, "learning_rate": 4.546365914786967e-06, "loss": 0.0001, "step": 9175 }, { "epoch": 0.5913514210221048, "grad_norm": 0.23101641353282965, "learning_rate": 4.545649838882922e-06, "loss": 0.0006, "step": 9176 }, { "epoch": 0.591415866469034, "grad_norm": 0.03984360700703188, "learning_rate": 4.544933762978876e-06, "loss": 0.0016, "step": 9177 }, { "epoch": 0.5914803119159632, "grad_norm": 0.03024855675695247, "learning_rate": 4.54421768707483e-06, "loss": 0.0002, "step": 9178 }, { "epoch": 0.5915447573628924, "grad_norm": 0.04043637977027526, "learning_rate": 4.5435016111707845e-06, "loss": 0.0004, "step": 9179 }, { "epoch": 0.5916092028098214, "grad_norm": 0.10753774462695014, "learning_rate": 4.542785535266739e-06, "loss": 0.0001, "step": 9180 }, { "epoch": 0.5916736482567506, "grad_norm": 0.0025644812183412474, "learning_rate": 4.542069459362693e-06, "loss": 0.0, "step": 9181 }, { "epoch": 0.5917380937036798, "grad_norm": 0.04065201438857625, "learning_rate": 4.541353383458647e-06, "loss": 0.0001, "step": 9182 }, { "epoch": 0.591802539150609, "grad_norm": 0.3754980038895867, "learning_rate": 4.540637307554601e-06, "loss": 0.0003, "step": 9183 }, { "epoch": 0.5918669845975382, "grad_norm": 0.0013786835065639602, "learning_rate": 4.539921231650555e-06, "loss": 0.0, "step": 9184 }, { "epoch": 0.5919314300444674, "grad_norm": 0.038676694668396946, "learning_rate": 4.5392051557465095e-06, "loss": 0.0018, "step": 9185 }, { "epoch": 0.5919958754913965, "grad_norm": 0.123739661317091, "learning_rate": 4.538489079842464e-06, "loss": 0.0001, "step": 9186 }, { "epoch": 0.5920603209383257, "grad_norm": 0.002075990619147738, "learning_rate": 4.537773003938418e-06, "loss": 0.0, "step": 9187 }, { "epoch": 0.5921247663852549, "grad_norm": 0.0011510265777243546, "learning_rate": 4.5370569280343715e-06, "loss": 0.0, "step": 9188 }, { "epoch": 0.5921892118321841, "grad_norm": 0.006128596688916639, "learning_rate": 4.536340852130326e-06, "loss": 0.0, "step": 9189 }, { "epoch": 0.5922536572791133, "grad_norm": 0.3238451195135463, "learning_rate": 4.535624776226281e-06, "loss": 0.0008, "step": 9190 }, { "epoch": 0.5923181027260424, "grad_norm": 0.033411018542313785, "learning_rate": 4.534908700322234e-06, "loss": 0.0, "step": 9191 }, { "epoch": 0.5923825481729715, "grad_norm": 0.009916791814087991, "learning_rate": 4.534192624418189e-06, "loss": 0.0, "step": 9192 }, { "epoch": 0.5924469936199007, "grad_norm": 0.0006763817433519578, "learning_rate": 4.533476548514143e-06, "loss": 0.0, "step": 9193 }, { "epoch": 0.5925114390668299, "grad_norm": 0.2030172612492698, "learning_rate": 4.532760472610097e-06, "loss": 0.0042, "step": 9194 }, { "epoch": 0.5925758845137591, "grad_norm": 0.0010908219317394157, "learning_rate": 4.532044396706052e-06, "loss": 0.0, "step": 9195 }, { "epoch": 0.5926403299606883, "grad_norm": 0.0005182772170067717, "learning_rate": 4.531328320802005e-06, "loss": 0.0, "step": 9196 }, { "epoch": 0.5927047754076175, "grad_norm": 0.0014441613798425383, "learning_rate": 4.530612244897959e-06, "loss": 0.0, "step": 9197 }, { "epoch": 0.5927692208545466, "grad_norm": 0.0008751122361331298, "learning_rate": 4.529896168993914e-06, "loss": 0.0, "step": 9198 }, { "epoch": 0.5928336663014758, "grad_norm": 0.15613020834851898, "learning_rate": 4.529180093089868e-06, "loss": 0.0012, "step": 9199 }, { "epoch": 0.592898111748405, "grad_norm": 0.023299958553483276, "learning_rate": 4.528464017185822e-06, "loss": 0.0, "step": 9200 }, { "epoch": 0.5929625571953342, "grad_norm": 0.0002505282700823623, "learning_rate": 4.527747941281776e-06, "loss": 0.0, "step": 9201 }, { "epoch": 0.5930270026422634, "grad_norm": 0.06823508339202548, "learning_rate": 4.527031865377731e-06, "loss": 0.0002, "step": 9202 }, { "epoch": 0.5930914480891925, "grad_norm": 0.0023710727571781822, "learning_rate": 4.526315789473685e-06, "loss": 0.0, "step": 9203 }, { "epoch": 0.5931558935361216, "grad_norm": 0.005031136311208585, "learning_rate": 4.525599713569639e-06, "loss": 0.0, "step": 9204 }, { "epoch": 0.5932203389830508, "grad_norm": 0.00313223262834699, "learning_rate": 4.524883637665593e-06, "loss": 0.0, "step": 9205 }, { "epoch": 0.59328478442998, "grad_norm": 0.0023746098856013845, "learning_rate": 4.524167561761547e-06, "loss": 0.0, "step": 9206 }, { "epoch": 0.5933492298769092, "grad_norm": 0.00026919858289471487, "learning_rate": 4.5234514858575015e-06, "loss": 0.0, "step": 9207 }, { "epoch": 0.5934136753238384, "grad_norm": 0.004509877341564837, "learning_rate": 4.522735409953456e-06, "loss": 0.0, "step": 9208 }, { "epoch": 0.5934781207707676, "grad_norm": 0.42020257838400926, "learning_rate": 4.522019334049409e-06, "loss": 0.0015, "step": 9209 }, { "epoch": 0.5935425662176967, "grad_norm": 0.0028492342946140163, "learning_rate": 4.5213032581453635e-06, "loss": 0.0, "step": 9210 }, { "epoch": 0.5936070116646259, "grad_norm": 0.09158406548041811, "learning_rate": 4.520587182241318e-06, "loss": 0.0007, "step": 9211 }, { "epoch": 0.5936714571115551, "grad_norm": 0.005189893702205593, "learning_rate": 4.519871106337272e-06, "loss": 0.0, "step": 9212 }, { "epoch": 0.5937359025584843, "grad_norm": 0.002655863031524807, "learning_rate": 4.5191550304332264e-06, "loss": 0.0, "step": 9213 }, { "epoch": 0.5938003480054134, "grad_norm": 0.00017574247321199604, "learning_rate": 4.518438954529181e-06, "loss": 0.0, "step": 9214 }, { "epoch": 0.5938647934523426, "grad_norm": 0.004720604057724035, "learning_rate": 4.517722878625135e-06, "loss": 0.0, "step": 9215 }, { "epoch": 0.5939292388992717, "grad_norm": 0.000470269239913421, "learning_rate": 4.517006802721089e-06, "loss": 0.0, "step": 9216 }, { "epoch": 0.5939936843462009, "grad_norm": 0.004527785334700599, "learning_rate": 4.516290726817043e-06, "loss": 0.0, "step": 9217 }, { "epoch": 0.5940581297931301, "grad_norm": 0.22733169733458167, "learning_rate": 4.515574650912997e-06, "loss": 0.0008, "step": 9218 }, { "epoch": 0.5941225752400593, "grad_norm": 0.0009303410068623072, "learning_rate": 4.514858575008951e-06, "loss": 0.0, "step": 9219 }, { "epoch": 0.5941870206869885, "grad_norm": 0.006910788414918203, "learning_rate": 4.514142499104906e-06, "loss": 0.0001, "step": 9220 }, { "epoch": 0.5942514661339177, "grad_norm": 0.29944842913354613, "learning_rate": 4.513426423200859e-06, "loss": 0.0009, "step": 9221 }, { "epoch": 0.5943159115808468, "grad_norm": 0.015923637568431896, "learning_rate": 4.5127103472968134e-06, "loss": 0.0, "step": 9222 }, { "epoch": 0.594380357027776, "grad_norm": 0.1276660123801875, "learning_rate": 4.511994271392768e-06, "loss": 0.0002, "step": 9223 }, { "epoch": 0.5944448024747052, "grad_norm": 0.05243302043805129, "learning_rate": 4.511278195488722e-06, "loss": 0.0004, "step": 9224 }, { "epoch": 0.5945092479216343, "grad_norm": 0.02153507462951738, "learning_rate": 4.510562119584676e-06, "loss": 0.0001, "step": 9225 }, { "epoch": 0.5945736933685635, "grad_norm": 0.0004915653704900115, "learning_rate": 4.509846043680631e-06, "loss": 0.0, "step": 9226 }, { "epoch": 0.5946381388154927, "grad_norm": 0.00825432485504778, "learning_rate": 4.509129967776585e-06, "loss": 0.0, "step": 9227 }, { "epoch": 0.5947025842624218, "grad_norm": 0.08825993565435779, "learning_rate": 4.508413891872539e-06, "loss": 0.0006, "step": 9228 }, { "epoch": 0.594767029709351, "grad_norm": 8.340152364850253e-05, "learning_rate": 4.507697815968493e-06, "loss": 0.0, "step": 9229 }, { "epoch": 0.5948314751562802, "grad_norm": 0.013485439351184434, "learning_rate": 4.506981740064447e-06, "loss": 0.0001, "step": 9230 }, { "epoch": 0.5948959206032094, "grad_norm": 0.005780882656582949, "learning_rate": 4.506265664160401e-06, "loss": 0.0001, "step": 9231 }, { "epoch": 0.5949603660501386, "grad_norm": 0.01674859071076599, "learning_rate": 4.5055495882563556e-06, "loss": 0.0, "step": 9232 }, { "epoch": 0.5950248114970678, "grad_norm": 0.0019716342943576176, "learning_rate": 4.50483351235231e-06, "loss": 0.0, "step": 9233 }, { "epoch": 0.595089256943997, "grad_norm": 8.759305110141916e-05, "learning_rate": 4.504117436448263e-06, "loss": 0.0, "step": 9234 }, { "epoch": 0.5951537023909261, "grad_norm": 0.002496013202718065, "learning_rate": 4.503401360544218e-06, "loss": 0.0, "step": 9235 }, { "epoch": 0.5952181478378552, "grad_norm": 0.00025275087997574504, "learning_rate": 4.502685284640172e-06, "loss": 0.0, "step": 9236 }, { "epoch": 0.5952825932847844, "grad_norm": 0.0024015228303083213, "learning_rate": 4.501969208736126e-06, "loss": 0.0, "step": 9237 }, { "epoch": 0.5953470387317136, "grad_norm": 0.1258258953590908, "learning_rate": 4.5012531328320805e-06, "loss": 0.0003, "step": 9238 }, { "epoch": 0.5954114841786428, "grad_norm": 0.0032277228050439774, "learning_rate": 4.500537056928035e-06, "loss": 0.0, "step": 9239 }, { "epoch": 0.5954759296255719, "grad_norm": 0.0014017478054440399, "learning_rate": 4.499820981023989e-06, "loss": 0.0, "step": 9240 }, { "epoch": 0.5955403750725011, "grad_norm": 0.0043084187561121135, "learning_rate": 4.499104905119943e-06, "loss": 0.0, "step": 9241 }, { "epoch": 0.5956048205194303, "grad_norm": 0.014204112734319778, "learning_rate": 4.498388829215897e-06, "loss": 0.0, "step": 9242 }, { "epoch": 0.5956692659663595, "grad_norm": 0.0012018417368002514, "learning_rate": 4.497672753311851e-06, "loss": 0.0, "step": 9243 }, { "epoch": 0.5957337114132887, "grad_norm": 0.001397111472659201, "learning_rate": 4.4969566774078055e-06, "loss": 0.0, "step": 9244 }, { "epoch": 0.5957981568602179, "grad_norm": 0.004663769686834882, "learning_rate": 4.49624060150376e-06, "loss": 0.0, "step": 9245 }, { "epoch": 0.595862602307147, "grad_norm": 0.056992404426177555, "learning_rate": 4.495524525599714e-06, "loss": 0.0004, "step": 9246 }, { "epoch": 0.5959270477540761, "grad_norm": 0.002981061286054127, "learning_rate": 4.4948084496956675e-06, "loss": 0.0, "step": 9247 }, { "epoch": 0.5959914932010053, "grad_norm": 0.903715832698863, "learning_rate": 4.494092373791623e-06, "loss": 0.0048, "step": 9248 }, { "epoch": 0.5960559386479345, "grad_norm": 0.04485061684727686, "learning_rate": 4.493376297887577e-06, "loss": 0.0001, "step": 9249 }, { "epoch": 0.5961203840948637, "grad_norm": 0.014483704247325296, "learning_rate": 4.49266022198353e-06, "loss": 0.0, "step": 9250 }, { "epoch": 0.5961848295417929, "grad_norm": 0.0025339210037765942, "learning_rate": 4.491944146079485e-06, "loss": 0.0, "step": 9251 }, { "epoch": 0.596249274988722, "grad_norm": 0.02972256912942362, "learning_rate": 4.491228070175439e-06, "loss": 0.0, "step": 9252 }, { "epoch": 0.5963137204356512, "grad_norm": 0.003333447946224778, "learning_rate": 4.490511994271393e-06, "loss": 0.0, "step": 9253 }, { "epoch": 0.5963781658825804, "grad_norm": 0.002218025045587023, "learning_rate": 4.489795918367348e-06, "loss": 0.0, "step": 9254 }, { "epoch": 0.5964426113295096, "grad_norm": 0.00025620211906760514, "learning_rate": 4.489079842463301e-06, "loss": 0.0, "step": 9255 }, { "epoch": 0.5965070567764388, "grad_norm": 0.3525647500102783, "learning_rate": 4.488363766559255e-06, "loss": 0.0013, "step": 9256 }, { "epoch": 0.596571502223368, "grad_norm": 0.0009345009565512394, "learning_rate": 4.48764769065521e-06, "loss": 0.0, "step": 9257 }, { "epoch": 0.596635947670297, "grad_norm": 0.2165891494152304, "learning_rate": 4.486931614751164e-06, "loss": 0.0006, "step": 9258 }, { "epoch": 0.5967003931172262, "grad_norm": 0.05442176698580331, "learning_rate": 4.486215538847118e-06, "loss": 0.0001, "step": 9259 }, { "epoch": 0.5967648385641554, "grad_norm": 0.027872295910078077, "learning_rate": 4.4854994629430725e-06, "loss": 0.0, "step": 9260 }, { "epoch": 0.5968292840110846, "grad_norm": 0.0002799742786045959, "learning_rate": 4.484783387039027e-06, "loss": 0.0, "step": 9261 }, { "epoch": 0.5968937294580138, "grad_norm": 0.00018996156454566542, "learning_rate": 4.484067311134981e-06, "loss": 0.0, "step": 9262 }, { "epoch": 0.596958174904943, "grad_norm": 0.14084470328682747, "learning_rate": 4.483351235230935e-06, "loss": 0.0012, "step": 9263 }, { "epoch": 0.5970226203518721, "grad_norm": 0.001003408319839176, "learning_rate": 4.482635159326889e-06, "loss": 0.0, "step": 9264 }, { "epoch": 0.5970870657988013, "grad_norm": 0.006807802621411491, "learning_rate": 4.481919083422843e-06, "loss": 0.0, "step": 9265 }, { "epoch": 0.5971515112457305, "grad_norm": 0.020114278322576007, "learning_rate": 4.4812030075187975e-06, "loss": 0.0001, "step": 9266 }, { "epoch": 0.5972159566926597, "grad_norm": 0.00017411804282206113, "learning_rate": 4.480486931614752e-06, "loss": 0.0, "step": 9267 }, { "epoch": 0.5972804021395889, "grad_norm": 0.01873928089012201, "learning_rate": 4.479770855710705e-06, "loss": 0.0, "step": 9268 }, { "epoch": 0.597344847586518, "grad_norm": 0.0011347830260341888, "learning_rate": 4.4790547798066595e-06, "loss": 0.0, "step": 9269 }, { "epoch": 0.5974092930334471, "grad_norm": 0.153683499525407, "learning_rate": 4.478338703902614e-06, "loss": 0.0005, "step": 9270 }, { "epoch": 0.5974737384803763, "grad_norm": 0.005827371173133551, "learning_rate": 4.477622627998568e-06, "loss": 0.0, "step": 9271 }, { "epoch": 0.5975381839273055, "grad_norm": 0.0005971115847287093, "learning_rate": 4.4769065520945224e-06, "loss": 0.0, "step": 9272 }, { "epoch": 0.5976026293742347, "grad_norm": 0.004836487626164628, "learning_rate": 4.476190476190477e-06, "loss": 0.0, "step": 9273 }, { "epoch": 0.5976670748211639, "grad_norm": 0.0005959039040827599, "learning_rate": 4.475474400286431e-06, "loss": 0.0, "step": 9274 }, { "epoch": 0.5977315202680931, "grad_norm": 0.3020565196447107, "learning_rate": 4.474758324382385e-06, "loss": 0.0003, "step": 9275 }, { "epoch": 0.5977959657150222, "grad_norm": 0.001559705042993861, "learning_rate": 4.474042248478339e-06, "loss": 0.0, "step": 9276 }, { "epoch": 0.5978604111619514, "grad_norm": 1.8227666892697204e-05, "learning_rate": 4.473326172574293e-06, "loss": 0.0, "step": 9277 }, { "epoch": 0.5979248566088806, "grad_norm": 0.0013650415837087972, "learning_rate": 4.472610096670247e-06, "loss": 0.0, "step": 9278 }, { "epoch": 0.5979893020558098, "grad_norm": 0.16759155859822403, "learning_rate": 4.471894020766202e-06, "loss": 0.0009, "step": 9279 }, { "epoch": 0.598053747502739, "grad_norm": 0.0008573025217973782, "learning_rate": 4.471177944862156e-06, "loss": 0.0, "step": 9280 }, { "epoch": 0.5981181929496681, "grad_norm": 4.933914884938549e-05, "learning_rate": 4.4704618689581094e-06, "loss": 0.0, "step": 9281 }, { "epoch": 0.5981826383965972, "grad_norm": 0.0037086977972102943, "learning_rate": 4.469745793054064e-06, "loss": 0.0, "step": 9282 }, { "epoch": 0.5982470838435264, "grad_norm": 0.002770381211495419, "learning_rate": 4.469029717150019e-06, "loss": 0.0, "step": 9283 }, { "epoch": 0.5983115292904556, "grad_norm": 0.002774506415338846, "learning_rate": 4.468313641245972e-06, "loss": 0.0, "step": 9284 }, { "epoch": 0.5983759747373848, "grad_norm": 0.0010348561652971477, "learning_rate": 4.467597565341927e-06, "loss": 0.0, "step": 9285 }, { "epoch": 0.598440420184314, "grad_norm": 0.00225808661724457, "learning_rate": 4.466881489437881e-06, "loss": 0.0, "step": 9286 }, { "epoch": 0.5985048656312432, "grad_norm": 0.0001941368847980672, "learning_rate": 4.466165413533835e-06, "loss": 0.0, "step": 9287 }, { "epoch": 0.5985693110781724, "grad_norm": 6.751922171693977e-05, "learning_rate": 4.4654493376297895e-06, "loss": 0.0, "step": 9288 }, { "epoch": 0.5986337565251015, "grad_norm": 0.0003746486823726859, "learning_rate": 4.464733261725743e-06, "loss": 0.0, "step": 9289 }, { "epoch": 0.5986982019720307, "grad_norm": 0.0026928507882407063, "learning_rate": 4.464017185821697e-06, "loss": 0.0, "step": 9290 }, { "epoch": 0.5987626474189599, "grad_norm": 0.018445121237624786, "learning_rate": 4.4633011099176516e-06, "loss": 0.0002, "step": 9291 }, { "epoch": 0.598827092865889, "grad_norm": 0.00025725082956279275, "learning_rate": 4.462585034013606e-06, "loss": 0.0, "step": 9292 }, { "epoch": 0.5988915383128182, "grad_norm": 0.0010799365281890174, "learning_rate": 4.46186895810956e-06, "loss": 0.0, "step": 9293 }, { "epoch": 0.5989559837597473, "grad_norm": 0.007159703054942445, "learning_rate": 4.461152882205514e-06, "loss": 0.0, "step": 9294 }, { "epoch": 0.5990204292066765, "grad_norm": 1.2268386049962541e-05, "learning_rate": 4.460436806301469e-06, "loss": 0.0, "step": 9295 }, { "epoch": 0.5990848746536057, "grad_norm": 0.06045411872724877, "learning_rate": 4.459720730397423e-06, "loss": 0.0006, "step": 9296 }, { "epoch": 0.5991493201005349, "grad_norm": 0.19484259344276214, "learning_rate": 4.4590046544933765e-06, "loss": 0.0031, "step": 9297 }, { "epoch": 0.5992137655474641, "grad_norm": 0.00011573336964747802, "learning_rate": 4.458288578589331e-06, "loss": 0.0, "step": 9298 }, { "epoch": 0.5992782109943933, "grad_norm": 0.00013179948953989136, "learning_rate": 4.457572502685285e-06, "loss": 0.0, "step": 9299 }, { "epoch": 0.5993426564413225, "grad_norm": 0.08617986833949201, "learning_rate": 4.456856426781239e-06, "loss": 0.0001, "step": 9300 }, { "epoch": 0.5994071018882516, "grad_norm": 2.1917965791246905, "learning_rate": 4.456140350877194e-06, "loss": 0.0069, "step": 9301 }, { "epoch": 0.5994715473351808, "grad_norm": 0.005079315964603255, "learning_rate": 4.455424274973147e-06, "loss": 0.0, "step": 9302 }, { "epoch": 0.5995359927821099, "grad_norm": 0.0005311949623312273, "learning_rate": 4.4547081990691015e-06, "loss": 0.0, "step": 9303 }, { "epoch": 0.5996004382290391, "grad_norm": 0.00018466529970892427, "learning_rate": 4.453992123165056e-06, "loss": 0.0, "step": 9304 }, { "epoch": 0.5996648836759683, "grad_norm": 0.0024224931746407233, "learning_rate": 4.45327604726101e-06, "loss": 0.0, "step": 9305 }, { "epoch": 0.5997293291228974, "grad_norm": 9.214679526500649e-05, "learning_rate": 4.452559971356964e-06, "loss": 0.0, "step": 9306 }, { "epoch": 0.5997937745698266, "grad_norm": 0.00013752331314721667, "learning_rate": 4.451843895452919e-06, "loss": 0.0, "step": 9307 }, { "epoch": 0.5998582200167558, "grad_norm": 0.0012078464524044698, "learning_rate": 4.451127819548873e-06, "loss": 0.0, "step": 9308 }, { "epoch": 0.599922665463685, "grad_norm": 0.02017807043125452, "learning_rate": 4.450411743644827e-06, "loss": 0.0001, "step": 9309 }, { "epoch": 0.5999871109106142, "grad_norm": 0.12984587208589446, "learning_rate": 4.449695667740781e-06, "loss": 0.0004, "step": 9310 }, { "epoch": 0.6000515563575434, "grad_norm": 0.04357864342243002, "learning_rate": 4.448979591836735e-06, "loss": 0.0001, "step": 9311 }, { "epoch": 0.6001160018044726, "grad_norm": 0.009136213091560104, "learning_rate": 4.448263515932689e-06, "loss": 0.0, "step": 9312 }, { "epoch": 0.6001804472514017, "grad_norm": 0.03765791702092579, "learning_rate": 4.447547440028644e-06, "loss": 0.0, "step": 9313 }, { "epoch": 0.6002448926983308, "grad_norm": 0.13066622728000005, "learning_rate": 4.446831364124598e-06, "loss": 0.0004, "step": 9314 }, { "epoch": 0.60030933814526, "grad_norm": 0.033811319537168136, "learning_rate": 4.446115288220551e-06, "loss": 0.0001, "step": 9315 }, { "epoch": 0.6003737835921892, "grad_norm": 0.003377651363750598, "learning_rate": 4.445399212316506e-06, "loss": 0.0, "step": 9316 }, { "epoch": 0.6004382290391184, "grad_norm": 0.0012151443025219713, "learning_rate": 4.44468313641246e-06, "loss": 0.0, "step": 9317 }, { "epoch": 0.6005026744860475, "grad_norm": 0.00030076058428306606, "learning_rate": 4.443967060508414e-06, "loss": 0.0, "step": 9318 }, { "epoch": 0.6005671199329767, "grad_norm": 0.0015305195960151965, "learning_rate": 4.4432509846043685e-06, "loss": 0.0, "step": 9319 }, { "epoch": 0.6006315653799059, "grad_norm": 0.439756161792432, "learning_rate": 4.442534908700323e-06, "loss": 0.0012, "step": 9320 }, { "epoch": 0.6006960108268351, "grad_norm": 0.00840660182838595, "learning_rate": 4.441818832796277e-06, "loss": 0.0, "step": 9321 }, { "epoch": 0.6007604562737643, "grad_norm": 0.024425231047921048, "learning_rate": 4.4411027568922314e-06, "loss": 0.0002, "step": 9322 }, { "epoch": 0.6008249017206935, "grad_norm": 0.00420921162090846, "learning_rate": 4.440386680988185e-06, "loss": 0.0, "step": 9323 }, { "epoch": 0.6008893471676227, "grad_norm": 0.03988730496138384, "learning_rate": 4.439670605084139e-06, "loss": 0.0001, "step": 9324 }, { "epoch": 0.6009537926145517, "grad_norm": 0.0012243449380229714, "learning_rate": 4.4389545291800935e-06, "loss": 0.0, "step": 9325 }, { "epoch": 0.6010182380614809, "grad_norm": 0.0006668839996335907, "learning_rate": 4.438238453276048e-06, "loss": 0.0, "step": 9326 }, { "epoch": 0.6010826835084101, "grad_norm": 0.0001269798514468688, "learning_rate": 4.437522377372001e-06, "loss": 0.0, "step": 9327 }, { "epoch": 0.6011471289553393, "grad_norm": 0.006069018507372652, "learning_rate": 4.4368063014679555e-06, "loss": 0.0, "step": 9328 }, { "epoch": 0.6012115744022685, "grad_norm": 0.009952276861537372, "learning_rate": 4.43609022556391e-06, "loss": 0.0, "step": 9329 }, { "epoch": 0.6012760198491977, "grad_norm": 0.0001698301808685413, "learning_rate": 4.435374149659865e-06, "loss": 0.0, "step": 9330 }, { "epoch": 0.6013404652961268, "grad_norm": 0.0007671255886284143, "learning_rate": 4.4346580737558184e-06, "loss": 0.0, "step": 9331 }, { "epoch": 0.601404910743056, "grad_norm": 0.019085248753486946, "learning_rate": 4.433941997851773e-06, "loss": 0.0002, "step": 9332 }, { "epoch": 0.6014693561899852, "grad_norm": 0.0005585553011050799, "learning_rate": 4.433225921947727e-06, "loss": 0.0, "step": 9333 }, { "epoch": 0.6015338016369144, "grad_norm": 0.010856243724535, "learning_rate": 4.432509846043681e-06, "loss": 0.0001, "step": 9334 }, { "epoch": 0.6015982470838436, "grad_norm": 0.0002317457563166157, "learning_rate": 4.431793770139635e-06, "loss": 0.0, "step": 9335 }, { "epoch": 0.6016626925307726, "grad_norm": 0.0065507252221229315, "learning_rate": 4.431077694235589e-06, "loss": 0.0001, "step": 9336 }, { "epoch": 0.6017271379777018, "grad_norm": 0.00010838378621555254, "learning_rate": 4.430361618331543e-06, "loss": 0.0, "step": 9337 }, { "epoch": 0.601791583424631, "grad_norm": 0.0017090922054476857, "learning_rate": 4.429645542427498e-06, "loss": 0.0, "step": 9338 }, { "epoch": 0.6018560288715602, "grad_norm": 0.009171633804983492, "learning_rate": 4.428929466523452e-06, "loss": 0.0, "step": 9339 }, { "epoch": 0.6019204743184894, "grad_norm": 0.0022138650096616835, "learning_rate": 4.4282133906194054e-06, "loss": 0.0, "step": 9340 }, { "epoch": 0.6019849197654186, "grad_norm": 0.00016321892076394534, "learning_rate": 4.42749731471536e-06, "loss": 0.0, "step": 9341 }, { "epoch": 0.6020493652123478, "grad_norm": 0.7931170395375743, "learning_rate": 4.426781238811315e-06, "loss": 0.0087, "step": 9342 }, { "epoch": 0.6021138106592769, "grad_norm": 0.0027087375142905547, "learning_rate": 4.426065162907268e-06, "loss": 0.0, "step": 9343 }, { "epoch": 0.6021782561062061, "grad_norm": 0.00030620151578539744, "learning_rate": 4.425349087003223e-06, "loss": 0.0, "step": 9344 }, { "epoch": 0.6022427015531353, "grad_norm": 0.35033655461765545, "learning_rate": 4.424633011099177e-06, "loss": 0.007, "step": 9345 }, { "epoch": 0.6023071470000645, "grad_norm": 0.0008401919777756227, "learning_rate": 4.423916935195131e-06, "loss": 0.0, "step": 9346 }, { "epoch": 0.6023715924469936, "grad_norm": 0.00012728167519971065, "learning_rate": 4.4232008592910855e-06, "loss": 0.0, "step": 9347 }, { "epoch": 0.6024360378939227, "grad_norm": 0.012302640572469041, "learning_rate": 4.422484783387039e-06, "loss": 0.0, "step": 9348 }, { "epoch": 0.6025004833408519, "grad_norm": 0.08447763946476083, "learning_rate": 4.421768707482993e-06, "loss": 0.001, "step": 9349 }, { "epoch": 0.6025649287877811, "grad_norm": 0.0011682809611789449, "learning_rate": 4.4210526315789476e-06, "loss": 0.0, "step": 9350 }, { "epoch": 0.6026293742347103, "grad_norm": 0.0011277098483875475, "learning_rate": 4.420336555674902e-06, "loss": 0.0, "step": 9351 }, { "epoch": 0.6026938196816395, "grad_norm": 0.011400610017446352, "learning_rate": 4.419620479770856e-06, "loss": 0.0, "step": 9352 }, { "epoch": 0.6027582651285687, "grad_norm": 0.0541042169718465, "learning_rate": 4.41890440386681e-06, "loss": 0.0001, "step": 9353 }, { "epoch": 0.6028227105754979, "grad_norm": 0.0004733979353402136, "learning_rate": 4.418188327962765e-06, "loss": 0.0, "step": 9354 }, { "epoch": 0.602887156022427, "grad_norm": 0.00089674663328231, "learning_rate": 4.417472252058719e-06, "loss": 0.0, "step": 9355 }, { "epoch": 0.6029516014693562, "grad_norm": 0.0038075149394037884, "learning_rate": 4.4167561761546725e-06, "loss": 0.0, "step": 9356 }, { "epoch": 0.6030160469162854, "grad_norm": 0.0006955252692623161, "learning_rate": 4.416040100250627e-06, "loss": 0.0, "step": 9357 }, { "epoch": 0.6030804923632146, "grad_norm": 0.00020624013289641623, "learning_rate": 4.415324024346581e-06, "loss": 0.0, "step": 9358 }, { "epoch": 0.6031449378101437, "grad_norm": 0.048323787565792246, "learning_rate": 4.414607948442535e-06, "loss": 0.0002, "step": 9359 }, { "epoch": 0.6032093832570729, "grad_norm": 0.0012364503824618905, "learning_rate": 4.41389187253849e-06, "loss": 0.0, "step": 9360 }, { "epoch": 0.603273828704002, "grad_norm": 0.6295609472432298, "learning_rate": 4.413175796634443e-06, "loss": 0.001, "step": 9361 }, { "epoch": 0.6033382741509312, "grad_norm": 7.311658614915109e-05, "learning_rate": 4.4124597207303975e-06, "loss": 0.0, "step": 9362 }, { "epoch": 0.6034027195978604, "grad_norm": 0.00035291927788144885, "learning_rate": 4.411743644826352e-06, "loss": 0.0, "step": 9363 }, { "epoch": 0.6034671650447896, "grad_norm": 0.0010846684443867236, "learning_rate": 4.411027568922306e-06, "loss": 0.0, "step": 9364 }, { "epoch": 0.6035316104917188, "grad_norm": 0.00017466946654724934, "learning_rate": 4.41031149301826e-06, "loss": 0.0, "step": 9365 }, { "epoch": 0.603596055938648, "grad_norm": 0.001453067649005503, "learning_rate": 4.409595417114215e-06, "loss": 0.0, "step": 9366 }, { "epoch": 0.6036605013855771, "grad_norm": 0.00018182376128211283, "learning_rate": 4.408879341210169e-06, "loss": 0.0, "step": 9367 }, { "epoch": 0.6037249468325063, "grad_norm": 0.011400766377271708, "learning_rate": 4.408163265306123e-06, "loss": 0.0, "step": 9368 }, { "epoch": 0.6037893922794355, "grad_norm": 0.4758228127251253, "learning_rate": 4.407447189402077e-06, "loss": 0.0008, "step": 9369 }, { "epoch": 0.6038538377263646, "grad_norm": 7.137859350053377e-05, "learning_rate": 4.406731113498031e-06, "loss": 0.0, "step": 9370 }, { "epoch": 0.6039182831732938, "grad_norm": 0.002106704556151511, "learning_rate": 4.406015037593985e-06, "loss": 0.0, "step": 9371 }, { "epoch": 0.603982728620223, "grad_norm": 0.01561994489465668, "learning_rate": 4.40529896168994e-06, "loss": 0.0, "step": 9372 }, { "epoch": 0.6040471740671521, "grad_norm": 0.009708016318482603, "learning_rate": 4.404582885785894e-06, "loss": 0.0, "step": 9373 }, { "epoch": 0.6041116195140813, "grad_norm": 0.001466915021623313, "learning_rate": 4.403866809881847e-06, "loss": 0.0, "step": 9374 }, { "epoch": 0.6041760649610105, "grad_norm": 0.0007412942422267652, "learning_rate": 4.403150733977802e-06, "loss": 0.0, "step": 9375 }, { "epoch": 0.6042405104079397, "grad_norm": 0.005260947110871978, "learning_rate": 4.402434658073756e-06, "loss": 0.0, "step": 9376 }, { "epoch": 0.6043049558548689, "grad_norm": 5.846787475238024e-05, "learning_rate": 4.40171858216971e-06, "loss": 0.0, "step": 9377 }, { "epoch": 0.6043694013017981, "grad_norm": 0.0029260585270888422, "learning_rate": 4.4010025062656645e-06, "loss": 0.0, "step": 9378 }, { "epoch": 0.6044338467487272, "grad_norm": 0.0002419209646407248, "learning_rate": 4.400286430361619e-06, "loss": 0.0, "step": 9379 }, { "epoch": 0.6044982921956564, "grad_norm": 0.0003612351482923959, "learning_rate": 4.399570354457573e-06, "loss": 0.0, "step": 9380 }, { "epoch": 0.6045627376425855, "grad_norm": 9.690475230580101e-05, "learning_rate": 4.3988542785535274e-06, "loss": 0.0, "step": 9381 }, { "epoch": 0.6046271830895147, "grad_norm": 0.11312480103886216, "learning_rate": 4.398138202649481e-06, "loss": 0.0004, "step": 9382 }, { "epoch": 0.6046916285364439, "grad_norm": 0.0003024319588343002, "learning_rate": 4.397422126745435e-06, "loss": 0.0, "step": 9383 }, { "epoch": 0.604756073983373, "grad_norm": 0.0012353313973243075, "learning_rate": 4.3967060508413895e-06, "loss": 0.0, "step": 9384 }, { "epoch": 0.6048205194303022, "grad_norm": 0.012779457318109045, "learning_rate": 4.395989974937344e-06, "loss": 0.0002, "step": 9385 }, { "epoch": 0.6048849648772314, "grad_norm": 0.0009381798352755683, "learning_rate": 4.395273899033298e-06, "loss": 0.0, "step": 9386 }, { "epoch": 0.6049494103241606, "grad_norm": 0.00010893321625617296, "learning_rate": 4.3945578231292515e-06, "loss": 0.0, "step": 9387 }, { "epoch": 0.6050138557710898, "grad_norm": 0.003997170920714122, "learning_rate": 4.393841747225206e-06, "loss": 0.0, "step": 9388 }, { "epoch": 0.605078301218019, "grad_norm": 8.38234992955942e-05, "learning_rate": 4.393125671321161e-06, "loss": 0.0, "step": 9389 }, { "epoch": 0.6051427466649482, "grad_norm": 0.0005577757944864189, "learning_rate": 4.3924095954171144e-06, "loss": 0.0, "step": 9390 }, { "epoch": 0.6052071921118773, "grad_norm": 0.0007627417636095448, "learning_rate": 4.391693519513069e-06, "loss": 0.0, "step": 9391 }, { "epoch": 0.6052716375588064, "grad_norm": 0.00022876330997271556, "learning_rate": 4.390977443609023e-06, "loss": 0.0, "step": 9392 }, { "epoch": 0.6053360830057356, "grad_norm": 0.3514795311405156, "learning_rate": 4.390261367704977e-06, "loss": 0.0034, "step": 9393 }, { "epoch": 0.6054005284526648, "grad_norm": 0.06014872393965382, "learning_rate": 4.389545291800932e-06, "loss": 0.0, "step": 9394 }, { "epoch": 0.605464973899594, "grad_norm": 0.0012982233820627276, "learning_rate": 4.388829215896885e-06, "loss": 0.0, "step": 9395 }, { "epoch": 0.6055294193465232, "grad_norm": 0.0006104295092222888, "learning_rate": 4.388113139992839e-06, "loss": 0.0, "step": 9396 }, { "epoch": 0.6055938647934523, "grad_norm": 0.0015226621546786093, "learning_rate": 4.387397064088794e-06, "loss": 0.0, "step": 9397 }, { "epoch": 0.6056583102403815, "grad_norm": 0.004909857157835074, "learning_rate": 4.386680988184748e-06, "loss": 0.0, "step": 9398 }, { "epoch": 0.6057227556873107, "grad_norm": 0.0016943522584871827, "learning_rate": 4.385964912280702e-06, "loss": 0.0, "step": 9399 }, { "epoch": 0.6057872011342399, "grad_norm": 0.145636188277844, "learning_rate": 4.3852488363766566e-06, "loss": 0.0014, "step": 9400 }, { "epoch": 0.6058516465811691, "grad_norm": 0.0037991052504417187, "learning_rate": 4.384532760472611e-06, "loss": 0.0, "step": 9401 }, { "epoch": 0.6059160920280983, "grad_norm": 0.0005582835381395184, "learning_rate": 4.383816684568565e-06, "loss": 0.0, "step": 9402 }, { "epoch": 0.6059805374750273, "grad_norm": 0.14714135028611341, "learning_rate": 4.383100608664519e-06, "loss": 0.004, "step": 9403 }, { "epoch": 0.6060449829219565, "grad_norm": 0.00026380090583077884, "learning_rate": 4.382384532760473e-06, "loss": 0.0, "step": 9404 }, { "epoch": 0.6061094283688857, "grad_norm": 0.007371777117651726, "learning_rate": 4.381668456856427e-06, "loss": 0.0, "step": 9405 }, { "epoch": 0.6061738738158149, "grad_norm": 0.0010503819422914768, "learning_rate": 4.3809523809523815e-06, "loss": 0.0, "step": 9406 }, { "epoch": 0.6062383192627441, "grad_norm": 0.06254363771771412, "learning_rate": 4.380236305048336e-06, "loss": 0.0001, "step": 9407 }, { "epoch": 0.6063027647096733, "grad_norm": 0.0305562041932073, "learning_rate": 4.379520229144289e-06, "loss": 0.0001, "step": 9408 }, { "epoch": 0.6063672101566024, "grad_norm": 0.00900166756321749, "learning_rate": 4.3788041532402436e-06, "loss": 0.0, "step": 9409 }, { "epoch": 0.6064316556035316, "grad_norm": 0.30886882883342337, "learning_rate": 4.378088077336198e-06, "loss": 0.001, "step": 9410 }, { "epoch": 0.6064961010504608, "grad_norm": 0.5044701548161394, "learning_rate": 4.377372001432152e-06, "loss": 0.0022, "step": 9411 }, { "epoch": 0.60656054649739, "grad_norm": 0.0034124956418660653, "learning_rate": 4.3766559255281065e-06, "loss": 0.0, "step": 9412 }, { "epoch": 0.6066249919443192, "grad_norm": 0.0024336553075388167, "learning_rate": 4.375939849624061e-06, "loss": 0.0, "step": 9413 }, { "epoch": 0.6066894373912483, "grad_norm": 0.0006861778038973162, "learning_rate": 4.375223773720015e-06, "loss": 0.0, "step": 9414 }, { "epoch": 0.6067538828381774, "grad_norm": 0.0007463060227097989, "learning_rate": 4.374507697815969e-06, "loss": 0.0, "step": 9415 }, { "epoch": 0.6068183282851066, "grad_norm": 0.008138348403934246, "learning_rate": 4.373791621911923e-06, "loss": 0.0, "step": 9416 }, { "epoch": 0.6068827737320358, "grad_norm": 0.0015037820156294577, "learning_rate": 4.373075546007877e-06, "loss": 0.0, "step": 9417 }, { "epoch": 0.606947219178965, "grad_norm": 7.03840792272183e-05, "learning_rate": 4.372359470103831e-06, "loss": 0.0, "step": 9418 }, { "epoch": 0.6070116646258942, "grad_norm": 0.724938618593745, "learning_rate": 4.371643394199786e-06, "loss": 0.006, "step": 9419 }, { "epoch": 0.6070761100728234, "grad_norm": 0.00116128567512363, "learning_rate": 4.370927318295739e-06, "loss": 0.0, "step": 9420 }, { "epoch": 0.6071405555197525, "grad_norm": 0.0850829120345377, "learning_rate": 4.3702112423916935e-06, "loss": 0.0002, "step": 9421 }, { "epoch": 0.6072050009666817, "grad_norm": 0.0010536833509948063, "learning_rate": 4.369495166487648e-06, "loss": 0.0, "step": 9422 }, { "epoch": 0.6072694464136109, "grad_norm": 0.00385163720268815, "learning_rate": 4.368779090583602e-06, "loss": 0.0, "step": 9423 }, { "epoch": 0.6073338918605401, "grad_norm": 0.11442118970579021, "learning_rate": 4.368063014679556e-06, "loss": 0.0001, "step": 9424 }, { "epoch": 0.6073983373074693, "grad_norm": 0.0007916495679545597, "learning_rate": 4.367346938775511e-06, "loss": 0.0, "step": 9425 }, { "epoch": 0.6074627827543984, "grad_norm": 0.0011092425353548807, "learning_rate": 4.366630862871465e-06, "loss": 0.0, "step": 9426 }, { "epoch": 0.6075272282013275, "grad_norm": 9.429646011118711e-05, "learning_rate": 4.365914786967419e-06, "loss": 0.0, "step": 9427 }, { "epoch": 0.6075916736482567, "grad_norm": 0.002318340581239123, "learning_rate": 4.365198711063373e-06, "loss": 0.0, "step": 9428 }, { "epoch": 0.6076561190951859, "grad_norm": 0.0031013551819890905, "learning_rate": 4.364482635159327e-06, "loss": 0.0, "step": 9429 }, { "epoch": 0.6077205645421151, "grad_norm": 0.0011860604719637813, "learning_rate": 4.363766559255281e-06, "loss": 0.0, "step": 9430 }, { "epoch": 0.6077850099890443, "grad_norm": 0.0868075929508591, "learning_rate": 4.363050483351236e-06, "loss": 0.0017, "step": 9431 }, { "epoch": 0.6078494554359735, "grad_norm": 0.0016228980177938656, "learning_rate": 4.36233440744719e-06, "loss": 0.0, "step": 9432 }, { "epoch": 0.6079139008829026, "grad_norm": 0.002918866658195965, "learning_rate": 4.361618331543143e-06, "loss": 0.0, "step": 9433 }, { "epoch": 0.6079783463298318, "grad_norm": 0.40538073119100626, "learning_rate": 4.360902255639098e-06, "loss": 0.0006, "step": 9434 }, { "epoch": 0.608042791776761, "grad_norm": 0.006536840513148788, "learning_rate": 4.360186179735053e-06, "loss": 0.0, "step": 9435 }, { "epoch": 0.6081072372236902, "grad_norm": 0.02315314722042714, "learning_rate": 4.359470103831006e-06, "loss": 0.0001, "step": 9436 }, { "epoch": 0.6081716826706193, "grad_norm": 0.0005369910501333005, "learning_rate": 4.3587540279269605e-06, "loss": 0.0, "step": 9437 }, { "epoch": 0.6082361281175485, "grad_norm": 0.014507810777531585, "learning_rate": 4.358037952022915e-06, "loss": 0.0001, "step": 9438 }, { "epoch": 0.6083005735644776, "grad_norm": 0.0014420113067096622, "learning_rate": 4.357321876118869e-06, "loss": 0.0, "step": 9439 }, { "epoch": 0.6083650190114068, "grad_norm": 0.0003051545929258759, "learning_rate": 4.3566058002148234e-06, "loss": 0.0, "step": 9440 }, { "epoch": 0.608429464458336, "grad_norm": 0.10313037041927729, "learning_rate": 4.355889724310777e-06, "loss": 0.0001, "step": 9441 }, { "epoch": 0.6084939099052652, "grad_norm": 0.00023097012118933073, "learning_rate": 4.355173648406731e-06, "loss": 0.0, "step": 9442 }, { "epoch": 0.6085583553521944, "grad_norm": 0.004176636239720323, "learning_rate": 4.3544575725026855e-06, "loss": 0.0, "step": 9443 }, { "epoch": 0.6086228007991236, "grad_norm": 0.031410970579226795, "learning_rate": 4.35374149659864e-06, "loss": 0.0001, "step": 9444 }, { "epoch": 0.6086872462460527, "grad_norm": 0.011897834685541422, "learning_rate": 4.353025420694594e-06, "loss": 0.0, "step": 9445 }, { "epoch": 0.6087516916929819, "grad_norm": 0.00046606655128807525, "learning_rate": 4.3523093447905475e-06, "loss": 0.0, "step": 9446 }, { "epoch": 0.6088161371399111, "grad_norm": 0.024800149341012708, "learning_rate": 4.351593268886503e-06, "loss": 0.0015, "step": 9447 }, { "epoch": 0.6088805825868402, "grad_norm": 0.0221088822473336, "learning_rate": 4.350877192982457e-06, "loss": 0.0, "step": 9448 }, { "epoch": 0.6089450280337694, "grad_norm": 6.0055351174664875e-05, "learning_rate": 4.3501611170784104e-06, "loss": 0.0, "step": 9449 }, { "epoch": 0.6090094734806986, "grad_norm": 0.05522244514116825, "learning_rate": 4.349445041174365e-06, "loss": 0.0, "step": 9450 }, { "epoch": 0.6090739189276277, "grad_norm": 0.0020661568106822036, "learning_rate": 4.348728965270319e-06, "loss": 0.0, "step": 9451 }, { "epoch": 0.6091383643745569, "grad_norm": 0.00014003216050082992, "learning_rate": 4.348012889366273e-06, "loss": 0.0, "step": 9452 }, { "epoch": 0.6092028098214861, "grad_norm": 5.3691884195080886e-05, "learning_rate": 4.347296813462228e-06, "loss": 0.0, "step": 9453 }, { "epoch": 0.6092672552684153, "grad_norm": 2.2470153142569833e-05, "learning_rate": 4.346580737558181e-06, "loss": 0.0, "step": 9454 }, { "epoch": 0.6093317007153445, "grad_norm": 0.005263171562567179, "learning_rate": 4.345864661654135e-06, "loss": 0.0, "step": 9455 }, { "epoch": 0.6093961461622737, "grad_norm": 0.028391588065871397, "learning_rate": 4.34514858575009e-06, "loss": 0.0, "step": 9456 }, { "epoch": 0.6094605916092029, "grad_norm": 0.0032687334314929254, "learning_rate": 4.344432509846044e-06, "loss": 0.0, "step": 9457 }, { "epoch": 0.609525037056132, "grad_norm": 0.003059619076663178, "learning_rate": 4.343716433941998e-06, "loss": 0.0, "step": 9458 }, { "epoch": 0.6095894825030611, "grad_norm": 0.00011531079670687498, "learning_rate": 4.3430003580379526e-06, "loss": 0.0, "step": 9459 }, { "epoch": 0.6096539279499903, "grad_norm": 0.7228147590624995, "learning_rate": 4.342284282133907e-06, "loss": 0.0041, "step": 9460 }, { "epoch": 0.6097183733969195, "grad_norm": 0.002931431348183814, "learning_rate": 4.341568206229861e-06, "loss": 0.0, "step": 9461 }, { "epoch": 0.6097828188438487, "grad_norm": 2.102471073261476e-05, "learning_rate": 4.340852130325815e-06, "loss": 0.0, "step": 9462 }, { "epoch": 0.6098472642907778, "grad_norm": 0.00016897046354369543, "learning_rate": 4.340136054421769e-06, "loss": 0.0, "step": 9463 }, { "epoch": 0.609911709737707, "grad_norm": 0.0014111495206503312, "learning_rate": 4.339419978517723e-06, "loss": 0.0, "step": 9464 }, { "epoch": 0.6099761551846362, "grad_norm": 0.00035216732375646924, "learning_rate": 4.3387039026136775e-06, "loss": 0.0, "step": 9465 }, { "epoch": 0.6100406006315654, "grad_norm": 0.0001382897188749735, "learning_rate": 4.337987826709632e-06, "loss": 0.0, "step": 9466 }, { "epoch": 0.6101050460784946, "grad_norm": 0.009698598664787292, "learning_rate": 4.337271750805585e-06, "loss": 0.0, "step": 9467 }, { "epoch": 0.6101694915254238, "grad_norm": 0.06168647575651353, "learning_rate": 4.3365556749015396e-06, "loss": 0.0, "step": 9468 }, { "epoch": 0.610233936972353, "grad_norm": 0.004649457971022612, "learning_rate": 4.335839598997494e-06, "loss": 0.0, "step": 9469 }, { "epoch": 0.610298382419282, "grad_norm": 0.006819369683324906, "learning_rate": 4.335123523093448e-06, "loss": 0.0, "step": 9470 }, { "epoch": 0.6103628278662112, "grad_norm": 0.0038879490160562474, "learning_rate": 4.3344074471894025e-06, "loss": 0.0, "step": 9471 }, { "epoch": 0.6104272733131404, "grad_norm": 0.0001479780592880403, "learning_rate": 4.333691371285357e-06, "loss": 0.0, "step": 9472 }, { "epoch": 0.6104917187600696, "grad_norm": 0.0005454773247210619, "learning_rate": 4.332975295381311e-06, "loss": 0.0, "step": 9473 }, { "epoch": 0.6105561642069988, "grad_norm": 0.041149449013538716, "learning_rate": 4.332259219477265e-06, "loss": 0.0001, "step": 9474 }, { "epoch": 0.610620609653928, "grad_norm": 0.0016966869856339295, "learning_rate": 4.331543143573219e-06, "loss": 0.0, "step": 9475 }, { "epoch": 0.6106850551008571, "grad_norm": 0.10398651031290745, "learning_rate": 4.330827067669173e-06, "loss": 0.0004, "step": 9476 }, { "epoch": 0.6107495005477863, "grad_norm": 0.0028450556875423104, "learning_rate": 4.330110991765127e-06, "loss": 0.0, "step": 9477 }, { "epoch": 0.6108139459947155, "grad_norm": 0.0033837477527026193, "learning_rate": 4.329394915861082e-06, "loss": 0.0, "step": 9478 }, { "epoch": 0.6108783914416447, "grad_norm": 0.006324049606286477, "learning_rate": 4.328678839957036e-06, "loss": 0.0, "step": 9479 }, { "epoch": 0.6109428368885739, "grad_norm": 0.07765264767556082, "learning_rate": 4.3279627640529895e-06, "loss": 0.0001, "step": 9480 }, { "epoch": 0.6110072823355029, "grad_norm": 0.01570204834010833, "learning_rate": 4.327246688148944e-06, "loss": 0.0001, "step": 9481 }, { "epoch": 0.6110717277824321, "grad_norm": 0.0018498621459318526, "learning_rate": 4.326530612244899e-06, "loss": 0.0, "step": 9482 }, { "epoch": 0.6111361732293613, "grad_norm": 0.037915579330810155, "learning_rate": 4.325814536340852e-06, "loss": 0.0, "step": 9483 }, { "epoch": 0.6112006186762905, "grad_norm": 0.18641078003385522, "learning_rate": 4.325098460436807e-06, "loss": 0.0009, "step": 9484 }, { "epoch": 0.6112650641232197, "grad_norm": 0.004106447632898952, "learning_rate": 4.324382384532761e-06, "loss": 0.0015, "step": 9485 }, { "epoch": 0.6113295095701489, "grad_norm": 0.00024362199685292236, "learning_rate": 4.323666308628715e-06, "loss": 0.0, "step": 9486 }, { "epoch": 0.611393955017078, "grad_norm": 0.3062199665445999, "learning_rate": 4.3229502327246696e-06, "loss": 0.0016, "step": 9487 }, { "epoch": 0.6114584004640072, "grad_norm": 0.010514023663567535, "learning_rate": 4.322234156820623e-06, "loss": 0.0001, "step": 9488 }, { "epoch": 0.6115228459109364, "grad_norm": 0.0024672399660492102, "learning_rate": 4.321518080916577e-06, "loss": 0.0, "step": 9489 }, { "epoch": 0.6115872913578656, "grad_norm": 0.0035961915359396693, "learning_rate": 4.320802005012532e-06, "loss": 0.0, "step": 9490 }, { "epoch": 0.6116517368047948, "grad_norm": 0.0008687014780787742, "learning_rate": 4.320085929108486e-06, "loss": 0.0, "step": 9491 }, { "epoch": 0.6117161822517239, "grad_norm": 0.00013052206685491074, "learning_rate": 4.31936985320444e-06, "loss": 0.0, "step": 9492 }, { "epoch": 0.611780627698653, "grad_norm": 0.0008957571000338346, "learning_rate": 4.318653777300394e-06, "loss": 0.0, "step": 9493 }, { "epoch": 0.6118450731455822, "grad_norm": 0.0026848465137663054, "learning_rate": 4.317937701396349e-06, "loss": 0.0, "step": 9494 }, { "epoch": 0.6119095185925114, "grad_norm": 0.32030492866685867, "learning_rate": 4.317221625492303e-06, "loss": 0.0006, "step": 9495 }, { "epoch": 0.6119739640394406, "grad_norm": 0.06446050110427554, "learning_rate": 4.3165055495882565e-06, "loss": 0.0003, "step": 9496 }, { "epoch": 0.6120384094863698, "grad_norm": 0.0011579000983677743, "learning_rate": 4.315789473684211e-06, "loss": 0.0, "step": 9497 }, { "epoch": 0.612102854933299, "grad_norm": 0.009338821450512087, "learning_rate": 4.315073397780165e-06, "loss": 0.0, "step": 9498 }, { "epoch": 0.6121673003802282, "grad_norm": 0.0014420975540785869, "learning_rate": 4.3143573218761194e-06, "loss": 0.0, "step": 9499 }, { "epoch": 0.6122317458271573, "grad_norm": 2.6957402428930943, "learning_rate": 4.313641245972074e-06, "loss": 0.0235, "step": 9500 }, { "epoch": 0.6122961912740865, "grad_norm": 0.042654696893015, "learning_rate": 4.312925170068027e-06, "loss": 0.0004, "step": 9501 }, { "epoch": 0.6123606367210157, "grad_norm": 0.3342348687492541, "learning_rate": 4.3122090941639815e-06, "loss": 0.0006, "step": 9502 }, { "epoch": 0.6124250821679449, "grad_norm": 0.01680525990444114, "learning_rate": 4.311493018259936e-06, "loss": 0.0, "step": 9503 }, { "epoch": 0.612489527614874, "grad_norm": 0.0285541931701851, "learning_rate": 4.31077694235589e-06, "loss": 0.0001, "step": 9504 }, { "epoch": 0.6125539730618031, "grad_norm": 0.0024596945855401516, "learning_rate": 4.310060866451844e-06, "loss": 0.0, "step": 9505 }, { "epoch": 0.6126184185087323, "grad_norm": 0.16790719345938113, "learning_rate": 4.309344790547799e-06, "loss": 0.0014, "step": 9506 }, { "epoch": 0.6126828639556615, "grad_norm": 0.0001499272645216901, "learning_rate": 4.308628714643753e-06, "loss": 0.0, "step": 9507 }, { "epoch": 0.6127473094025907, "grad_norm": 0.000488662623177265, "learning_rate": 4.307912638739707e-06, "loss": 0.0, "step": 9508 }, { "epoch": 0.6128117548495199, "grad_norm": 0.00029549820058340356, "learning_rate": 4.307196562835661e-06, "loss": 0.0, "step": 9509 }, { "epoch": 0.6128762002964491, "grad_norm": 0.16494169692561012, "learning_rate": 4.306480486931615e-06, "loss": 0.0021, "step": 9510 }, { "epoch": 0.6129406457433783, "grad_norm": 0.0022252980179280958, "learning_rate": 4.305764411027569e-06, "loss": 0.0, "step": 9511 }, { "epoch": 0.6130050911903074, "grad_norm": 0.0031953051167575635, "learning_rate": 4.305048335123524e-06, "loss": 0.0, "step": 9512 }, { "epoch": 0.6130695366372366, "grad_norm": 0.0798065214137776, "learning_rate": 4.304332259219477e-06, "loss": 0.0002, "step": 9513 }, { "epoch": 0.6131339820841658, "grad_norm": 0.0032883889415415957, "learning_rate": 4.303616183315431e-06, "loss": 0.0, "step": 9514 }, { "epoch": 0.6131984275310949, "grad_norm": 0.026226701485447677, "learning_rate": 4.302900107411386e-06, "loss": 0.0001, "step": 9515 }, { "epoch": 0.6132628729780241, "grad_norm": 0.027247142216119388, "learning_rate": 4.30218403150734e-06, "loss": 0.0002, "step": 9516 }, { "epoch": 0.6133273184249532, "grad_norm": 0.03939227437205754, "learning_rate": 4.301467955603294e-06, "loss": 0.0, "step": 9517 }, { "epoch": 0.6133917638718824, "grad_norm": 0.00019537654302919405, "learning_rate": 4.3007518796992486e-06, "loss": 0.0, "step": 9518 }, { "epoch": 0.6134562093188116, "grad_norm": 0.02171230850239767, "learning_rate": 4.300035803795203e-06, "loss": 0.0001, "step": 9519 }, { "epoch": 0.6135206547657408, "grad_norm": 0.205370531067778, "learning_rate": 4.299319727891157e-06, "loss": 0.0008, "step": 9520 }, { "epoch": 0.61358510021267, "grad_norm": 0.0006344435985339772, "learning_rate": 4.2986036519871115e-06, "loss": 0.0, "step": 9521 }, { "epoch": 0.6136495456595992, "grad_norm": 0.031260613412393105, "learning_rate": 4.297887576083065e-06, "loss": 0.0001, "step": 9522 }, { "epoch": 0.6137139911065284, "grad_norm": 0.048878088660678165, "learning_rate": 4.297171500179019e-06, "loss": 0.0001, "step": 9523 }, { "epoch": 0.6137784365534575, "grad_norm": 0.5282191040348414, "learning_rate": 4.2964554242749735e-06, "loss": 0.0025, "step": 9524 }, { "epoch": 0.6138428820003867, "grad_norm": 0.12579375317430874, "learning_rate": 4.295739348370928e-06, "loss": 0.0005, "step": 9525 }, { "epoch": 0.6139073274473158, "grad_norm": 0.04641811663453107, "learning_rate": 4.295023272466881e-06, "loss": 0.0001, "step": 9526 }, { "epoch": 0.613971772894245, "grad_norm": 0.0002580502539569169, "learning_rate": 4.2943071965628356e-06, "loss": 0.0, "step": 9527 }, { "epoch": 0.6140362183411742, "grad_norm": 0.0033282476706218336, "learning_rate": 4.29359112065879e-06, "loss": 0.0, "step": 9528 }, { "epoch": 0.6141006637881034, "grad_norm": 0.00047556915669403905, "learning_rate": 4.292875044754745e-06, "loss": 0.0, "step": 9529 }, { "epoch": 0.6141651092350325, "grad_norm": 0.0013045353118663315, "learning_rate": 4.2921589688506985e-06, "loss": 0.0, "step": 9530 }, { "epoch": 0.6142295546819617, "grad_norm": 1.2612551377705028, "learning_rate": 4.291442892946653e-06, "loss": 0.0021, "step": 9531 }, { "epoch": 0.6142940001288909, "grad_norm": 0.00489277865661325, "learning_rate": 4.290726817042607e-06, "loss": 0.0, "step": 9532 }, { "epoch": 0.6143584455758201, "grad_norm": 0.0012519150045497776, "learning_rate": 4.290010741138561e-06, "loss": 0.0, "step": 9533 }, { "epoch": 0.6144228910227493, "grad_norm": 0.00015498001967760488, "learning_rate": 4.289294665234515e-06, "loss": 0.0, "step": 9534 }, { "epoch": 0.6144873364696785, "grad_norm": 0.28547099784379487, "learning_rate": 4.288578589330469e-06, "loss": 0.0015, "step": 9535 }, { "epoch": 0.6145517819166076, "grad_norm": 0.004607738427160797, "learning_rate": 4.287862513426423e-06, "loss": 0.0, "step": 9536 }, { "epoch": 0.6146162273635367, "grad_norm": 0.03986350100567441, "learning_rate": 4.287146437522378e-06, "loss": 0.0, "step": 9537 }, { "epoch": 0.6146806728104659, "grad_norm": 0.030827555896124095, "learning_rate": 4.286430361618332e-06, "loss": 0.0, "step": 9538 }, { "epoch": 0.6147451182573951, "grad_norm": 0.00016944203631999406, "learning_rate": 4.2857142857142855e-06, "loss": 0.0, "step": 9539 }, { "epoch": 0.6148095637043243, "grad_norm": 0.3045021862960025, "learning_rate": 4.284998209810241e-06, "loss": 0.0008, "step": 9540 }, { "epoch": 0.6148740091512535, "grad_norm": 0.14254954456604813, "learning_rate": 4.284282133906195e-06, "loss": 0.0017, "step": 9541 }, { "epoch": 0.6149384545981826, "grad_norm": 0.015514097077617816, "learning_rate": 4.283566058002148e-06, "loss": 0.0001, "step": 9542 }, { "epoch": 0.6150029000451118, "grad_norm": 0.00337946789828538, "learning_rate": 4.282849982098103e-06, "loss": 0.0, "step": 9543 }, { "epoch": 0.615067345492041, "grad_norm": 0.10997992786917365, "learning_rate": 4.282133906194057e-06, "loss": 0.0001, "step": 9544 }, { "epoch": 0.6151317909389702, "grad_norm": 0.08706040598996456, "learning_rate": 4.281417830290011e-06, "loss": 0.0002, "step": 9545 }, { "epoch": 0.6151962363858994, "grad_norm": 0.0017644996784956883, "learning_rate": 4.2807017543859656e-06, "loss": 0.0, "step": 9546 }, { "epoch": 0.6152606818328286, "grad_norm": 0.06708993009425664, "learning_rate": 4.279985678481919e-06, "loss": 0.0001, "step": 9547 }, { "epoch": 0.6153251272797576, "grad_norm": 0.24657193153772985, "learning_rate": 4.279269602577873e-06, "loss": 0.0002, "step": 9548 }, { "epoch": 0.6153895727266868, "grad_norm": 0.0006839220314001171, "learning_rate": 4.278553526673828e-06, "loss": 0.0, "step": 9549 }, { "epoch": 0.615454018173616, "grad_norm": 0.028784731907356236, "learning_rate": 4.277837450769782e-06, "loss": 0.0002, "step": 9550 }, { "epoch": 0.6155184636205452, "grad_norm": 0.0002460372595801576, "learning_rate": 4.277121374865736e-06, "loss": 0.0, "step": 9551 }, { "epoch": 0.6155829090674744, "grad_norm": 0.08505335527793859, "learning_rate": 4.2764052989616905e-06, "loss": 0.0001, "step": 9552 }, { "epoch": 0.6156473545144036, "grad_norm": 0.0007359800036842759, "learning_rate": 4.275689223057645e-06, "loss": 0.0, "step": 9553 }, { "epoch": 0.6157117999613327, "grad_norm": 0.0001408191174706121, "learning_rate": 4.274973147153599e-06, "loss": 0.0, "step": 9554 }, { "epoch": 0.6157762454082619, "grad_norm": 0.0023287981858707055, "learning_rate": 4.2742570712495525e-06, "loss": 0.0, "step": 9555 }, { "epoch": 0.6158406908551911, "grad_norm": 0.0019438432122889477, "learning_rate": 4.273540995345507e-06, "loss": 0.0, "step": 9556 }, { "epoch": 0.6159051363021203, "grad_norm": 1.152815331124407, "learning_rate": 4.272824919441461e-06, "loss": 0.0041, "step": 9557 }, { "epoch": 0.6159695817490495, "grad_norm": 0.009054800502372921, "learning_rate": 4.2721088435374154e-06, "loss": 0.0001, "step": 9558 }, { "epoch": 0.6160340271959786, "grad_norm": 0.41673807088647546, "learning_rate": 4.27139276763337e-06, "loss": 0.0025, "step": 9559 }, { "epoch": 0.6160984726429077, "grad_norm": 0.001291795997738671, "learning_rate": 4.270676691729323e-06, "loss": 0.0, "step": 9560 }, { "epoch": 0.6161629180898369, "grad_norm": 0.00037450827288190545, "learning_rate": 4.2699606158252775e-06, "loss": 0.0, "step": 9561 }, { "epoch": 0.6162273635367661, "grad_norm": 0.0005643279883164137, "learning_rate": 4.269244539921232e-06, "loss": 0.0, "step": 9562 }, { "epoch": 0.6162918089836953, "grad_norm": 0.00013711708141211, "learning_rate": 4.268528464017186e-06, "loss": 0.0, "step": 9563 }, { "epoch": 0.6163562544306245, "grad_norm": 0.00627109371547579, "learning_rate": 4.26781238811314e-06, "loss": 0.0015, "step": 9564 }, { "epoch": 0.6164206998775537, "grad_norm": 0.025730302441083985, "learning_rate": 4.267096312209095e-06, "loss": 0.0001, "step": 9565 }, { "epoch": 0.6164851453244828, "grad_norm": 0.0020360262097681384, "learning_rate": 4.266380236305049e-06, "loss": 0.0, "step": 9566 }, { "epoch": 0.616549590771412, "grad_norm": 0.00888623677442725, "learning_rate": 4.265664160401003e-06, "loss": 0.0, "step": 9567 }, { "epoch": 0.6166140362183412, "grad_norm": 0.00021110422840300865, "learning_rate": 4.264948084496957e-06, "loss": 0.0, "step": 9568 }, { "epoch": 0.6166784816652704, "grad_norm": 0.5416255501278395, "learning_rate": 4.264232008592911e-06, "loss": 0.0025, "step": 9569 }, { "epoch": 0.6167429271121995, "grad_norm": 0.19467164760361685, "learning_rate": 4.263515932688865e-06, "loss": 0.0003, "step": 9570 }, { "epoch": 0.6168073725591287, "grad_norm": 0.13399001852000092, "learning_rate": 4.26279985678482e-06, "loss": 0.0003, "step": 9571 }, { "epoch": 0.6168718180060578, "grad_norm": 0.00011533534958016236, "learning_rate": 4.262083780880774e-06, "loss": 0.0, "step": 9572 }, { "epoch": 0.616936263452987, "grad_norm": 0.08406597431618373, "learning_rate": 4.261367704976727e-06, "loss": 0.0001, "step": 9573 }, { "epoch": 0.6170007088999162, "grad_norm": 0.006471258951393888, "learning_rate": 4.260651629072682e-06, "loss": 0.0, "step": 9574 }, { "epoch": 0.6170651543468454, "grad_norm": 0.11885665445735855, "learning_rate": 4.259935553168637e-06, "loss": 0.0016, "step": 9575 }, { "epoch": 0.6171295997937746, "grad_norm": 0.005192039990253033, "learning_rate": 4.25921947726459e-06, "loss": 0.0001, "step": 9576 }, { "epoch": 0.6171940452407038, "grad_norm": 0.10412234963006879, "learning_rate": 4.2585034013605446e-06, "loss": 0.0001, "step": 9577 }, { "epoch": 0.6172584906876329, "grad_norm": 0.03084306802890673, "learning_rate": 4.257787325456499e-06, "loss": 0.0, "step": 9578 }, { "epoch": 0.6173229361345621, "grad_norm": 0.1886445616070666, "learning_rate": 4.257071249552453e-06, "loss": 0.0003, "step": 9579 }, { "epoch": 0.6173873815814913, "grad_norm": 0.007571403887036818, "learning_rate": 4.2563551736484075e-06, "loss": 0.0, "step": 9580 }, { "epoch": 0.6174518270284205, "grad_norm": 0.11695276831761708, "learning_rate": 4.255639097744361e-06, "loss": 0.0015, "step": 9581 }, { "epoch": 0.6175162724753496, "grad_norm": 0.03454928162524213, "learning_rate": 4.254923021840315e-06, "loss": 0.0001, "step": 9582 }, { "epoch": 0.6175807179222788, "grad_norm": 0.08363969623079226, "learning_rate": 4.2542069459362695e-06, "loss": 0.0001, "step": 9583 }, { "epoch": 0.6176451633692079, "grad_norm": 0.0032724227689246324, "learning_rate": 4.253490870032224e-06, "loss": 0.0, "step": 9584 }, { "epoch": 0.6177096088161371, "grad_norm": 0.16838093020000738, "learning_rate": 4.252774794128178e-06, "loss": 0.002, "step": 9585 }, { "epoch": 0.6177740542630663, "grad_norm": 0.015356828445240035, "learning_rate": 4.2520587182241316e-06, "loss": 0.0001, "step": 9586 }, { "epoch": 0.6178384997099955, "grad_norm": 0.24950890436843448, "learning_rate": 4.251342642320087e-06, "loss": 0.0009, "step": 9587 }, { "epoch": 0.6179029451569247, "grad_norm": 0.0004905781353135952, "learning_rate": 4.250626566416041e-06, "loss": 0.0, "step": 9588 }, { "epoch": 0.6179673906038539, "grad_norm": 0.0017723076927522122, "learning_rate": 4.2499104905119945e-06, "loss": 0.0, "step": 9589 }, { "epoch": 0.618031836050783, "grad_norm": 0.00027595294939807507, "learning_rate": 4.249194414607949e-06, "loss": 0.0, "step": 9590 }, { "epoch": 0.6180962814977122, "grad_norm": 0.44881769604763494, "learning_rate": 4.248478338703903e-06, "loss": 0.0044, "step": 9591 }, { "epoch": 0.6181607269446414, "grad_norm": 0.0011654496177690313, "learning_rate": 4.247762262799857e-06, "loss": 0.0, "step": 9592 }, { "epoch": 0.6182251723915705, "grad_norm": 0.0002969354686474064, "learning_rate": 4.247046186895812e-06, "loss": 0.0, "step": 9593 }, { "epoch": 0.6182896178384997, "grad_norm": 0.06758952912812587, "learning_rate": 4.246330110991765e-06, "loss": 0.0003, "step": 9594 }, { "epoch": 0.6183540632854289, "grad_norm": 0.0023606559277491745, "learning_rate": 4.245614035087719e-06, "loss": 0.0, "step": 9595 }, { "epoch": 0.618418508732358, "grad_norm": 0.0005240889689227749, "learning_rate": 4.244897959183674e-06, "loss": 0.0, "step": 9596 }, { "epoch": 0.6184829541792872, "grad_norm": 0.0028731344088614675, "learning_rate": 4.244181883279628e-06, "loss": 0.0, "step": 9597 }, { "epoch": 0.6185473996262164, "grad_norm": 0.003774219453522588, "learning_rate": 4.243465807375582e-06, "loss": 0.0, "step": 9598 }, { "epoch": 0.6186118450731456, "grad_norm": 0.04812190750716624, "learning_rate": 4.242749731471537e-06, "loss": 0.0002, "step": 9599 }, { "epoch": 0.6186762905200748, "grad_norm": 0.0009303746721064348, "learning_rate": 4.242033655567491e-06, "loss": 0.0, "step": 9600 }, { "epoch": 0.618740735967004, "grad_norm": 0.000261596983827811, "learning_rate": 4.241317579663445e-06, "loss": 0.0, "step": 9601 }, { "epoch": 0.6188051814139331, "grad_norm": 4.060820420659366e-05, "learning_rate": 4.240601503759399e-06, "loss": 0.0, "step": 9602 }, { "epoch": 0.6188696268608623, "grad_norm": 0.05925821210830824, "learning_rate": 4.239885427855353e-06, "loss": 0.0016, "step": 9603 }, { "epoch": 0.6189340723077914, "grad_norm": 0.0003562257420606521, "learning_rate": 4.239169351951307e-06, "loss": 0.0, "step": 9604 }, { "epoch": 0.6189985177547206, "grad_norm": 0.031397710705029135, "learning_rate": 4.2384532760472616e-06, "loss": 0.0, "step": 9605 }, { "epoch": 0.6190629632016498, "grad_norm": 2.4042094265960856e-05, "learning_rate": 4.237737200143216e-06, "loss": 0.0, "step": 9606 }, { "epoch": 0.619127408648579, "grad_norm": 0.021245099475145683, "learning_rate": 4.237021124239169e-06, "loss": 0.0002, "step": 9607 }, { "epoch": 0.6191918540955081, "grad_norm": 0.0018329740066415307, "learning_rate": 4.236305048335124e-06, "loss": 0.0, "step": 9608 }, { "epoch": 0.6192562995424373, "grad_norm": 0.003174532843030457, "learning_rate": 4.235588972431078e-06, "loss": 0.0, "step": 9609 }, { "epoch": 0.6193207449893665, "grad_norm": 0.12445039922168191, "learning_rate": 4.234872896527032e-06, "loss": 0.0004, "step": 9610 }, { "epoch": 0.6193851904362957, "grad_norm": 0.6708809129849929, "learning_rate": 4.2341568206229865e-06, "loss": 0.0025, "step": 9611 }, { "epoch": 0.6194496358832249, "grad_norm": 0.06722900209427916, "learning_rate": 4.233440744718941e-06, "loss": 0.0002, "step": 9612 }, { "epoch": 0.6195140813301541, "grad_norm": 0.02487789501266384, "learning_rate": 4.232724668814895e-06, "loss": 0.0001, "step": 9613 }, { "epoch": 0.6195785267770832, "grad_norm": 0.0070891124843938435, "learning_rate": 4.232008592910849e-06, "loss": 0.0, "step": 9614 }, { "epoch": 0.6196429722240123, "grad_norm": 0.03626208442549633, "learning_rate": 4.231292517006803e-06, "loss": 0.0001, "step": 9615 }, { "epoch": 0.6197074176709415, "grad_norm": 0.13685853487861604, "learning_rate": 4.230576441102757e-06, "loss": 0.0004, "step": 9616 }, { "epoch": 0.6197718631178707, "grad_norm": 0.02757626052057049, "learning_rate": 4.2298603651987114e-06, "loss": 0.0001, "step": 9617 }, { "epoch": 0.6198363085647999, "grad_norm": 0.0014798321427789366, "learning_rate": 4.229144289294666e-06, "loss": 0.0, "step": 9618 }, { "epoch": 0.6199007540117291, "grad_norm": 0.01049815395489131, "learning_rate": 4.228428213390619e-06, "loss": 0.0001, "step": 9619 }, { "epoch": 0.6199651994586582, "grad_norm": 0.6844524113320779, "learning_rate": 4.2277121374865735e-06, "loss": 0.0044, "step": 9620 }, { "epoch": 0.6200296449055874, "grad_norm": 0.0002147181528283652, "learning_rate": 4.226996061582528e-06, "loss": 0.0, "step": 9621 }, { "epoch": 0.6200940903525166, "grad_norm": 0.2500937256360808, "learning_rate": 4.226279985678483e-06, "loss": 0.001, "step": 9622 }, { "epoch": 0.6201585357994458, "grad_norm": 0.04441692228177357, "learning_rate": 4.225563909774436e-06, "loss": 0.0001, "step": 9623 }, { "epoch": 0.620222981246375, "grad_norm": 0.0028260496702314057, "learning_rate": 4.224847833870391e-06, "loss": 0.0, "step": 9624 }, { "epoch": 0.6202874266933042, "grad_norm": 0.027488300872075832, "learning_rate": 4.224131757966345e-06, "loss": 0.0, "step": 9625 }, { "epoch": 0.6203518721402332, "grad_norm": 7.346583784405198e-05, "learning_rate": 4.223415682062299e-06, "loss": 0.0, "step": 9626 }, { "epoch": 0.6204163175871624, "grad_norm": 0.06306652635555225, "learning_rate": 4.222699606158253e-06, "loss": 0.0002, "step": 9627 }, { "epoch": 0.6204807630340916, "grad_norm": 0.00592616535198368, "learning_rate": 4.221983530254207e-06, "loss": 0.0001, "step": 9628 }, { "epoch": 0.6205452084810208, "grad_norm": 0.0001044583771192941, "learning_rate": 4.221267454350161e-06, "loss": 0.0, "step": 9629 }, { "epoch": 0.62060965392795, "grad_norm": 5.9252248572389913e-05, "learning_rate": 4.220551378446116e-06, "loss": 0.0, "step": 9630 }, { "epoch": 0.6206740993748792, "grad_norm": 0.04576163754311494, "learning_rate": 4.21983530254207e-06, "loss": 0.0017, "step": 9631 }, { "epoch": 0.6207385448218083, "grad_norm": 0.011120442526384637, "learning_rate": 4.219119226638023e-06, "loss": 0.0001, "step": 9632 }, { "epoch": 0.6208029902687375, "grad_norm": 0.0012373695941060873, "learning_rate": 4.218403150733978e-06, "loss": 0.0, "step": 9633 }, { "epoch": 0.6208674357156667, "grad_norm": 0.0033271694642447464, "learning_rate": 4.217687074829933e-06, "loss": 0.0, "step": 9634 }, { "epoch": 0.6209318811625959, "grad_norm": 0.0023266009334657824, "learning_rate": 4.216970998925886e-06, "loss": 0.0, "step": 9635 }, { "epoch": 0.6209963266095251, "grad_norm": 0.0006776249456023286, "learning_rate": 4.2162549230218406e-06, "loss": 0.0, "step": 9636 }, { "epoch": 0.6210607720564542, "grad_norm": 0.01654521411218407, "learning_rate": 4.215538847117795e-06, "loss": 0.0, "step": 9637 }, { "epoch": 0.6211252175033833, "grad_norm": 4.852683199395689e-05, "learning_rate": 4.214822771213749e-06, "loss": 0.0, "step": 9638 }, { "epoch": 0.6211896629503125, "grad_norm": 0.0021993879120563247, "learning_rate": 4.2141066953097035e-06, "loss": 0.0, "step": 9639 }, { "epoch": 0.6212541083972417, "grad_norm": 0.0007637886447388776, "learning_rate": 4.213390619405657e-06, "loss": 0.0, "step": 9640 }, { "epoch": 0.6213185538441709, "grad_norm": 0.007377042865803844, "learning_rate": 4.212674543501611e-06, "loss": 0.0, "step": 9641 }, { "epoch": 0.6213829992911001, "grad_norm": 0.0005533541648899976, "learning_rate": 4.2119584675975655e-06, "loss": 0.0, "step": 9642 }, { "epoch": 0.6214474447380293, "grad_norm": 0.42125994387586024, "learning_rate": 4.21124239169352e-06, "loss": 0.0025, "step": 9643 }, { "epoch": 0.6215118901849584, "grad_norm": 0.03175686695537614, "learning_rate": 4.210526315789474e-06, "loss": 0.0001, "step": 9644 }, { "epoch": 0.6215763356318876, "grad_norm": 0.299370056123468, "learning_rate": 4.209810239885428e-06, "loss": 0.0024, "step": 9645 }, { "epoch": 0.6216407810788168, "grad_norm": 0.26325604179437306, "learning_rate": 4.209094163981383e-06, "loss": 0.0007, "step": 9646 }, { "epoch": 0.621705226525746, "grad_norm": 0.01473776976325703, "learning_rate": 4.208378088077337e-06, "loss": 0.0, "step": 9647 }, { "epoch": 0.6217696719726751, "grad_norm": 0.0005109300137223879, "learning_rate": 4.2076620121732905e-06, "loss": 0.0, "step": 9648 }, { "epoch": 0.6218341174196043, "grad_norm": 0.0002493537642654104, "learning_rate": 4.206945936269245e-06, "loss": 0.0, "step": 9649 }, { "epoch": 0.6218985628665334, "grad_norm": 0.07706493977662457, "learning_rate": 4.206229860365199e-06, "loss": 0.0001, "step": 9650 }, { "epoch": 0.6219630083134626, "grad_norm": 0.0003456695922869574, "learning_rate": 4.205513784461153e-06, "loss": 0.0, "step": 9651 }, { "epoch": 0.6220274537603918, "grad_norm": 0.0018471179024505506, "learning_rate": 4.204797708557108e-06, "loss": 0.0, "step": 9652 }, { "epoch": 0.622091899207321, "grad_norm": 0.4382099794044541, "learning_rate": 4.204081632653061e-06, "loss": 0.0014, "step": 9653 }, { "epoch": 0.6221563446542502, "grad_norm": 0.00014206190967321758, "learning_rate": 4.203365556749015e-06, "loss": 0.0, "step": 9654 }, { "epoch": 0.6222207901011794, "grad_norm": 0.023678985918828478, "learning_rate": 4.20264948084497e-06, "loss": 0.0, "step": 9655 }, { "epoch": 0.6222852355481086, "grad_norm": 0.0015190579278471688, "learning_rate": 4.201933404940924e-06, "loss": 0.0, "step": 9656 }, { "epoch": 0.6223496809950377, "grad_norm": 2.9540092315116344, "learning_rate": 4.201217329036878e-06, "loss": 0.0244, "step": 9657 }, { "epoch": 0.6224141264419669, "grad_norm": 0.00012206465167372003, "learning_rate": 4.200501253132833e-06, "loss": 0.0, "step": 9658 }, { "epoch": 0.6224785718888961, "grad_norm": 0.0002953703826734249, "learning_rate": 4.199785177228787e-06, "loss": 0.0, "step": 9659 }, { "epoch": 0.6225430173358252, "grad_norm": 0.0006869575541465469, "learning_rate": 4.199069101324741e-06, "loss": 0.0, "step": 9660 }, { "epoch": 0.6226074627827544, "grad_norm": 0.005004019924537658, "learning_rate": 4.198353025420695e-06, "loss": 0.0, "step": 9661 }, { "epoch": 0.6226719082296835, "grad_norm": 0.0036624435182955875, "learning_rate": 4.197636949516649e-06, "loss": 0.0, "step": 9662 }, { "epoch": 0.6227363536766127, "grad_norm": 0.00772927658587039, "learning_rate": 4.196920873612603e-06, "loss": 0.0001, "step": 9663 }, { "epoch": 0.6228007991235419, "grad_norm": 0.007641090487948875, "learning_rate": 4.1962047977085576e-06, "loss": 0.0, "step": 9664 }, { "epoch": 0.6228652445704711, "grad_norm": 0.12874149139306848, "learning_rate": 4.195488721804512e-06, "loss": 0.0005, "step": 9665 }, { "epoch": 0.6229296900174003, "grad_norm": 0.043186854746607224, "learning_rate": 4.194772645900465e-06, "loss": 0.0001, "step": 9666 }, { "epoch": 0.6229941354643295, "grad_norm": 0.0034495539036033776, "learning_rate": 4.19405656999642e-06, "loss": 0.0, "step": 9667 }, { "epoch": 0.6230585809112587, "grad_norm": 0.0015735462216765262, "learning_rate": 4.193340494092374e-06, "loss": 0.0, "step": 9668 }, { "epoch": 0.6231230263581878, "grad_norm": 0.0028003798244138406, "learning_rate": 4.192624418188328e-06, "loss": 0.0, "step": 9669 }, { "epoch": 0.623187471805117, "grad_norm": 0.0021314886590723505, "learning_rate": 4.1919083422842825e-06, "loss": 0.0, "step": 9670 }, { "epoch": 0.6232519172520461, "grad_norm": 0.020338036201677363, "learning_rate": 4.191192266380237e-06, "loss": 0.0, "step": 9671 }, { "epoch": 0.6233163626989753, "grad_norm": 0.15354739217184882, "learning_rate": 4.190476190476191e-06, "loss": 0.0016, "step": 9672 }, { "epoch": 0.6233808081459045, "grad_norm": 0.015586767135734498, "learning_rate": 4.189760114572145e-06, "loss": 0.0, "step": 9673 }, { "epoch": 0.6234452535928336, "grad_norm": 0.016917160072103598, "learning_rate": 4.189044038668099e-06, "loss": 0.0, "step": 9674 }, { "epoch": 0.6235096990397628, "grad_norm": 0.04137864002051029, "learning_rate": 4.188327962764053e-06, "loss": 0.0, "step": 9675 }, { "epoch": 0.623574144486692, "grad_norm": 0.005969930532603671, "learning_rate": 4.1876118868600074e-06, "loss": 0.0, "step": 9676 }, { "epoch": 0.6236385899336212, "grad_norm": 0.01609130556531648, "learning_rate": 4.186895810955962e-06, "loss": 0.0, "step": 9677 }, { "epoch": 0.6237030353805504, "grad_norm": 0.0025735075923348596, "learning_rate": 4.186179735051916e-06, "loss": 0.0, "step": 9678 }, { "epoch": 0.6237674808274796, "grad_norm": 0.0016625419018875228, "learning_rate": 4.1854636591478695e-06, "loss": 0.0, "step": 9679 }, { "epoch": 0.6238319262744088, "grad_norm": 0.0003346156449337851, "learning_rate": 4.184747583243825e-06, "loss": 0.0, "step": 9680 }, { "epoch": 0.6238963717213379, "grad_norm": 0.007099106515354779, "learning_rate": 4.184031507339779e-06, "loss": 0.0, "step": 9681 }, { "epoch": 0.623960817168267, "grad_norm": 0.003438129501815967, "learning_rate": 4.183315431435732e-06, "loss": 0.0, "step": 9682 }, { "epoch": 0.6240252626151962, "grad_norm": 0.03816232143606552, "learning_rate": 4.182599355531687e-06, "loss": 0.0005, "step": 9683 }, { "epoch": 0.6240897080621254, "grad_norm": 0.07756988779862854, "learning_rate": 4.181883279627641e-06, "loss": 0.0004, "step": 9684 }, { "epoch": 0.6241541535090546, "grad_norm": 0.4092447233751244, "learning_rate": 4.181167203723595e-06, "loss": 0.001, "step": 9685 }, { "epoch": 0.6242185989559838, "grad_norm": 0.0007207571886176135, "learning_rate": 4.18045112781955e-06, "loss": 0.0, "step": 9686 }, { "epoch": 0.6242830444029129, "grad_norm": 0.0015483125744365732, "learning_rate": 4.179735051915503e-06, "loss": 0.0, "step": 9687 }, { "epoch": 0.6243474898498421, "grad_norm": 0.0021180640674535764, "learning_rate": 4.179018976011457e-06, "loss": 0.0, "step": 9688 }, { "epoch": 0.6244119352967713, "grad_norm": 0.0004169306514595993, "learning_rate": 4.178302900107412e-06, "loss": 0.0, "step": 9689 }, { "epoch": 0.6244763807437005, "grad_norm": 0.2161596745728091, "learning_rate": 4.177586824203366e-06, "loss": 0.0003, "step": 9690 }, { "epoch": 0.6245408261906297, "grad_norm": 0.0011705409204292648, "learning_rate": 4.17687074829932e-06, "loss": 0.0, "step": 9691 }, { "epoch": 0.6246052716375589, "grad_norm": 0.0003713461406357066, "learning_rate": 4.1761546723952745e-06, "loss": 0.0, "step": 9692 }, { "epoch": 0.6246697170844879, "grad_norm": 0.021227762380770367, "learning_rate": 4.175438596491229e-06, "loss": 0.0, "step": 9693 }, { "epoch": 0.6247341625314171, "grad_norm": 0.002796829726804933, "learning_rate": 4.174722520587183e-06, "loss": 0.0, "step": 9694 }, { "epoch": 0.6247986079783463, "grad_norm": 0.029885802088271313, "learning_rate": 4.1740064446831366e-06, "loss": 0.0, "step": 9695 }, { "epoch": 0.6248630534252755, "grad_norm": 0.058992337485208504, "learning_rate": 4.173290368779091e-06, "loss": 0.0001, "step": 9696 }, { "epoch": 0.6249274988722047, "grad_norm": 0.006196414162415305, "learning_rate": 4.172574292875045e-06, "loss": 0.0, "step": 9697 }, { "epoch": 0.6249919443191339, "grad_norm": 0.0003641721267636872, "learning_rate": 4.1718582169709995e-06, "loss": 0.0, "step": 9698 }, { "epoch": 0.625056389766063, "grad_norm": 0.000895571559838155, "learning_rate": 4.171142141066954e-06, "loss": 0.0, "step": 9699 }, { "epoch": 0.6251208352129922, "grad_norm": 0.035813595296406875, "learning_rate": 4.170426065162907e-06, "loss": 0.0004, "step": 9700 }, { "epoch": 0.6251852806599214, "grad_norm": 0.014866533526694621, "learning_rate": 4.1697099892588615e-06, "loss": 0.0, "step": 9701 }, { "epoch": 0.6252497261068506, "grad_norm": 0.0004485504683336761, "learning_rate": 4.168993913354816e-06, "loss": 0.0, "step": 9702 }, { "epoch": 0.6253141715537798, "grad_norm": 0.003314286639645609, "learning_rate": 4.16827783745077e-06, "loss": 0.0, "step": 9703 }, { "epoch": 0.6253786170007088, "grad_norm": 0.0038918480210237234, "learning_rate": 4.167561761546724e-06, "loss": 0.0, "step": 9704 }, { "epoch": 0.625443062447638, "grad_norm": 0.12356112889405628, "learning_rate": 4.166845685642679e-06, "loss": 0.0005, "step": 9705 }, { "epoch": 0.6255075078945672, "grad_norm": 0.010094385634850529, "learning_rate": 4.166129609738633e-06, "loss": 0.0, "step": 9706 }, { "epoch": 0.6255719533414964, "grad_norm": 0.0004073141393491479, "learning_rate": 4.165413533834587e-06, "loss": 0.0, "step": 9707 }, { "epoch": 0.6256363987884256, "grad_norm": 0.017301657748644873, "learning_rate": 4.164697457930541e-06, "loss": 0.0001, "step": 9708 }, { "epoch": 0.6257008442353548, "grad_norm": 0.008330549915594329, "learning_rate": 4.163981382026495e-06, "loss": 0.0, "step": 9709 }, { "epoch": 0.625765289682284, "grad_norm": 0.0012220208405838166, "learning_rate": 4.163265306122449e-06, "loss": 0.0, "step": 9710 }, { "epoch": 0.6258297351292131, "grad_norm": 0.0024680865602183513, "learning_rate": 4.162549230218404e-06, "loss": 0.0, "step": 9711 }, { "epoch": 0.6258941805761423, "grad_norm": 0.0005113877056839675, "learning_rate": 4.161833154314357e-06, "loss": 0.0, "step": 9712 }, { "epoch": 0.6259586260230715, "grad_norm": 0.22660793473691426, "learning_rate": 4.161117078410311e-06, "loss": 0.0002, "step": 9713 }, { "epoch": 0.6260230714700007, "grad_norm": 0.0678046505039315, "learning_rate": 4.160401002506266e-06, "loss": 0.0, "step": 9714 }, { "epoch": 0.6260875169169298, "grad_norm": 0.0009742323449192132, "learning_rate": 4.159684926602221e-06, "loss": 0.0, "step": 9715 }, { "epoch": 0.626151962363859, "grad_norm": 0.006071902899983566, "learning_rate": 4.158968850698174e-06, "loss": 0.0, "step": 9716 }, { "epoch": 0.6262164078107881, "grad_norm": 0.2007009679693991, "learning_rate": 4.158252774794129e-06, "loss": 0.0006, "step": 9717 }, { "epoch": 0.6262808532577173, "grad_norm": 0.6364653615568687, "learning_rate": 4.157536698890083e-06, "loss": 0.0149, "step": 9718 }, { "epoch": 0.6263452987046465, "grad_norm": 0.002847480991120004, "learning_rate": 4.156820622986037e-06, "loss": 0.0, "step": 9719 }, { "epoch": 0.6264097441515757, "grad_norm": 0.0029042838819600516, "learning_rate": 4.156104547081991e-06, "loss": 0.0, "step": 9720 }, { "epoch": 0.6264741895985049, "grad_norm": 0.0010571381934525565, "learning_rate": 4.155388471177945e-06, "loss": 0.0, "step": 9721 }, { "epoch": 0.6265386350454341, "grad_norm": 0.0006312867694469445, "learning_rate": 4.154672395273899e-06, "loss": 0.0, "step": 9722 }, { "epoch": 0.6266030804923632, "grad_norm": 0.501257536919454, "learning_rate": 4.1539563193698536e-06, "loss": 0.0035, "step": 9723 }, { "epoch": 0.6266675259392924, "grad_norm": 0.0003598110398831839, "learning_rate": 4.153240243465808e-06, "loss": 0.0, "step": 9724 }, { "epoch": 0.6267319713862216, "grad_norm": 0.4419211195223192, "learning_rate": 4.152524167561761e-06, "loss": 0.0013, "step": 9725 }, { "epoch": 0.6267964168331507, "grad_norm": 0.005140032610661535, "learning_rate": 4.151808091657716e-06, "loss": 0.0, "step": 9726 }, { "epoch": 0.6268608622800799, "grad_norm": 0.006610240234879396, "learning_rate": 4.151092015753671e-06, "loss": 0.0, "step": 9727 }, { "epoch": 0.626925307727009, "grad_norm": 0.012762952650590107, "learning_rate": 4.150375939849624e-06, "loss": 0.0, "step": 9728 }, { "epoch": 0.6269897531739382, "grad_norm": 0.010514889004076607, "learning_rate": 4.1496598639455785e-06, "loss": 0.0001, "step": 9729 }, { "epoch": 0.6270541986208674, "grad_norm": 3.305536977108447, "learning_rate": 4.148943788041533e-06, "loss": 0.0243, "step": 9730 }, { "epoch": 0.6271186440677966, "grad_norm": 0.009644390248062271, "learning_rate": 4.148227712137487e-06, "loss": 0.0001, "step": 9731 }, { "epoch": 0.6271830895147258, "grad_norm": 0.004172765545080662, "learning_rate": 4.147511636233441e-06, "loss": 0.0, "step": 9732 }, { "epoch": 0.627247534961655, "grad_norm": 0.6687569011795615, "learning_rate": 4.146795560329395e-06, "loss": 0.0072, "step": 9733 }, { "epoch": 0.6273119804085842, "grad_norm": 0.005077958161566104, "learning_rate": 4.146079484425349e-06, "loss": 0.0, "step": 9734 }, { "epoch": 0.6273764258555133, "grad_norm": 0.0011856766644601346, "learning_rate": 4.1453634085213034e-06, "loss": 0.0, "step": 9735 }, { "epoch": 0.6274408713024425, "grad_norm": 0.0011318798438648293, "learning_rate": 4.144647332617258e-06, "loss": 0.0, "step": 9736 }, { "epoch": 0.6275053167493717, "grad_norm": 0.0022622025217353646, "learning_rate": 4.143931256713212e-06, "loss": 0.0, "step": 9737 }, { "epoch": 0.6275697621963008, "grad_norm": 0.0031208555963091454, "learning_rate": 4.1432151808091655e-06, "loss": 0.0, "step": 9738 }, { "epoch": 0.62763420764323, "grad_norm": 0.11720895173505456, "learning_rate": 4.142499104905121e-06, "loss": 0.0001, "step": 9739 }, { "epoch": 0.6276986530901592, "grad_norm": 0.005784035279351796, "learning_rate": 4.141783029001075e-06, "loss": 0.0, "step": 9740 }, { "epoch": 0.6277630985370883, "grad_norm": 0.001448754885450368, "learning_rate": 4.141066953097028e-06, "loss": 0.0, "step": 9741 }, { "epoch": 0.6278275439840175, "grad_norm": 0.0007223212627194857, "learning_rate": 4.140350877192983e-06, "loss": 0.0, "step": 9742 }, { "epoch": 0.6278919894309467, "grad_norm": 0.14653846377902094, "learning_rate": 4.139634801288937e-06, "loss": 0.0009, "step": 9743 }, { "epoch": 0.6279564348778759, "grad_norm": 0.06317755627566499, "learning_rate": 4.138918725384891e-06, "loss": 0.0, "step": 9744 }, { "epoch": 0.6280208803248051, "grad_norm": 0.3560164288228587, "learning_rate": 4.138202649480846e-06, "loss": 0.0018, "step": 9745 }, { "epoch": 0.6280853257717343, "grad_norm": 0.005363883013006638, "learning_rate": 4.137486573576799e-06, "loss": 0.0, "step": 9746 }, { "epoch": 0.6281497712186634, "grad_norm": 0.033138692973302124, "learning_rate": 4.136770497672753e-06, "loss": 0.0, "step": 9747 }, { "epoch": 0.6282142166655926, "grad_norm": 0.28489794855303097, "learning_rate": 4.136054421768708e-06, "loss": 0.0004, "step": 9748 }, { "epoch": 0.6282786621125217, "grad_norm": 0.003932125381898977, "learning_rate": 4.135338345864662e-06, "loss": 0.0, "step": 9749 }, { "epoch": 0.6283431075594509, "grad_norm": 0.019557865455868052, "learning_rate": 4.134622269960616e-06, "loss": 0.0001, "step": 9750 }, { "epoch": 0.6284075530063801, "grad_norm": 0.000930857796089239, "learning_rate": 4.1339061940565705e-06, "loss": 0.0, "step": 9751 }, { "epoch": 0.6284719984533093, "grad_norm": 0.004989099766665328, "learning_rate": 4.133190118152525e-06, "loss": 0.0, "step": 9752 }, { "epoch": 0.6285364439002384, "grad_norm": 0.0035396782047819827, "learning_rate": 4.132474042248479e-06, "loss": 0.0, "step": 9753 }, { "epoch": 0.6286008893471676, "grad_norm": 0.05278230405212935, "learning_rate": 4.1317579663444326e-06, "loss": 0.0001, "step": 9754 }, { "epoch": 0.6286653347940968, "grad_norm": 0.003403609391087778, "learning_rate": 4.131041890440387e-06, "loss": 0.0, "step": 9755 }, { "epoch": 0.628729780241026, "grad_norm": 0.07996074866573993, "learning_rate": 4.130325814536341e-06, "loss": 0.0002, "step": 9756 }, { "epoch": 0.6287942256879552, "grad_norm": 0.2723902128574586, "learning_rate": 4.1296097386322955e-06, "loss": 0.0012, "step": 9757 }, { "epoch": 0.6288586711348844, "grad_norm": 0.0025250918803746652, "learning_rate": 4.12889366272825e-06, "loss": 0.0, "step": 9758 }, { "epoch": 0.6289231165818135, "grad_norm": 0.0360516493613523, "learning_rate": 4.128177586824203e-06, "loss": 0.0001, "step": 9759 }, { "epoch": 0.6289875620287426, "grad_norm": 0.051373972553429056, "learning_rate": 4.1274615109201575e-06, "loss": 0.0002, "step": 9760 }, { "epoch": 0.6290520074756718, "grad_norm": 0.007127486987450691, "learning_rate": 4.126745435016112e-06, "loss": 0.0, "step": 9761 }, { "epoch": 0.629116452922601, "grad_norm": 0.04414521157648746, "learning_rate": 4.126029359112066e-06, "loss": 0.0001, "step": 9762 }, { "epoch": 0.6291808983695302, "grad_norm": 0.015933055726976987, "learning_rate": 4.12531328320802e-06, "loss": 0.0, "step": 9763 }, { "epoch": 0.6292453438164594, "grad_norm": 0.0004764234417019966, "learning_rate": 4.124597207303975e-06, "loss": 0.0, "step": 9764 }, { "epoch": 0.6293097892633885, "grad_norm": 0.005602980997375922, "learning_rate": 4.123881131399929e-06, "loss": 0.0, "step": 9765 }, { "epoch": 0.6293742347103177, "grad_norm": 0.0022225477643587023, "learning_rate": 4.123165055495883e-06, "loss": 0.0, "step": 9766 }, { "epoch": 0.6294386801572469, "grad_norm": 0.3022448735374261, "learning_rate": 4.122448979591837e-06, "loss": 0.0012, "step": 9767 }, { "epoch": 0.6295031256041761, "grad_norm": 0.02914737365516086, "learning_rate": 4.121732903687791e-06, "loss": 0.0001, "step": 9768 }, { "epoch": 0.6295675710511053, "grad_norm": 0.05615636769679371, "learning_rate": 4.121016827783745e-06, "loss": 0.0001, "step": 9769 }, { "epoch": 0.6296320164980345, "grad_norm": 0.0003076192540867642, "learning_rate": 4.1203007518797e-06, "loss": 0.0, "step": 9770 }, { "epoch": 0.6296964619449635, "grad_norm": 0.06890702224480363, "learning_rate": 4.119584675975654e-06, "loss": 0.0003, "step": 9771 }, { "epoch": 0.6297609073918927, "grad_norm": 0.006440353235390373, "learning_rate": 4.118868600071607e-06, "loss": 0.0, "step": 9772 }, { "epoch": 0.6298253528388219, "grad_norm": 0.001049338266341266, "learning_rate": 4.118152524167562e-06, "loss": 0.0, "step": 9773 }, { "epoch": 0.6298897982857511, "grad_norm": 0.09006338639124273, "learning_rate": 4.117436448263517e-06, "loss": 0.0017, "step": 9774 }, { "epoch": 0.6299542437326803, "grad_norm": 0.32334044423878544, "learning_rate": 4.11672037235947e-06, "loss": 0.0022, "step": 9775 }, { "epoch": 0.6300186891796095, "grad_norm": 0.0014797895047647244, "learning_rate": 4.116004296455425e-06, "loss": 0.0, "step": 9776 }, { "epoch": 0.6300831346265386, "grad_norm": 0.002647025070808818, "learning_rate": 4.115288220551379e-06, "loss": 0.0, "step": 9777 }, { "epoch": 0.6301475800734678, "grad_norm": 0.0002881831873300413, "learning_rate": 4.114572144647333e-06, "loss": 0.0, "step": 9778 }, { "epoch": 0.630212025520397, "grad_norm": 0.0025381103557760654, "learning_rate": 4.1138560687432875e-06, "loss": 0.0, "step": 9779 }, { "epoch": 0.6302764709673262, "grad_norm": 0.0006100557027432721, "learning_rate": 4.113139992839241e-06, "loss": 0.0, "step": 9780 }, { "epoch": 0.6303409164142554, "grad_norm": 0.00111819279469302, "learning_rate": 4.112423916935195e-06, "loss": 0.0, "step": 9781 }, { "epoch": 0.6304053618611845, "grad_norm": 0.009024577275247043, "learning_rate": 4.1117078410311496e-06, "loss": 0.0, "step": 9782 }, { "epoch": 0.6304698073081136, "grad_norm": 0.040998943660520266, "learning_rate": 4.110991765127104e-06, "loss": 0.0005, "step": 9783 }, { "epoch": 0.6305342527550428, "grad_norm": 0.2744994794192702, "learning_rate": 4.110275689223058e-06, "loss": 0.0017, "step": 9784 }, { "epoch": 0.630598698201972, "grad_norm": 0.47197642973060894, "learning_rate": 4.109559613319012e-06, "loss": 0.0008, "step": 9785 }, { "epoch": 0.6306631436489012, "grad_norm": 0.011569277487227457, "learning_rate": 4.108843537414967e-06, "loss": 0.0001, "step": 9786 }, { "epoch": 0.6307275890958304, "grad_norm": 0.00016951053258177544, "learning_rate": 4.108127461510921e-06, "loss": 0.0, "step": 9787 }, { "epoch": 0.6307920345427596, "grad_norm": 0.0002565186560096678, "learning_rate": 4.1074113856068745e-06, "loss": 0.0, "step": 9788 }, { "epoch": 0.6308564799896887, "grad_norm": 0.015760302528762305, "learning_rate": 4.106695309702829e-06, "loss": 0.0001, "step": 9789 }, { "epoch": 0.6309209254366179, "grad_norm": 0.0005307053243126115, "learning_rate": 4.105979233798783e-06, "loss": 0.0, "step": 9790 }, { "epoch": 0.6309853708835471, "grad_norm": 0.009203236432359213, "learning_rate": 4.105263157894737e-06, "loss": 0.0, "step": 9791 }, { "epoch": 0.6310498163304763, "grad_norm": 0.00010736600066678872, "learning_rate": 4.104547081990692e-06, "loss": 0.0, "step": 9792 }, { "epoch": 0.6311142617774054, "grad_norm": 0.01727893981573304, "learning_rate": 4.103831006086645e-06, "loss": 0.0, "step": 9793 }, { "epoch": 0.6311787072243346, "grad_norm": 0.005858238365772789, "learning_rate": 4.1031149301825994e-06, "loss": 0.0, "step": 9794 }, { "epoch": 0.6312431526712637, "grad_norm": 0.03891247460398242, "learning_rate": 4.102398854278554e-06, "loss": 0.0001, "step": 9795 }, { "epoch": 0.6313075981181929, "grad_norm": 0.03522325296595622, "learning_rate": 4.101682778374508e-06, "loss": 0.0001, "step": 9796 }, { "epoch": 0.6313720435651221, "grad_norm": 0.0002808154914054595, "learning_rate": 4.100966702470462e-06, "loss": 0.0, "step": 9797 }, { "epoch": 0.6314364890120513, "grad_norm": 0.0385147856123727, "learning_rate": 4.100250626566417e-06, "loss": 0.0004, "step": 9798 }, { "epoch": 0.6315009344589805, "grad_norm": 0.005532733142487714, "learning_rate": 4.099534550662371e-06, "loss": 0.0, "step": 9799 }, { "epoch": 0.6315653799059097, "grad_norm": 0.034152268583626674, "learning_rate": 4.098818474758325e-06, "loss": 0.0001, "step": 9800 }, { "epoch": 0.6316298253528388, "grad_norm": 0.39788263618123626, "learning_rate": 4.098102398854279e-06, "loss": 0.0004, "step": 9801 }, { "epoch": 0.631694270799768, "grad_norm": 0.008583379104093024, "learning_rate": 4.097386322950233e-06, "loss": 0.0, "step": 9802 }, { "epoch": 0.6317587162466972, "grad_norm": 0.0025010910330443806, "learning_rate": 4.096670247046187e-06, "loss": 0.0, "step": 9803 }, { "epoch": 0.6318231616936264, "grad_norm": 0.01292406597771796, "learning_rate": 4.095954171142142e-06, "loss": 0.0, "step": 9804 }, { "epoch": 0.6318876071405555, "grad_norm": 0.011911608905516258, "learning_rate": 4.095238095238096e-06, "loss": 0.0, "step": 9805 }, { "epoch": 0.6319520525874847, "grad_norm": 0.021996962163873125, "learning_rate": 4.094522019334049e-06, "loss": 0.0, "step": 9806 }, { "epoch": 0.6320164980344138, "grad_norm": 0.0006577608885751697, "learning_rate": 4.093805943430004e-06, "loss": 0.0, "step": 9807 }, { "epoch": 0.632080943481343, "grad_norm": 0.0022851470470816276, "learning_rate": 4.093089867525958e-06, "loss": 0.0, "step": 9808 }, { "epoch": 0.6321453889282722, "grad_norm": 0.00044632028055951176, "learning_rate": 4.092373791621912e-06, "loss": 0.0, "step": 9809 }, { "epoch": 0.6322098343752014, "grad_norm": 0.0005976803320070037, "learning_rate": 4.0916577157178665e-06, "loss": 0.0, "step": 9810 }, { "epoch": 0.6322742798221306, "grad_norm": 0.0005299045073304586, "learning_rate": 4.090941639813821e-06, "loss": 0.0, "step": 9811 }, { "epoch": 0.6323387252690598, "grad_norm": 0.00022852497761108778, "learning_rate": 4.090225563909775e-06, "loss": 0.0, "step": 9812 }, { "epoch": 0.632403170715989, "grad_norm": 0.21186302379079988, "learning_rate": 4.089509488005729e-06, "loss": 0.0031, "step": 9813 }, { "epoch": 0.6324676161629181, "grad_norm": 0.12596821572501107, "learning_rate": 4.088793412101683e-06, "loss": 0.0002, "step": 9814 }, { "epoch": 0.6325320616098473, "grad_norm": 0.0013975578937536427, "learning_rate": 4.088077336197637e-06, "loss": 0.0, "step": 9815 }, { "epoch": 0.6325965070567764, "grad_norm": 0.005904553442262322, "learning_rate": 4.0873612602935915e-06, "loss": 0.0, "step": 9816 }, { "epoch": 0.6326609525037056, "grad_norm": 0.016000071829722146, "learning_rate": 4.086645184389546e-06, "loss": 0.0002, "step": 9817 }, { "epoch": 0.6327253979506348, "grad_norm": 0.0028822450328563408, "learning_rate": 4.085929108485499e-06, "loss": 0.0, "step": 9818 }, { "epoch": 0.632789843397564, "grad_norm": 0.05139031940953903, "learning_rate": 4.0852130325814535e-06, "loss": 0.0002, "step": 9819 }, { "epoch": 0.6328542888444931, "grad_norm": 9.16404113373634e-05, "learning_rate": 4.084496956677408e-06, "loss": 0.0, "step": 9820 }, { "epoch": 0.6329187342914223, "grad_norm": 0.0016909506227782403, "learning_rate": 4.083780880773363e-06, "loss": 0.0, "step": 9821 }, { "epoch": 0.6329831797383515, "grad_norm": 0.025381268785659834, "learning_rate": 4.083064804869316e-06, "loss": 0.0, "step": 9822 }, { "epoch": 0.6330476251852807, "grad_norm": 0.004422708829880086, "learning_rate": 4.082348728965271e-06, "loss": 0.0001, "step": 9823 }, { "epoch": 0.6331120706322099, "grad_norm": 0.002109393715706444, "learning_rate": 4.081632653061225e-06, "loss": 0.0, "step": 9824 }, { "epoch": 0.633176516079139, "grad_norm": 0.002699339732813906, "learning_rate": 4.080916577157179e-06, "loss": 0.0, "step": 9825 }, { "epoch": 0.6332409615260682, "grad_norm": 0.0017968199788469807, "learning_rate": 4.080200501253133e-06, "loss": 0.0, "step": 9826 }, { "epoch": 0.6333054069729973, "grad_norm": 0.004023536004873301, "learning_rate": 4.079484425349087e-06, "loss": 0.0, "step": 9827 }, { "epoch": 0.6333698524199265, "grad_norm": 0.0003069514670206831, "learning_rate": 4.078768349445041e-06, "loss": 0.0, "step": 9828 }, { "epoch": 0.6334342978668557, "grad_norm": 0.0028215493809561425, "learning_rate": 4.078052273540996e-06, "loss": 0.0, "step": 9829 }, { "epoch": 0.6334987433137849, "grad_norm": 0.11994944466776976, "learning_rate": 4.07733619763695e-06, "loss": 0.0003, "step": 9830 }, { "epoch": 0.633563188760714, "grad_norm": 0.10761587225219324, "learning_rate": 4.076620121732903e-06, "loss": 0.0003, "step": 9831 }, { "epoch": 0.6336276342076432, "grad_norm": 0.011345525980090358, "learning_rate": 4.0759040458288586e-06, "loss": 0.0, "step": 9832 }, { "epoch": 0.6336920796545724, "grad_norm": 0.02846167078207075, "learning_rate": 4.075187969924813e-06, "loss": 0.0, "step": 9833 }, { "epoch": 0.6337565251015016, "grad_norm": 0.0062590929192869165, "learning_rate": 4.074471894020766e-06, "loss": 0.0, "step": 9834 }, { "epoch": 0.6338209705484308, "grad_norm": 0.3454879802491421, "learning_rate": 4.073755818116721e-06, "loss": 0.0009, "step": 9835 }, { "epoch": 0.63388541599536, "grad_norm": 0.00633899276324277, "learning_rate": 4.073039742212675e-06, "loss": 0.0, "step": 9836 }, { "epoch": 0.6339498614422892, "grad_norm": 0.014193635549446313, "learning_rate": 4.072323666308629e-06, "loss": 0.0, "step": 9837 }, { "epoch": 0.6340143068892182, "grad_norm": 0.11098855918860485, "learning_rate": 4.0716075904045835e-06, "loss": 0.0002, "step": 9838 }, { "epoch": 0.6340787523361474, "grad_norm": 0.0023207273082203215, "learning_rate": 4.070891514500537e-06, "loss": 0.0, "step": 9839 }, { "epoch": 0.6341431977830766, "grad_norm": 0.0006344213112119258, "learning_rate": 4.070175438596491e-06, "loss": 0.0, "step": 9840 }, { "epoch": 0.6342076432300058, "grad_norm": 0.08611299195893168, "learning_rate": 4.0694593626924456e-06, "loss": 0.0004, "step": 9841 }, { "epoch": 0.634272088676935, "grad_norm": 8.981659056621043e-05, "learning_rate": 4.0687432867884e-06, "loss": 0.0, "step": 9842 }, { "epoch": 0.6343365341238641, "grad_norm": 0.00012564965903519854, "learning_rate": 4.068027210884354e-06, "loss": 0.0, "step": 9843 }, { "epoch": 0.6344009795707933, "grad_norm": 0.0006654507491736668, "learning_rate": 4.0673111349803084e-06, "loss": 0.0, "step": 9844 }, { "epoch": 0.6344654250177225, "grad_norm": 0.0034752734788544896, "learning_rate": 4.066595059076263e-06, "loss": 0.0, "step": 9845 }, { "epoch": 0.6345298704646517, "grad_norm": 0.0011339178097354364, "learning_rate": 4.065878983172217e-06, "loss": 0.0, "step": 9846 }, { "epoch": 0.6345943159115809, "grad_norm": 0.008264259238473211, "learning_rate": 4.0651629072681705e-06, "loss": 0.0, "step": 9847 }, { "epoch": 0.6346587613585101, "grad_norm": 0.003021064877237617, "learning_rate": 4.064446831364125e-06, "loss": 0.0, "step": 9848 }, { "epoch": 0.6347232068054391, "grad_norm": 0.0014914980012132165, "learning_rate": 4.063730755460079e-06, "loss": 0.0, "step": 9849 }, { "epoch": 0.6347876522523683, "grad_norm": 0.12833182633070414, "learning_rate": 4.063014679556033e-06, "loss": 0.0003, "step": 9850 }, { "epoch": 0.6348520976992975, "grad_norm": 0.010818723560201225, "learning_rate": 4.062298603651988e-06, "loss": 0.0, "step": 9851 }, { "epoch": 0.6349165431462267, "grad_norm": 0.15318523214957946, "learning_rate": 4.061582527747941e-06, "loss": 0.0004, "step": 9852 }, { "epoch": 0.6349809885931559, "grad_norm": 0.19271358384033405, "learning_rate": 4.0608664518438954e-06, "loss": 0.0023, "step": 9853 }, { "epoch": 0.6350454340400851, "grad_norm": 0.10603754438942112, "learning_rate": 4.06015037593985e-06, "loss": 0.0002, "step": 9854 }, { "epoch": 0.6351098794870143, "grad_norm": 0.00042468406024622174, "learning_rate": 4.059434300035804e-06, "loss": 0.0, "step": 9855 }, { "epoch": 0.6351743249339434, "grad_norm": 0.0011900971873508306, "learning_rate": 4.058718224131758e-06, "loss": 0.0, "step": 9856 }, { "epoch": 0.6352387703808726, "grad_norm": 0.2271719250462828, "learning_rate": 4.058002148227713e-06, "loss": 0.0003, "step": 9857 }, { "epoch": 0.6353032158278018, "grad_norm": 0.0006767117946808032, "learning_rate": 4.057286072323667e-06, "loss": 0.0, "step": 9858 }, { "epoch": 0.635367661274731, "grad_norm": 0.00020405993774540835, "learning_rate": 4.056569996419621e-06, "loss": 0.0, "step": 9859 }, { "epoch": 0.6354321067216601, "grad_norm": 0.0038386513119053446, "learning_rate": 4.055853920515575e-06, "loss": 0.0, "step": 9860 }, { "epoch": 0.6354965521685892, "grad_norm": 0.14175836497304545, "learning_rate": 4.055137844611529e-06, "loss": 0.0041, "step": 9861 }, { "epoch": 0.6355609976155184, "grad_norm": 0.5609968286873434, "learning_rate": 4.054421768707483e-06, "loss": 0.0015, "step": 9862 }, { "epoch": 0.6356254430624476, "grad_norm": 0.22833693274969805, "learning_rate": 4.053705692803438e-06, "loss": 0.0037, "step": 9863 }, { "epoch": 0.6356898885093768, "grad_norm": 0.0004343785643513413, "learning_rate": 4.052989616899392e-06, "loss": 0.0, "step": 9864 }, { "epoch": 0.635754333956306, "grad_norm": 0.00029451475877984956, "learning_rate": 4.052273540995345e-06, "loss": 0.0, "step": 9865 }, { "epoch": 0.6358187794032352, "grad_norm": 0.1578427849877735, "learning_rate": 4.0515574650913e-06, "loss": 0.0021, "step": 9866 }, { "epoch": 0.6358832248501644, "grad_norm": 0.002889111443432167, "learning_rate": 4.050841389187255e-06, "loss": 0.0, "step": 9867 }, { "epoch": 0.6359476702970935, "grad_norm": 0.015472808998329517, "learning_rate": 4.050125313283208e-06, "loss": 0.0, "step": 9868 }, { "epoch": 0.6360121157440227, "grad_norm": 0.0006547937423044578, "learning_rate": 4.0494092373791625e-06, "loss": 0.0, "step": 9869 }, { "epoch": 0.6360765611909519, "grad_norm": 0.0016740075257970334, "learning_rate": 4.048693161475117e-06, "loss": 0.0, "step": 9870 }, { "epoch": 0.636141006637881, "grad_norm": 0.0032675604364454484, "learning_rate": 4.047977085571071e-06, "loss": 0.0, "step": 9871 }, { "epoch": 0.6362054520848102, "grad_norm": 0.07995895205773623, "learning_rate": 4.047261009667025e-06, "loss": 0.0001, "step": 9872 }, { "epoch": 0.6362698975317393, "grad_norm": 0.01585555228427254, "learning_rate": 4.046544933762979e-06, "loss": 0.0002, "step": 9873 }, { "epoch": 0.6363343429786685, "grad_norm": 0.0005467433965174832, "learning_rate": 4.045828857858933e-06, "loss": 0.0, "step": 9874 }, { "epoch": 0.6363987884255977, "grad_norm": 0.002782702034882121, "learning_rate": 4.0451127819548875e-06, "loss": 0.0, "step": 9875 }, { "epoch": 0.6364632338725269, "grad_norm": 0.0010571305041660374, "learning_rate": 4.044396706050842e-06, "loss": 0.0, "step": 9876 }, { "epoch": 0.6365276793194561, "grad_norm": 0.671455673838229, "learning_rate": 4.043680630146796e-06, "loss": 0.0056, "step": 9877 }, { "epoch": 0.6365921247663853, "grad_norm": 6.159026134501892e-05, "learning_rate": 4.0429645542427495e-06, "loss": 0.0, "step": 9878 }, { "epoch": 0.6366565702133145, "grad_norm": 0.11269901540612999, "learning_rate": 4.042248478338705e-06, "loss": 0.0002, "step": 9879 }, { "epoch": 0.6367210156602436, "grad_norm": 0.13829956983992775, "learning_rate": 4.041532402434659e-06, "loss": 0.0004, "step": 9880 }, { "epoch": 0.6367854611071728, "grad_norm": 0.00028129003725998087, "learning_rate": 4.040816326530612e-06, "loss": 0.0, "step": 9881 }, { "epoch": 0.636849906554102, "grad_norm": 0.18228105218956056, "learning_rate": 4.040100250626567e-06, "loss": 0.0002, "step": 9882 }, { "epoch": 0.6369143520010311, "grad_norm": 0.00044808417187598156, "learning_rate": 4.039384174722521e-06, "loss": 0.0, "step": 9883 }, { "epoch": 0.6369787974479603, "grad_norm": 0.004541619186065626, "learning_rate": 4.038668098818475e-06, "loss": 0.0, "step": 9884 }, { "epoch": 0.6370432428948894, "grad_norm": 0.1287355882717353, "learning_rate": 4.03795202291443e-06, "loss": 0.0004, "step": 9885 }, { "epoch": 0.6371076883418186, "grad_norm": 0.000665257157196762, "learning_rate": 4.037235947010383e-06, "loss": 0.0, "step": 9886 }, { "epoch": 0.6371721337887478, "grad_norm": 0.00015430858028718633, "learning_rate": 4.036519871106337e-06, "loss": 0.0, "step": 9887 }, { "epoch": 0.637236579235677, "grad_norm": 0.01693254525602601, "learning_rate": 4.035803795202292e-06, "loss": 0.0001, "step": 9888 }, { "epoch": 0.6373010246826062, "grad_norm": 0.000977959590454271, "learning_rate": 4.035087719298246e-06, "loss": 0.0, "step": 9889 }, { "epoch": 0.6373654701295354, "grad_norm": 0.0001807368223541276, "learning_rate": 4.0343716433942e-06, "loss": 0.0, "step": 9890 }, { "epoch": 0.6374299155764646, "grad_norm": 0.13511406041563384, "learning_rate": 4.0336555674901546e-06, "loss": 0.0004, "step": 9891 }, { "epoch": 0.6374943610233937, "grad_norm": 0.00462214646127079, "learning_rate": 4.032939491586109e-06, "loss": 0.0, "step": 9892 }, { "epoch": 0.6375588064703229, "grad_norm": 8.380419679333197e-05, "learning_rate": 4.032223415682063e-06, "loss": 0.0, "step": 9893 }, { "epoch": 0.637623251917252, "grad_norm": 0.001185555940977872, "learning_rate": 4.031507339778017e-06, "loss": 0.0, "step": 9894 }, { "epoch": 0.6376876973641812, "grad_norm": 0.01787338779530966, "learning_rate": 4.030791263873971e-06, "loss": 0.0, "step": 9895 }, { "epoch": 0.6377521428111104, "grad_norm": 0.05857672841469521, "learning_rate": 4.030075187969925e-06, "loss": 0.0003, "step": 9896 }, { "epoch": 0.6378165882580396, "grad_norm": 0.042201623274486, "learning_rate": 4.0293591120658795e-06, "loss": 0.0002, "step": 9897 }, { "epoch": 0.6378810337049687, "grad_norm": 0.014896175954414791, "learning_rate": 4.028643036161834e-06, "loss": 0.0, "step": 9898 }, { "epoch": 0.6379454791518979, "grad_norm": 0.006496097356850603, "learning_rate": 4.027926960257787e-06, "loss": 0.0, "step": 9899 }, { "epoch": 0.6380099245988271, "grad_norm": 0.0005663189838518361, "learning_rate": 4.0272108843537416e-06, "loss": 0.0, "step": 9900 }, { "epoch": 0.6380743700457563, "grad_norm": 0.00168549572276779, "learning_rate": 4.026494808449696e-06, "loss": 0.0, "step": 9901 }, { "epoch": 0.6381388154926855, "grad_norm": 0.021107483516845608, "learning_rate": 4.02577873254565e-06, "loss": 0.0001, "step": 9902 }, { "epoch": 0.6382032609396147, "grad_norm": 7.06100544471114e-05, "learning_rate": 4.0250626566416044e-06, "loss": 0.0, "step": 9903 }, { "epoch": 0.6382677063865438, "grad_norm": 0.004029435938904156, "learning_rate": 4.024346580737559e-06, "loss": 0.0, "step": 9904 }, { "epoch": 0.6383321518334729, "grad_norm": 0.003942796891727747, "learning_rate": 4.023630504833513e-06, "loss": 0.0, "step": 9905 }, { "epoch": 0.6383965972804021, "grad_norm": 0.10042532347163381, "learning_rate": 4.022914428929467e-06, "loss": 0.0055, "step": 9906 }, { "epoch": 0.6384610427273313, "grad_norm": 0.013568788087903462, "learning_rate": 4.022198353025421e-06, "loss": 0.0, "step": 9907 }, { "epoch": 0.6385254881742605, "grad_norm": 0.11221282353071427, "learning_rate": 4.021482277121375e-06, "loss": 0.0001, "step": 9908 }, { "epoch": 0.6385899336211897, "grad_norm": 0.00044984605912318877, "learning_rate": 4.020766201217329e-06, "loss": 0.0, "step": 9909 }, { "epoch": 0.6386543790681188, "grad_norm": 0.00013384249196529097, "learning_rate": 4.020050125313284e-06, "loss": 0.0, "step": 9910 }, { "epoch": 0.638718824515048, "grad_norm": 0.003407667433976282, "learning_rate": 4.019334049409237e-06, "loss": 0.0, "step": 9911 }, { "epoch": 0.6387832699619772, "grad_norm": 0.011748166466181148, "learning_rate": 4.0186179735051914e-06, "loss": 0.0, "step": 9912 }, { "epoch": 0.6388477154089064, "grad_norm": 0.027792480039555506, "learning_rate": 4.017901897601146e-06, "loss": 0.0, "step": 9913 }, { "epoch": 0.6389121608558356, "grad_norm": 0.008272129900484659, "learning_rate": 4.017185821697101e-06, "loss": 0.0001, "step": 9914 }, { "epoch": 0.6389766063027648, "grad_norm": 0.0002833124323843118, "learning_rate": 4.016469745793054e-06, "loss": 0.0, "step": 9915 }, { "epoch": 0.6390410517496938, "grad_norm": 0.025964498676430598, "learning_rate": 4.015753669889009e-06, "loss": 0.0, "step": 9916 }, { "epoch": 0.639105497196623, "grad_norm": 0.17665134300402457, "learning_rate": 4.015037593984963e-06, "loss": 0.0008, "step": 9917 }, { "epoch": 0.6391699426435522, "grad_norm": 0.010062429170806275, "learning_rate": 4.014321518080917e-06, "loss": 0.0, "step": 9918 }, { "epoch": 0.6392343880904814, "grad_norm": 0.001940753301590691, "learning_rate": 4.013605442176871e-06, "loss": 0.0, "step": 9919 }, { "epoch": 0.6392988335374106, "grad_norm": 0.13457927397190214, "learning_rate": 4.012889366272825e-06, "loss": 0.0004, "step": 9920 }, { "epoch": 0.6393632789843398, "grad_norm": 0.00035575603334679986, "learning_rate": 4.012173290368779e-06, "loss": 0.0, "step": 9921 }, { "epoch": 0.6394277244312689, "grad_norm": 0.19851455393941003, "learning_rate": 4.011457214464734e-06, "loss": 0.0019, "step": 9922 }, { "epoch": 0.6394921698781981, "grad_norm": 0.0008800058176206106, "learning_rate": 4.010741138560688e-06, "loss": 0.0, "step": 9923 }, { "epoch": 0.6395566153251273, "grad_norm": 0.015232629286257246, "learning_rate": 4.010025062656641e-06, "loss": 0.0, "step": 9924 }, { "epoch": 0.6396210607720565, "grad_norm": 0.005831437638291429, "learning_rate": 4.009308986752596e-06, "loss": 0.0, "step": 9925 }, { "epoch": 0.6396855062189857, "grad_norm": 0.00974646379763554, "learning_rate": 4.008592910848551e-06, "loss": 0.0, "step": 9926 }, { "epoch": 0.6397499516659148, "grad_norm": 0.00011297209002473684, "learning_rate": 4.007876834944504e-06, "loss": 0.0, "step": 9927 }, { "epoch": 0.6398143971128439, "grad_norm": 0.00044290672661680006, "learning_rate": 4.0071607590404585e-06, "loss": 0.0, "step": 9928 }, { "epoch": 0.6398788425597731, "grad_norm": 0.002342356820183904, "learning_rate": 4.006444683136413e-06, "loss": 0.0, "step": 9929 }, { "epoch": 0.6399432880067023, "grad_norm": 0.002250269272006791, "learning_rate": 4.005728607232367e-06, "loss": 0.0, "step": 9930 }, { "epoch": 0.6400077334536315, "grad_norm": 0.007098118431674166, "learning_rate": 4.005012531328321e-06, "loss": 0.0001, "step": 9931 }, { "epoch": 0.6400721789005607, "grad_norm": 0.0006225908267203256, "learning_rate": 4.004296455424275e-06, "loss": 0.0, "step": 9932 }, { "epoch": 0.6401366243474899, "grad_norm": 0.002042717291839209, "learning_rate": 4.003580379520229e-06, "loss": 0.0, "step": 9933 }, { "epoch": 0.640201069794419, "grad_norm": 0.8397476141266291, "learning_rate": 4.0028643036161835e-06, "loss": 0.0014, "step": 9934 }, { "epoch": 0.6402655152413482, "grad_norm": 0.01909159525474567, "learning_rate": 4.002148227712138e-06, "loss": 0.0001, "step": 9935 }, { "epoch": 0.6403299606882774, "grad_norm": 0.0063646263341570804, "learning_rate": 4.001432151808092e-06, "loss": 0.0, "step": 9936 }, { "epoch": 0.6403944061352066, "grad_norm": 0.00024152938378510026, "learning_rate": 4.000716075904046e-06, "loss": 0.0, "step": 9937 }, { "epoch": 0.6404588515821357, "grad_norm": 0.00029932230104452766, "learning_rate": 4.000000000000001e-06, "loss": 0.0, "step": 9938 }, { "epoch": 0.6405232970290649, "grad_norm": 0.01575336589201022, "learning_rate": 3.999283924095955e-06, "loss": 0.0001, "step": 9939 }, { "epoch": 0.640587742475994, "grad_norm": 0.4669428800544325, "learning_rate": 3.998567848191908e-06, "loss": 0.0021, "step": 9940 }, { "epoch": 0.6406521879229232, "grad_norm": 0.0012947301126589104, "learning_rate": 3.997851772287863e-06, "loss": 0.0, "step": 9941 }, { "epoch": 0.6407166333698524, "grad_norm": 0.013855897877270457, "learning_rate": 3.997135696383817e-06, "loss": 0.0, "step": 9942 }, { "epoch": 0.6407810788167816, "grad_norm": 0.00033208404617710225, "learning_rate": 3.996419620479771e-06, "loss": 0.0, "step": 9943 }, { "epoch": 0.6408455242637108, "grad_norm": 0.10435821286848203, "learning_rate": 3.995703544575726e-06, "loss": 0.0006, "step": 9944 }, { "epoch": 0.64090996971064, "grad_norm": 0.015503957950853523, "learning_rate": 3.994987468671679e-06, "loss": 0.0, "step": 9945 }, { "epoch": 0.6409744151575691, "grad_norm": 0.0005417660503162809, "learning_rate": 3.994271392767633e-06, "loss": 0.0, "step": 9946 }, { "epoch": 0.6410388606044983, "grad_norm": 2.6578900723737707e-05, "learning_rate": 3.993555316863588e-06, "loss": 0.0, "step": 9947 }, { "epoch": 0.6411033060514275, "grad_norm": 0.00017938237437263428, "learning_rate": 3.992839240959542e-06, "loss": 0.0, "step": 9948 }, { "epoch": 0.6411677514983566, "grad_norm": 0.34429821818284284, "learning_rate": 3.992123165055496e-06, "loss": 0.0011, "step": 9949 }, { "epoch": 0.6412321969452858, "grad_norm": 0.0034726296503280736, "learning_rate": 3.9914070891514506e-06, "loss": 0.0, "step": 9950 }, { "epoch": 0.641296642392215, "grad_norm": 0.00018911348694327, "learning_rate": 3.990691013247405e-06, "loss": 0.0, "step": 9951 }, { "epoch": 0.6413610878391441, "grad_norm": 0.0021938632494621483, "learning_rate": 3.989974937343359e-06, "loss": 0.0, "step": 9952 }, { "epoch": 0.6414255332860733, "grad_norm": 0.0012114879226396045, "learning_rate": 3.989258861439313e-06, "loss": 0.0, "step": 9953 }, { "epoch": 0.6414899787330025, "grad_norm": 0.05394574339195268, "learning_rate": 3.988542785535267e-06, "loss": 0.0005, "step": 9954 }, { "epoch": 0.6415544241799317, "grad_norm": 0.9579358243969295, "learning_rate": 3.987826709631221e-06, "loss": 0.0048, "step": 9955 }, { "epoch": 0.6416188696268609, "grad_norm": 0.34143033673758666, "learning_rate": 3.9871106337271755e-06, "loss": 0.0042, "step": 9956 }, { "epoch": 0.6416833150737901, "grad_norm": 0.17423421059847782, "learning_rate": 3.98639455782313e-06, "loss": 0.0005, "step": 9957 }, { "epoch": 0.6417477605207192, "grad_norm": 0.0009932734330962791, "learning_rate": 3.985678481919083e-06, "loss": 0.0, "step": 9958 }, { "epoch": 0.6418122059676484, "grad_norm": 0.000888577600146786, "learning_rate": 3.9849624060150376e-06, "loss": 0.0, "step": 9959 }, { "epoch": 0.6418766514145776, "grad_norm": 0.012914636043180742, "learning_rate": 3.984246330110992e-06, "loss": 0.0002, "step": 9960 }, { "epoch": 0.6419410968615067, "grad_norm": 0.04609532181840023, "learning_rate": 3.983530254206946e-06, "loss": 0.0, "step": 9961 }, { "epoch": 0.6420055423084359, "grad_norm": 0.16728946370704814, "learning_rate": 3.9828141783029004e-06, "loss": 0.0015, "step": 9962 }, { "epoch": 0.6420699877553651, "grad_norm": 0.0006231815176938104, "learning_rate": 3.982098102398855e-06, "loss": 0.0, "step": 9963 }, { "epoch": 0.6421344332022942, "grad_norm": 0.04196601012932234, "learning_rate": 3.981382026494809e-06, "loss": 0.0001, "step": 9964 }, { "epoch": 0.6421988786492234, "grad_norm": 0.6129773352744001, "learning_rate": 3.980665950590763e-06, "loss": 0.0047, "step": 9965 }, { "epoch": 0.6422633240961526, "grad_norm": 0.006540673913466794, "learning_rate": 3.979949874686717e-06, "loss": 0.0, "step": 9966 }, { "epoch": 0.6423277695430818, "grad_norm": 0.010330284011252865, "learning_rate": 3.979233798782671e-06, "loss": 0.0001, "step": 9967 }, { "epoch": 0.642392214990011, "grad_norm": 0.0012068267576809393, "learning_rate": 3.978517722878625e-06, "loss": 0.0, "step": 9968 }, { "epoch": 0.6424566604369402, "grad_norm": 0.004279481705147905, "learning_rate": 3.97780164697458e-06, "loss": 0.0, "step": 9969 }, { "epoch": 0.6425211058838693, "grad_norm": 0.015038512520982627, "learning_rate": 3.977085571070534e-06, "loss": 0.0, "step": 9970 }, { "epoch": 0.6425855513307985, "grad_norm": 0.0005527755708677373, "learning_rate": 3.9763694951664874e-06, "loss": 0.0, "step": 9971 }, { "epoch": 0.6426499967777276, "grad_norm": 0.002589969194584389, "learning_rate": 3.975653419262443e-06, "loss": 0.0, "step": 9972 }, { "epoch": 0.6427144422246568, "grad_norm": 0.4182773591616916, "learning_rate": 3.974937343358397e-06, "loss": 0.0002, "step": 9973 }, { "epoch": 0.642778887671586, "grad_norm": 0.09478272415081301, "learning_rate": 3.97422126745435e-06, "loss": 0.0001, "step": 9974 }, { "epoch": 0.6428433331185152, "grad_norm": 0.06265709550030224, "learning_rate": 3.973505191550305e-06, "loss": 0.0, "step": 9975 }, { "epoch": 0.6429077785654443, "grad_norm": 0.004482413553391734, "learning_rate": 3.972789115646259e-06, "loss": 0.0, "step": 9976 }, { "epoch": 0.6429722240123735, "grad_norm": 0.012129023568842196, "learning_rate": 3.972073039742213e-06, "loss": 0.0001, "step": 9977 }, { "epoch": 0.6430366694593027, "grad_norm": 0.02426212837729917, "learning_rate": 3.9713569638381675e-06, "loss": 0.0001, "step": 9978 }, { "epoch": 0.6431011149062319, "grad_norm": 0.388016344424232, "learning_rate": 3.970640887934121e-06, "loss": 0.0015, "step": 9979 }, { "epoch": 0.6431655603531611, "grad_norm": 0.009141367046433759, "learning_rate": 3.969924812030075e-06, "loss": 0.0, "step": 9980 }, { "epoch": 0.6432300058000903, "grad_norm": 0.0032954234977543386, "learning_rate": 3.96920873612603e-06, "loss": 0.0, "step": 9981 }, { "epoch": 0.6432944512470194, "grad_norm": 0.0022175645149016825, "learning_rate": 3.968492660221984e-06, "loss": 0.0, "step": 9982 }, { "epoch": 0.6433588966939485, "grad_norm": 0.0007280509024169878, "learning_rate": 3.967776584317938e-06, "loss": 0.0, "step": 9983 }, { "epoch": 0.6434233421408777, "grad_norm": 0.056778146835200966, "learning_rate": 3.9670605084138925e-06, "loss": 0.0001, "step": 9984 }, { "epoch": 0.6434877875878069, "grad_norm": 0.26815644299933966, "learning_rate": 3.966344432509847e-06, "loss": 0.0022, "step": 9985 }, { "epoch": 0.6435522330347361, "grad_norm": 0.007594321936676571, "learning_rate": 3.965628356605801e-06, "loss": 0.0, "step": 9986 }, { "epoch": 0.6436166784816653, "grad_norm": 0.001696701726712196, "learning_rate": 3.9649122807017545e-06, "loss": 0.0, "step": 9987 }, { "epoch": 0.6436811239285944, "grad_norm": 0.001928534918932584, "learning_rate": 3.964196204797709e-06, "loss": 0.0, "step": 9988 }, { "epoch": 0.6437455693755236, "grad_norm": 0.08768668886361478, "learning_rate": 3.963480128893663e-06, "loss": 0.0008, "step": 9989 }, { "epoch": 0.6438100148224528, "grad_norm": 0.001866720703754344, "learning_rate": 3.962764052989617e-06, "loss": 0.0, "step": 9990 }, { "epoch": 0.643874460269382, "grad_norm": 0.11344296324003514, "learning_rate": 3.962047977085572e-06, "loss": 0.0017, "step": 9991 }, { "epoch": 0.6439389057163112, "grad_norm": 0.0041534937274514475, "learning_rate": 3.961331901181525e-06, "loss": 0.0, "step": 9992 }, { "epoch": 0.6440033511632404, "grad_norm": 0.0043058563308134915, "learning_rate": 3.9606158252774795e-06, "loss": 0.0, "step": 9993 }, { "epoch": 0.6440677966101694, "grad_norm": 0.0028722903588658793, "learning_rate": 3.959899749373434e-06, "loss": 0.0, "step": 9994 }, { "epoch": 0.6441322420570986, "grad_norm": 0.0002852931476689053, "learning_rate": 3.959183673469388e-06, "loss": 0.0, "step": 9995 }, { "epoch": 0.6441966875040278, "grad_norm": 1.4214037439131955, "learning_rate": 3.958467597565342e-06, "loss": 0.0013, "step": 9996 }, { "epoch": 0.644261132950957, "grad_norm": 0.002912964128315929, "learning_rate": 3.957751521661297e-06, "loss": 0.0, "step": 9997 }, { "epoch": 0.6443255783978862, "grad_norm": 0.008286350517975941, "learning_rate": 3.957035445757251e-06, "loss": 0.0, "step": 9998 }, { "epoch": 0.6443900238448154, "grad_norm": 0.0009665192786302733, "learning_rate": 3.956319369853205e-06, "loss": 0.0, "step": 9999 }, { "epoch": 0.6444544692917445, "grad_norm": 0.0026490413554310444, "learning_rate": 3.955603293949159e-06, "loss": 0.0, "step": 10000 }, { "epoch": 0.6445189147386737, "grad_norm": 0.01759335401256255, "learning_rate": 3.954887218045113e-06, "loss": 0.0, "step": 10001 }, { "epoch": 0.6445833601856029, "grad_norm": 0.009520969315805258, "learning_rate": 3.954171142141067e-06, "loss": 0.0, "step": 10002 }, { "epoch": 0.6446478056325321, "grad_norm": 0.001728311984742966, "learning_rate": 3.953455066237022e-06, "loss": 0.0, "step": 10003 }, { "epoch": 0.6447122510794613, "grad_norm": 0.0009198832699478046, "learning_rate": 3.952738990332975e-06, "loss": 0.0, "step": 10004 }, { "epoch": 0.6447766965263904, "grad_norm": 0.0011534950561722203, "learning_rate": 3.952022914428929e-06, "loss": 0.0, "step": 10005 }, { "epoch": 0.6448411419733195, "grad_norm": 0.0003031917733232299, "learning_rate": 3.951306838524884e-06, "loss": 0.0, "step": 10006 }, { "epoch": 0.6449055874202487, "grad_norm": 0.001510916530904056, "learning_rate": 3.950590762620839e-06, "loss": 0.0, "step": 10007 }, { "epoch": 0.6449700328671779, "grad_norm": 0.0451465203765521, "learning_rate": 3.949874686716792e-06, "loss": 0.0004, "step": 10008 }, { "epoch": 0.6450344783141071, "grad_norm": 0.0009382365449063471, "learning_rate": 3.9491586108127466e-06, "loss": 0.0, "step": 10009 }, { "epoch": 0.6450989237610363, "grad_norm": 0.00043472888646857314, "learning_rate": 3.948442534908701e-06, "loss": 0.0, "step": 10010 }, { "epoch": 0.6451633692079655, "grad_norm": 0.0012430078323916194, "learning_rate": 3.947726459004655e-06, "loss": 0.0, "step": 10011 }, { "epoch": 0.6452278146548946, "grad_norm": 0.0014639720055084765, "learning_rate": 3.947010383100609e-06, "loss": 0.0, "step": 10012 }, { "epoch": 0.6452922601018238, "grad_norm": 0.002401018043739887, "learning_rate": 3.946294307196563e-06, "loss": 0.0, "step": 10013 }, { "epoch": 0.645356705548753, "grad_norm": 0.015478704234006003, "learning_rate": 3.945578231292517e-06, "loss": 0.0, "step": 10014 }, { "epoch": 0.6454211509956822, "grad_norm": 0.001973197331748077, "learning_rate": 3.9448621553884715e-06, "loss": 0.0, "step": 10015 }, { "epoch": 0.6454855964426113, "grad_norm": 0.05995384634129306, "learning_rate": 3.944146079484426e-06, "loss": 0.0016, "step": 10016 }, { "epoch": 0.6455500418895405, "grad_norm": 0.04199333799249915, "learning_rate": 3.943430003580379e-06, "loss": 0.0, "step": 10017 }, { "epoch": 0.6456144873364696, "grad_norm": 0.05694073473606429, "learning_rate": 3.9427139276763336e-06, "loss": 0.0001, "step": 10018 }, { "epoch": 0.6456789327833988, "grad_norm": 0.0034993164557464897, "learning_rate": 3.941997851772289e-06, "loss": 0.0, "step": 10019 }, { "epoch": 0.645743378230328, "grad_norm": 0.009195842661245979, "learning_rate": 3.941281775868242e-06, "loss": 0.0001, "step": 10020 }, { "epoch": 0.6458078236772572, "grad_norm": 0.00044558898441164694, "learning_rate": 3.9405656999641964e-06, "loss": 0.0, "step": 10021 }, { "epoch": 0.6458722691241864, "grad_norm": 0.0009647395321825095, "learning_rate": 3.939849624060151e-06, "loss": 0.0, "step": 10022 }, { "epoch": 0.6459367145711156, "grad_norm": 0.05492106017896077, "learning_rate": 3.939133548156105e-06, "loss": 0.0001, "step": 10023 }, { "epoch": 0.6460011600180448, "grad_norm": 0.11704149687743283, "learning_rate": 3.938417472252059e-06, "loss": 0.0001, "step": 10024 }, { "epoch": 0.6460656054649739, "grad_norm": 0.005956494344295038, "learning_rate": 3.937701396348013e-06, "loss": 0.0, "step": 10025 }, { "epoch": 0.6461300509119031, "grad_norm": 0.00031369930774159714, "learning_rate": 3.936985320443967e-06, "loss": 0.0, "step": 10026 }, { "epoch": 0.6461944963588322, "grad_norm": 0.06431663756990698, "learning_rate": 3.936269244539921e-06, "loss": 0.0007, "step": 10027 }, { "epoch": 0.6462589418057614, "grad_norm": 0.0011454047402656453, "learning_rate": 3.935553168635876e-06, "loss": 0.0, "step": 10028 }, { "epoch": 0.6463233872526906, "grad_norm": 0.005924979209156877, "learning_rate": 3.93483709273183e-06, "loss": 0.0, "step": 10029 }, { "epoch": 0.6463878326996197, "grad_norm": 0.021247387098242476, "learning_rate": 3.9341210168277834e-06, "loss": 0.0003, "step": 10030 }, { "epoch": 0.6464522781465489, "grad_norm": 0.009782796901761426, "learning_rate": 3.933404940923739e-06, "loss": 0.0, "step": 10031 }, { "epoch": 0.6465167235934781, "grad_norm": 0.034589421586336565, "learning_rate": 3.932688865019693e-06, "loss": 0.0, "step": 10032 }, { "epoch": 0.6465811690404073, "grad_norm": 0.18486351863565736, "learning_rate": 3.931972789115646e-06, "loss": 0.002, "step": 10033 }, { "epoch": 0.6466456144873365, "grad_norm": 7.558595765490377e-05, "learning_rate": 3.931256713211601e-06, "loss": 0.0, "step": 10034 }, { "epoch": 0.6467100599342657, "grad_norm": 0.001811319970900262, "learning_rate": 3.930540637307555e-06, "loss": 0.0, "step": 10035 }, { "epoch": 0.6467745053811949, "grad_norm": 0.00010906105212224324, "learning_rate": 3.929824561403509e-06, "loss": 0.0, "step": 10036 }, { "epoch": 0.646838950828124, "grad_norm": 3.836100339832203, "learning_rate": 3.9291084854994635e-06, "loss": 0.0449, "step": 10037 }, { "epoch": 0.6469033962750532, "grad_norm": 0.0005711962089788333, "learning_rate": 3.928392409595417e-06, "loss": 0.0, "step": 10038 }, { "epoch": 0.6469678417219823, "grad_norm": 0.0012421915486166405, "learning_rate": 3.927676333691371e-06, "loss": 0.0, "step": 10039 }, { "epoch": 0.6470322871689115, "grad_norm": 0.10516009377572318, "learning_rate": 3.926960257787326e-06, "loss": 0.0004, "step": 10040 }, { "epoch": 0.6470967326158407, "grad_norm": 0.011991362360826062, "learning_rate": 3.92624418188328e-06, "loss": 0.0, "step": 10041 }, { "epoch": 0.6471611780627698, "grad_norm": 0.0014761180281003693, "learning_rate": 3.925528105979234e-06, "loss": 0.0, "step": 10042 }, { "epoch": 0.647225623509699, "grad_norm": 0.0014376881504091783, "learning_rate": 3.9248120300751885e-06, "loss": 0.0, "step": 10043 }, { "epoch": 0.6472900689566282, "grad_norm": 0.32301565801310034, "learning_rate": 3.924095954171143e-06, "loss": 0.0005, "step": 10044 }, { "epoch": 0.6473545144035574, "grad_norm": 0.0014243909713996896, "learning_rate": 3.923379878267097e-06, "loss": 0.0, "step": 10045 }, { "epoch": 0.6474189598504866, "grad_norm": 0.000623841882694804, "learning_rate": 3.9226638023630505e-06, "loss": 0.0, "step": 10046 }, { "epoch": 0.6474834052974158, "grad_norm": 0.021735964259168947, "learning_rate": 3.921947726459005e-06, "loss": 0.0, "step": 10047 }, { "epoch": 0.647547850744345, "grad_norm": 0.003005139298277695, "learning_rate": 3.921231650554959e-06, "loss": 0.0, "step": 10048 }, { "epoch": 0.6476122961912741, "grad_norm": 0.009583776841016588, "learning_rate": 3.920515574650913e-06, "loss": 0.0, "step": 10049 }, { "epoch": 0.6476767416382032, "grad_norm": 0.0008914135898324495, "learning_rate": 3.919799498746868e-06, "loss": 0.0, "step": 10050 }, { "epoch": 0.6477411870851324, "grad_norm": 0.0013635385532760897, "learning_rate": 3.919083422842821e-06, "loss": 0.0, "step": 10051 }, { "epoch": 0.6478056325320616, "grad_norm": 0.09818460111478787, "learning_rate": 3.9183673469387755e-06, "loss": 0.0012, "step": 10052 }, { "epoch": 0.6478700779789908, "grad_norm": 0.0008677872964371634, "learning_rate": 3.91765127103473e-06, "loss": 0.0, "step": 10053 }, { "epoch": 0.64793452342592, "grad_norm": 0.013500065410627189, "learning_rate": 3.916935195130684e-06, "loss": 0.0001, "step": 10054 }, { "epoch": 0.6479989688728491, "grad_norm": 0.00023195192506884497, "learning_rate": 3.916219119226638e-06, "loss": 0.0, "step": 10055 }, { "epoch": 0.6480634143197783, "grad_norm": 0.00956348066167725, "learning_rate": 3.915503043322593e-06, "loss": 0.0, "step": 10056 }, { "epoch": 0.6481278597667075, "grad_norm": 0.0011233392884167606, "learning_rate": 3.914786967418547e-06, "loss": 0.0, "step": 10057 }, { "epoch": 0.6481923052136367, "grad_norm": 0.00217687030993054, "learning_rate": 3.914070891514501e-06, "loss": 0.0, "step": 10058 }, { "epoch": 0.6482567506605659, "grad_norm": 0.0007690180828168407, "learning_rate": 3.913354815610455e-06, "loss": 0.0, "step": 10059 }, { "epoch": 0.6483211961074951, "grad_norm": 0.00018957142718229443, "learning_rate": 3.912638739706409e-06, "loss": 0.0, "step": 10060 }, { "epoch": 0.6483856415544241, "grad_norm": 0.0033156219554045124, "learning_rate": 3.911922663802363e-06, "loss": 0.0, "step": 10061 }, { "epoch": 0.6484500870013533, "grad_norm": 0.0193806849948235, "learning_rate": 3.911206587898318e-06, "loss": 0.0, "step": 10062 }, { "epoch": 0.6485145324482825, "grad_norm": 0.0018474096180259869, "learning_rate": 3.910490511994272e-06, "loss": 0.0, "step": 10063 }, { "epoch": 0.6485789778952117, "grad_norm": 0.0027109400656019204, "learning_rate": 3.909774436090225e-06, "loss": 0.0, "step": 10064 }, { "epoch": 0.6486434233421409, "grad_norm": 0.17048972712294916, "learning_rate": 3.90905836018618e-06, "loss": 0.0036, "step": 10065 }, { "epoch": 0.64870786878907, "grad_norm": 0.001639427937846142, "learning_rate": 3.908342284282135e-06, "loss": 0.0, "step": 10066 }, { "epoch": 0.6487723142359992, "grad_norm": 0.0003054587604726002, "learning_rate": 3.907626208378088e-06, "loss": 0.0, "step": 10067 }, { "epoch": 0.6488367596829284, "grad_norm": 0.0027535541277126873, "learning_rate": 3.9069101324740426e-06, "loss": 0.0, "step": 10068 }, { "epoch": 0.6489012051298576, "grad_norm": 0.0006657856217193417, "learning_rate": 3.906194056569997e-06, "loss": 0.0, "step": 10069 }, { "epoch": 0.6489656505767868, "grad_norm": 0.0060369187223777215, "learning_rate": 3.905477980665951e-06, "loss": 0.0, "step": 10070 }, { "epoch": 0.649030096023716, "grad_norm": 0.0019431457633511454, "learning_rate": 3.9047619047619055e-06, "loss": 0.0, "step": 10071 }, { "epoch": 0.649094541470645, "grad_norm": 0.3679831994007836, "learning_rate": 3.904045828857859e-06, "loss": 0.0011, "step": 10072 }, { "epoch": 0.6491589869175742, "grad_norm": 0.007803615252833467, "learning_rate": 3.903329752953813e-06, "loss": 0.0001, "step": 10073 }, { "epoch": 0.6492234323645034, "grad_norm": 0.11488098575653381, "learning_rate": 3.9026136770497675e-06, "loss": 0.0004, "step": 10074 }, { "epoch": 0.6492878778114326, "grad_norm": 0.00033450819125250343, "learning_rate": 3.901897601145722e-06, "loss": 0.0, "step": 10075 }, { "epoch": 0.6493523232583618, "grad_norm": 0.15049065941398837, "learning_rate": 3.901181525241676e-06, "loss": 0.0002, "step": 10076 }, { "epoch": 0.649416768705291, "grad_norm": 0.001570508333555772, "learning_rate": 3.90046544933763e-06, "loss": 0.0, "step": 10077 }, { "epoch": 0.6494812141522202, "grad_norm": 0.030068070105821104, "learning_rate": 3.899749373433585e-06, "loss": 0.0, "step": 10078 }, { "epoch": 0.6495456595991493, "grad_norm": 0.07123143989605957, "learning_rate": 3.899033297529539e-06, "loss": 0.0001, "step": 10079 }, { "epoch": 0.6496101050460785, "grad_norm": 0.007017937085895609, "learning_rate": 3.8983172216254924e-06, "loss": 0.0, "step": 10080 }, { "epoch": 0.6496745504930077, "grad_norm": 0.0017217782930513232, "learning_rate": 3.897601145721447e-06, "loss": 0.0, "step": 10081 }, { "epoch": 0.6497389959399369, "grad_norm": 0.46246827248698735, "learning_rate": 3.896885069817401e-06, "loss": 0.0006, "step": 10082 }, { "epoch": 0.649803441386866, "grad_norm": 0.020012365780105686, "learning_rate": 3.896168993913355e-06, "loss": 0.0, "step": 10083 }, { "epoch": 0.6498678868337951, "grad_norm": 0.0037748346140321957, "learning_rate": 3.89545291800931e-06, "loss": 0.0, "step": 10084 }, { "epoch": 0.6499323322807243, "grad_norm": 7.76592282656403e-05, "learning_rate": 3.894736842105263e-06, "loss": 0.0, "step": 10085 }, { "epoch": 0.6499967777276535, "grad_norm": 0.00016100952285215606, "learning_rate": 3.894020766201217e-06, "loss": 0.0, "step": 10086 }, { "epoch": 0.6500612231745827, "grad_norm": 0.20804745112879044, "learning_rate": 3.893304690297172e-06, "loss": 0.0007, "step": 10087 }, { "epoch": 0.6501256686215119, "grad_norm": 0.0005138267208881703, "learning_rate": 3.892588614393126e-06, "loss": 0.0, "step": 10088 }, { "epoch": 0.6501901140684411, "grad_norm": 0.00140696667912771, "learning_rate": 3.89187253848908e-06, "loss": 0.0, "step": 10089 }, { "epoch": 0.6502545595153703, "grad_norm": 0.0014816001929669846, "learning_rate": 3.891156462585035e-06, "loss": 0.0, "step": 10090 }, { "epoch": 0.6503190049622994, "grad_norm": 0.011444484703948571, "learning_rate": 3.890440386680989e-06, "loss": 0.0, "step": 10091 }, { "epoch": 0.6503834504092286, "grad_norm": 0.002593092375976146, "learning_rate": 3.889724310776943e-06, "loss": 0.0, "step": 10092 }, { "epoch": 0.6504478958561578, "grad_norm": 0.0008823114322447084, "learning_rate": 3.889008234872897e-06, "loss": 0.0, "step": 10093 }, { "epoch": 0.6505123413030869, "grad_norm": 0.45643913189772434, "learning_rate": 3.888292158968851e-06, "loss": 0.0005, "step": 10094 }, { "epoch": 0.6505767867500161, "grad_norm": 0.001553883683300925, "learning_rate": 3.887576083064805e-06, "loss": 0.0, "step": 10095 }, { "epoch": 0.6506412321969453, "grad_norm": 5.411795877500031e-05, "learning_rate": 3.8868600071607595e-06, "loss": 0.0, "step": 10096 }, { "epoch": 0.6507056776438744, "grad_norm": 0.02975246910597393, "learning_rate": 3.886143931256714e-06, "loss": 0.0003, "step": 10097 }, { "epoch": 0.6507701230908036, "grad_norm": 0.0008351681676013702, "learning_rate": 3.885427855352667e-06, "loss": 0.0, "step": 10098 }, { "epoch": 0.6508345685377328, "grad_norm": 0.3116308164738484, "learning_rate": 3.884711779448622e-06, "loss": 0.0023, "step": 10099 }, { "epoch": 0.650899013984662, "grad_norm": 0.003933262974662413, "learning_rate": 3.883995703544576e-06, "loss": 0.0, "step": 10100 }, { "epoch": 0.6509634594315912, "grad_norm": 0.07911621652664459, "learning_rate": 3.88327962764053e-06, "loss": 0.0001, "step": 10101 }, { "epoch": 0.6510279048785204, "grad_norm": 0.0004710866212675768, "learning_rate": 3.8825635517364845e-06, "loss": 0.0, "step": 10102 }, { "epoch": 0.6510923503254495, "grad_norm": 0.0062199568895057135, "learning_rate": 3.881847475832439e-06, "loss": 0.0, "step": 10103 }, { "epoch": 0.6511567957723787, "grad_norm": 0.47167855019758076, "learning_rate": 3.881131399928393e-06, "loss": 0.0011, "step": 10104 }, { "epoch": 0.6512212412193079, "grad_norm": 0.32272571896552665, "learning_rate": 3.880415324024347e-06, "loss": 0.0016, "step": 10105 }, { "epoch": 0.651285686666237, "grad_norm": 0.07696105436920875, "learning_rate": 3.879699248120301e-06, "loss": 0.0001, "step": 10106 }, { "epoch": 0.6513501321131662, "grad_norm": 0.0015881369909475057, "learning_rate": 3.878983172216255e-06, "loss": 0.0, "step": 10107 }, { "epoch": 0.6514145775600954, "grad_norm": 0.00023232975344055165, "learning_rate": 3.878267096312209e-06, "loss": 0.0, "step": 10108 }, { "epoch": 0.6514790230070245, "grad_norm": 0.0025092237827565697, "learning_rate": 3.877551020408164e-06, "loss": 0.0, "step": 10109 }, { "epoch": 0.6515434684539537, "grad_norm": 0.000561346000613679, "learning_rate": 3.876834944504117e-06, "loss": 0.0, "step": 10110 }, { "epoch": 0.6516079139008829, "grad_norm": 0.08851294074908397, "learning_rate": 3.8761188686000715e-06, "loss": 0.0008, "step": 10111 }, { "epoch": 0.6516723593478121, "grad_norm": 9.094776729926713e-05, "learning_rate": 3.875402792696027e-06, "loss": 0.0, "step": 10112 }, { "epoch": 0.6517368047947413, "grad_norm": 0.009651514658336328, "learning_rate": 3.874686716791981e-06, "loss": 0.0001, "step": 10113 }, { "epoch": 0.6518012502416705, "grad_norm": 8.080717123989275e-05, "learning_rate": 3.873970640887934e-06, "loss": 0.0, "step": 10114 }, { "epoch": 0.6518656956885996, "grad_norm": 0.005040657665753197, "learning_rate": 3.873254564983889e-06, "loss": 0.0, "step": 10115 }, { "epoch": 0.6519301411355288, "grad_norm": 3.942586078710562e-05, "learning_rate": 3.872538489079843e-06, "loss": 0.0, "step": 10116 }, { "epoch": 0.6519945865824579, "grad_norm": 0.0007002757109208436, "learning_rate": 3.871822413175797e-06, "loss": 0.0, "step": 10117 }, { "epoch": 0.6520590320293871, "grad_norm": 0.0019366055380994323, "learning_rate": 3.871106337271751e-06, "loss": 0.0, "step": 10118 }, { "epoch": 0.6521234774763163, "grad_norm": 0.0035408849877017687, "learning_rate": 3.870390261367705e-06, "loss": 0.0, "step": 10119 }, { "epoch": 0.6521879229232455, "grad_norm": 0.6270142760119335, "learning_rate": 3.869674185463659e-06, "loss": 0.0046, "step": 10120 }, { "epoch": 0.6522523683701746, "grad_norm": 0.00039780686488753573, "learning_rate": 3.868958109559614e-06, "loss": 0.0, "step": 10121 }, { "epoch": 0.6523168138171038, "grad_norm": 0.003640746475764863, "learning_rate": 3.868242033655568e-06, "loss": 0.0, "step": 10122 }, { "epoch": 0.652381259264033, "grad_norm": 0.4130364426705445, "learning_rate": 3.867525957751521e-06, "loss": 0.0016, "step": 10123 }, { "epoch": 0.6524457047109622, "grad_norm": 0.059154666829826646, "learning_rate": 3.8668098818474765e-06, "loss": 0.0004, "step": 10124 }, { "epoch": 0.6525101501578914, "grad_norm": 0.0013061928478204214, "learning_rate": 3.866093805943431e-06, "loss": 0.0, "step": 10125 }, { "epoch": 0.6525745956048206, "grad_norm": 0.28045806386242605, "learning_rate": 3.865377730039384e-06, "loss": 0.0004, "step": 10126 }, { "epoch": 0.6526390410517497, "grad_norm": 0.01899712727240623, "learning_rate": 3.8646616541353386e-06, "loss": 0.0001, "step": 10127 }, { "epoch": 0.6527034864986788, "grad_norm": 0.051157784159917466, "learning_rate": 3.863945578231293e-06, "loss": 0.0001, "step": 10128 }, { "epoch": 0.652767931945608, "grad_norm": 0.025598438954331462, "learning_rate": 3.863229502327247e-06, "loss": 0.0001, "step": 10129 }, { "epoch": 0.6528323773925372, "grad_norm": 0.0016328159009993447, "learning_rate": 3.8625134264232015e-06, "loss": 0.0, "step": 10130 }, { "epoch": 0.6528968228394664, "grad_norm": 2.6339330807109222, "learning_rate": 3.861797350519155e-06, "loss": 0.0112, "step": 10131 }, { "epoch": 0.6529612682863956, "grad_norm": 0.05387536153884999, "learning_rate": 3.861081274615109e-06, "loss": 0.0007, "step": 10132 }, { "epoch": 0.6530257137333247, "grad_norm": 0.4103096084899589, "learning_rate": 3.8603651987110635e-06, "loss": 0.002, "step": 10133 }, { "epoch": 0.6530901591802539, "grad_norm": 0.0023990892949300193, "learning_rate": 3.859649122807018e-06, "loss": 0.0, "step": 10134 }, { "epoch": 0.6531546046271831, "grad_norm": 0.0025769823262489213, "learning_rate": 3.858933046902972e-06, "loss": 0.0, "step": 10135 }, { "epoch": 0.6532190500741123, "grad_norm": 0.008294945393946193, "learning_rate": 3.858216970998926e-06, "loss": 0.0, "step": 10136 }, { "epoch": 0.6532834955210415, "grad_norm": 0.0038449482692927794, "learning_rate": 3.857500895094881e-06, "loss": 0.0, "step": 10137 }, { "epoch": 0.6533479409679707, "grad_norm": 0.016722334512031662, "learning_rate": 3.856784819190835e-06, "loss": 0.0002, "step": 10138 }, { "epoch": 0.6534123864148997, "grad_norm": 0.45878159933893453, "learning_rate": 3.8560687432867884e-06, "loss": 0.0022, "step": 10139 }, { "epoch": 0.6534768318618289, "grad_norm": 0.003303008831653256, "learning_rate": 3.855352667382743e-06, "loss": 0.0, "step": 10140 }, { "epoch": 0.6535412773087581, "grad_norm": 0.01095606656160586, "learning_rate": 3.854636591478697e-06, "loss": 0.0, "step": 10141 }, { "epoch": 0.6536057227556873, "grad_norm": 0.0007942663676417572, "learning_rate": 3.853920515574651e-06, "loss": 0.0, "step": 10142 }, { "epoch": 0.6536701682026165, "grad_norm": 0.011422861115300881, "learning_rate": 3.853204439670606e-06, "loss": 0.0, "step": 10143 }, { "epoch": 0.6537346136495457, "grad_norm": 0.13214908426043642, "learning_rate": 3.852488363766559e-06, "loss": 0.0003, "step": 10144 }, { "epoch": 0.6537990590964748, "grad_norm": 0.00021781995305225694, "learning_rate": 3.851772287862513e-06, "loss": 0.0, "step": 10145 }, { "epoch": 0.653863504543404, "grad_norm": 0.002687471652329774, "learning_rate": 3.851056211958468e-06, "loss": 0.0, "step": 10146 }, { "epoch": 0.6539279499903332, "grad_norm": 0.0005095891078883199, "learning_rate": 3.850340136054422e-06, "loss": 0.0, "step": 10147 }, { "epoch": 0.6539923954372624, "grad_norm": 0.00152943308971141, "learning_rate": 3.849624060150376e-06, "loss": 0.0, "step": 10148 }, { "epoch": 0.6540568408841916, "grad_norm": 0.0010116890607374909, "learning_rate": 3.848907984246331e-06, "loss": 0.0, "step": 10149 }, { "epoch": 0.6541212863311207, "grad_norm": 0.008247315056338593, "learning_rate": 3.848191908342285e-06, "loss": 0.0, "step": 10150 }, { "epoch": 0.6541857317780498, "grad_norm": 0.03783836195338165, "learning_rate": 3.847475832438239e-06, "loss": 0.0001, "step": 10151 }, { "epoch": 0.654250177224979, "grad_norm": 0.03685747469146827, "learning_rate": 3.846759756534193e-06, "loss": 0.0, "step": 10152 }, { "epoch": 0.6543146226719082, "grad_norm": 0.01833364381008545, "learning_rate": 3.846043680630147e-06, "loss": 0.0, "step": 10153 }, { "epoch": 0.6543790681188374, "grad_norm": 0.5712872105755907, "learning_rate": 3.845327604726101e-06, "loss": 0.0033, "step": 10154 }, { "epoch": 0.6544435135657666, "grad_norm": 0.004128894377295956, "learning_rate": 3.8446115288220555e-06, "loss": 0.0, "step": 10155 }, { "epoch": 0.6545079590126958, "grad_norm": 0.026285459872411314, "learning_rate": 3.84389545291801e-06, "loss": 0.0, "step": 10156 }, { "epoch": 0.654572404459625, "grad_norm": 0.010629958355985208, "learning_rate": 3.843179377013963e-06, "loss": 0.0001, "step": 10157 }, { "epoch": 0.6546368499065541, "grad_norm": 0.0003458473109674389, "learning_rate": 3.842463301109918e-06, "loss": 0.0, "step": 10158 }, { "epoch": 0.6547012953534833, "grad_norm": 0.019025562351784445, "learning_rate": 3.841747225205873e-06, "loss": 0.0, "step": 10159 }, { "epoch": 0.6547657408004125, "grad_norm": 0.0031882815792937374, "learning_rate": 3.841031149301826e-06, "loss": 0.0, "step": 10160 }, { "epoch": 0.6548301862473416, "grad_norm": 0.01813268104120978, "learning_rate": 3.8403150733977805e-06, "loss": 0.0, "step": 10161 }, { "epoch": 0.6548946316942708, "grad_norm": 0.02995992804919363, "learning_rate": 3.839598997493735e-06, "loss": 0.0, "step": 10162 }, { "epoch": 0.6549590771411999, "grad_norm": 0.006443501348050104, "learning_rate": 3.838882921589689e-06, "loss": 0.0, "step": 10163 }, { "epoch": 0.6550235225881291, "grad_norm": 0.00042109950495271373, "learning_rate": 3.838166845685643e-06, "loss": 0.0, "step": 10164 }, { "epoch": 0.6550879680350583, "grad_norm": 0.0008626208900784901, "learning_rate": 3.837450769781597e-06, "loss": 0.0, "step": 10165 }, { "epoch": 0.6551524134819875, "grad_norm": 0.003391952493412297, "learning_rate": 3.836734693877551e-06, "loss": 0.0, "step": 10166 }, { "epoch": 0.6552168589289167, "grad_norm": 0.005285101991014211, "learning_rate": 3.836018617973505e-06, "loss": 0.0, "step": 10167 }, { "epoch": 0.6552813043758459, "grad_norm": 0.01946311256116589, "learning_rate": 3.83530254206946e-06, "loss": 0.0, "step": 10168 }, { "epoch": 0.655345749822775, "grad_norm": 0.0026740134172567116, "learning_rate": 3.834586466165414e-06, "loss": 0.0, "step": 10169 }, { "epoch": 0.6554101952697042, "grad_norm": 0.0033936852631130097, "learning_rate": 3.8338703902613675e-06, "loss": 0.0, "step": 10170 }, { "epoch": 0.6554746407166334, "grad_norm": 0.0009185211832299836, "learning_rate": 3.833154314357323e-06, "loss": 0.0, "step": 10171 }, { "epoch": 0.6555390861635625, "grad_norm": 0.06459203475290538, "learning_rate": 3.832438238453277e-06, "loss": 0.0005, "step": 10172 }, { "epoch": 0.6556035316104917, "grad_norm": 0.0026117542544247074, "learning_rate": 3.83172216254923e-06, "loss": 0.0, "step": 10173 }, { "epoch": 0.6556679770574209, "grad_norm": 0.1658535446637217, "learning_rate": 3.831006086645185e-06, "loss": 0.0001, "step": 10174 }, { "epoch": 0.65573242250435, "grad_norm": 0.12451728359577288, "learning_rate": 3.830290010741139e-06, "loss": 0.002, "step": 10175 }, { "epoch": 0.6557968679512792, "grad_norm": 0.30647530928600114, "learning_rate": 3.829573934837093e-06, "loss": 0.0006, "step": 10176 }, { "epoch": 0.6558613133982084, "grad_norm": 0.05586064285560092, "learning_rate": 3.8288578589330476e-06, "loss": 0.0002, "step": 10177 }, { "epoch": 0.6559257588451376, "grad_norm": 0.0025995801811718247, "learning_rate": 3.828141783029001e-06, "loss": 0.0, "step": 10178 }, { "epoch": 0.6559902042920668, "grad_norm": 0.0038249163839321164, "learning_rate": 3.827425707124955e-06, "loss": 0.0, "step": 10179 }, { "epoch": 0.656054649738996, "grad_norm": 0.4108950183217354, "learning_rate": 3.82670963122091e-06, "loss": 0.0029, "step": 10180 }, { "epoch": 0.6561190951859251, "grad_norm": 0.004374683406390487, "learning_rate": 3.825993555316864e-06, "loss": 0.0, "step": 10181 }, { "epoch": 0.6561835406328543, "grad_norm": 0.0014969097910061068, "learning_rate": 3.825277479412818e-06, "loss": 0.0, "step": 10182 }, { "epoch": 0.6562479860797835, "grad_norm": 0.0012723266838122523, "learning_rate": 3.8245614035087725e-06, "loss": 0.0, "step": 10183 }, { "epoch": 0.6563124315267126, "grad_norm": 0.008525120834268957, "learning_rate": 3.823845327604727e-06, "loss": 0.0, "step": 10184 }, { "epoch": 0.6563768769736418, "grad_norm": 0.0030668060505652244, "learning_rate": 3.823129251700681e-06, "loss": 0.0, "step": 10185 }, { "epoch": 0.656441322420571, "grad_norm": 0.00534143900713158, "learning_rate": 3.8224131757966346e-06, "loss": 0.0, "step": 10186 }, { "epoch": 0.6565057678675001, "grad_norm": 0.0037717083756212658, "learning_rate": 3.821697099892589e-06, "loss": 0.0, "step": 10187 }, { "epoch": 0.6565702133144293, "grad_norm": 0.0012179376897736268, "learning_rate": 3.820981023988543e-06, "loss": 0.0, "step": 10188 }, { "epoch": 0.6566346587613585, "grad_norm": 0.00037249200636945535, "learning_rate": 3.8202649480844975e-06, "loss": 0.0, "step": 10189 }, { "epoch": 0.6566991042082877, "grad_norm": 0.017734283475125033, "learning_rate": 3.819548872180452e-06, "loss": 0.0002, "step": 10190 }, { "epoch": 0.6567635496552169, "grad_norm": 0.020177854077664724, "learning_rate": 3.818832796276405e-06, "loss": 0.0, "step": 10191 }, { "epoch": 0.6568279951021461, "grad_norm": 0.007316823046694329, "learning_rate": 3.8181167203723595e-06, "loss": 0.0, "step": 10192 }, { "epoch": 0.6568924405490753, "grad_norm": 0.0005207608387712419, "learning_rate": 3.817400644468314e-06, "loss": 0.0, "step": 10193 }, { "epoch": 0.6569568859960044, "grad_norm": 0.0009445133087726743, "learning_rate": 3.816684568564268e-06, "loss": 0.0, "step": 10194 }, { "epoch": 0.6570213314429335, "grad_norm": 0.02219222355146514, "learning_rate": 3.815968492660222e-06, "loss": 0.0, "step": 10195 }, { "epoch": 0.6570857768898627, "grad_norm": 0.0012817471509581164, "learning_rate": 3.815252416756177e-06, "loss": 0.0, "step": 10196 }, { "epoch": 0.6571502223367919, "grad_norm": 0.010393691367853066, "learning_rate": 3.8145363408521306e-06, "loss": 0.0, "step": 10197 }, { "epoch": 0.6572146677837211, "grad_norm": 0.016148501614400432, "learning_rate": 3.813820264948085e-06, "loss": 0.0, "step": 10198 }, { "epoch": 0.6572791132306502, "grad_norm": 0.028812178897839727, "learning_rate": 3.813104189044039e-06, "loss": 0.0, "step": 10199 }, { "epoch": 0.6573435586775794, "grad_norm": 0.0002261249635128685, "learning_rate": 3.812388113139993e-06, "loss": 0.0, "step": 10200 }, { "epoch": 0.6574080041245086, "grad_norm": 0.001779168386697235, "learning_rate": 3.8116720372359473e-06, "loss": 0.0, "step": 10201 }, { "epoch": 0.6574724495714378, "grad_norm": 0.0011019397221192367, "learning_rate": 3.8109559613319012e-06, "loss": 0.0, "step": 10202 }, { "epoch": 0.657536895018367, "grad_norm": 0.00014219508472730657, "learning_rate": 3.8102398854278555e-06, "loss": 0.0, "step": 10203 }, { "epoch": 0.6576013404652962, "grad_norm": 0.0265215081898783, "learning_rate": 3.80952380952381e-06, "loss": 0.0002, "step": 10204 }, { "epoch": 0.6576657859122254, "grad_norm": 0.03401318937206382, "learning_rate": 3.8088077336197637e-06, "loss": 0.0002, "step": 10205 }, { "epoch": 0.6577302313591544, "grad_norm": 0.07009893009647121, "learning_rate": 3.8080916577157184e-06, "loss": 0.0, "step": 10206 }, { "epoch": 0.6577946768060836, "grad_norm": 0.008663384096209157, "learning_rate": 3.8073755818116727e-06, "loss": 0.0, "step": 10207 }, { "epoch": 0.6578591222530128, "grad_norm": 9.288414808061254e-05, "learning_rate": 3.8066595059076266e-06, "loss": 0.0, "step": 10208 }, { "epoch": 0.657923567699942, "grad_norm": 0.007428766665651455, "learning_rate": 3.805943430003581e-06, "loss": 0.0, "step": 10209 }, { "epoch": 0.6579880131468712, "grad_norm": 0.026942313183641934, "learning_rate": 3.8052273540995348e-06, "loss": 0.0, "step": 10210 }, { "epoch": 0.6580524585938003, "grad_norm": 0.0005140052428280983, "learning_rate": 3.804511278195489e-06, "loss": 0.0, "step": 10211 }, { "epoch": 0.6581169040407295, "grad_norm": 0.07336983683314319, "learning_rate": 3.8037952022914434e-06, "loss": 0.0001, "step": 10212 }, { "epoch": 0.6581813494876587, "grad_norm": 0.03837160424420366, "learning_rate": 3.8030791263873972e-06, "loss": 0.0001, "step": 10213 }, { "epoch": 0.6582457949345879, "grad_norm": 0.03194277067175515, "learning_rate": 3.8023630504833515e-06, "loss": 0.0, "step": 10214 }, { "epoch": 0.6583102403815171, "grad_norm": 0.008725165434776942, "learning_rate": 3.8016469745793054e-06, "loss": 0.0, "step": 10215 }, { "epoch": 0.6583746858284463, "grad_norm": 0.011985939675556128, "learning_rate": 3.8009308986752597e-06, "loss": 0.0, "step": 10216 }, { "epoch": 0.6584391312753753, "grad_norm": 0.09138096272130088, "learning_rate": 3.8002148227712136e-06, "loss": 0.0001, "step": 10217 }, { "epoch": 0.6585035767223045, "grad_norm": 0.1891860258878625, "learning_rate": 3.7994987468671683e-06, "loss": 0.0002, "step": 10218 }, { "epoch": 0.6585680221692337, "grad_norm": 0.08866577064407546, "learning_rate": 3.7987826709631226e-06, "loss": 0.0001, "step": 10219 }, { "epoch": 0.6586324676161629, "grad_norm": 0.006611086351549965, "learning_rate": 3.798066595059077e-06, "loss": 0.0, "step": 10220 }, { "epoch": 0.6586969130630921, "grad_norm": 0.00027910876529814774, "learning_rate": 3.7973505191550308e-06, "loss": 0.0, "step": 10221 }, { "epoch": 0.6587613585100213, "grad_norm": 0.007999838552962334, "learning_rate": 3.796634443250985e-06, "loss": 0.0001, "step": 10222 }, { "epoch": 0.6588258039569505, "grad_norm": 0.2032794548559423, "learning_rate": 3.795918367346939e-06, "loss": 0.0005, "step": 10223 }, { "epoch": 0.6588902494038796, "grad_norm": 0.0003595132206011157, "learning_rate": 3.7952022914428932e-06, "loss": 0.0, "step": 10224 }, { "epoch": 0.6589546948508088, "grad_norm": 0.0008375620930174066, "learning_rate": 3.794486215538847e-06, "loss": 0.0, "step": 10225 }, { "epoch": 0.659019140297738, "grad_norm": 0.020827546998719356, "learning_rate": 3.7937701396348014e-06, "loss": 0.0, "step": 10226 }, { "epoch": 0.6590835857446672, "grad_norm": 0.03600446840366598, "learning_rate": 3.7930540637307557e-06, "loss": 0.0001, "step": 10227 }, { "epoch": 0.6591480311915963, "grad_norm": 0.004241069782651026, "learning_rate": 3.7923379878267096e-06, "loss": 0.0, "step": 10228 }, { "epoch": 0.6592124766385254, "grad_norm": 0.019451562174052747, "learning_rate": 3.7916219119226643e-06, "loss": 0.0001, "step": 10229 }, { "epoch": 0.6592769220854546, "grad_norm": 0.002471784102794206, "learning_rate": 3.7909058360186186e-06, "loss": 0.0, "step": 10230 }, { "epoch": 0.6593413675323838, "grad_norm": 0.006852716105307031, "learning_rate": 3.7901897601145725e-06, "loss": 0.0, "step": 10231 }, { "epoch": 0.659405812979313, "grad_norm": 0.06718536487225223, "learning_rate": 3.789473684210527e-06, "loss": 0.0003, "step": 10232 }, { "epoch": 0.6594702584262422, "grad_norm": 0.15153008586304992, "learning_rate": 3.7887576083064807e-06, "loss": 0.0007, "step": 10233 }, { "epoch": 0.6595347038731714, "grad_norm": 0.10961949745578928, "learning_rate": 3.788041532402435e-06, "loss": 0.0001, "step": 10234 }, { "epoch": 0.6595991493201006, "grad_norm": 0.00020195634307442343, "learning_rate": 3.7873254564983893e-06, "loss": 0.0, "step": 10235 }, { "epoch": 0.6596635947670297, "grad_norm": 0.008753787719257302, "learning_rate": 3.786609380594343e-06, "loss": 0.0001, "step": 10236 }, { "epoch": 0.6597280402139589, "grad_norm": 0.00017843003587698393, "learning_rate": 3.7858933046902974e-06, "loss": 0.0, "step": 10237 }, { "epoch": 0.6597924856608881, "grad_norm": 0.003163697938415987, "learning_rate": 3.7851772287862513e-06, "loss": 0.0, "step": 10238 }, { "epoch": 0.6598569311078172, "grad_norm": 0.0016409395954385996, "learning_rate": 3.7844611528822056e-06, "loss": 0.0, "step": 10239 }, { "epoch": 0.6599213765547464, "grad_norm": 0.0016194677414049581, "learning_rate": 3.78374507697816e-06, "loss": 0.0, "step": 10240 }, { "epoch": 0.6599858220016755, "grad_norm": 0.2341164513889306, "learning_rate": 3.783029001074114e-06, "loss": 0.002, "step": 10241 }, { "epoch": 0.6600502674486047, "grad_norm": 0.00014059234136107755, "learning_rate": 3.7823129251700685e-06, "loss": 0.0, "step": 10242 }, { "epoch": 0.6601147128955339, "grad_norm": 0.012938931855012437, "learning_rate": 3.781596849266023e-06, "loss": 0.0001, "step": 10243 }, { "epoch": 0.6601791583424631, "grad_norm": 0.00014948623886025877, "learning_rate": 3.7808807733619767e-06, "loss": 0.0, "step": 10244 }, { "epoch": 0.6602436037893923, "grad_norm": 0.0002305776851317747, "learning_rate": 3.780164697457931e-06, "loss": 0.0, "step": 10245 }, { "epoch": 0.6603080492363215, "grad_norm": 0.0003601501449476667, "learning_rate": 3.779448621553885e-06, "loss": 0.0, "step": 10246 }, { "epoch": 0.6603724946832507, "grad_norm": 0.006198069005480283, "learning_rate": 3.778732545649839e-06, "loss": 0.0, "step": 10247 }, { "epoch": 0.6604369401301798, "grad_norm": 0.003078498213774524, "learning_rate": 3.7780164697457935e-06, "loss": 0.0, "step": 10248 }, { "epoch": 0.660501385577109, "grad_norm": 0.0014853955072896012, "learning_rate": 3.7773003938417473e-06, "loss": 0.0, "step": 10249 }, { "epoch": 0.6605658310240381, "grad_norm": 0.0002482578605181744, "learning_rate": 3.7765843179377016e-06, "loss": 0.0, "step": 10250 }, { "epoch": 0.6606302764709673, "grad_norm": 0.8442832180262457, "learning_rate": 3.7758682420336555e-06, "loss": 0.0057, "step": 10251 }, { "epoch": 0.6606947219178965, "grad_norm": 9.143114696421962e-06, "learning_rate": 3.77515216612961e-06, "loss": 0.0, "step": 10252 }, { "epoch": 0.6607591673648257, "grad_norm": 0.05428738982897947, "learning_rate": 3.7744360902255645e-06, "loss": 0.0001, "step": 10253 }, { "epoch": 0.6608236128117548, "grad_norm": 0.0005961785885248, "learning_rate": 3.7737200143215184e-06, "loss": 0.0, "step": 10254 }, { "epoch": 0.660888058258684, "grad_norm": 0.003677904195814865, "learning_rate": 3.7730039384174727e-06, "loss": 0.0, "step": 10255 }, { "epoch": 0.6609525037056132, "grad_norm": 0.03861703337575527, "learning_rate": 3.772287862513427e-06, "loss": 0.0001, "step": 10256 }, { "epoch": 0.6610169491525424, "grad_norm": 0.000588057262521846, "learning_rate": 3.771571786609381e-06, "loss": 0.0, "step": 10257 }, { "epoch": 0.6610813945994716, "grad_norm": 0.0016087935946511298, "learning_rate": 3.770855710705335e-06, "loss": 0.0, "step": 10258 }, { "epoch": 0.6611458400464008, "grad_norm": 0.0012067817277527145, "learning_rate": 3.770139634801289e-06, "loss": 0.0, "step": 10259 }, { "epoch": 0.6612102854933299, "grad_norm": 0.00023797434159941603, "learning_rate": 3.7694235588972433e-06, "loss": 0.0, "step": 10260 }, { "epoch": 0.6612747309402591, "grad_norm": 0.0037044470691454294, "learning_rate": 3.7687074829931976e-06, "loss": 0.0, "step": 10261 }, { "epoch": 0.6613391763871882, "grad_norm": 8.325739223354846e-05, "learning_rate": 3.7679914070891515e-06, "loss": 0.0, "step": 10262 }, { "epoch": 0.6614036218341174, "grad_norm": 0.45104876403275584, "learning_rate": 3.767275331185106e-06, "loss": 0.0009, "step": 10263 }, { "epoch": 0.6614680672810466, "grad_norm": 0.03306756149105825, "learning_rate": 3.7665592552810605e-06, "loss": 0.0002, "step": 10264 }, { "epoch": 0.6615325127279758, "grad_norm": 0.0009416338385984487, "learning_rate": 3.7658431793770144e-06, "loss": 0.0, "step": 10265 }, { "epoch": 0.6615969581749049, "grad_norm": 0.000314439940892325, "learning_rate": 3.7651271034729687e-06, "loss": 0.0, "step": 10266 }, { "epoch": 0.6616614036218341, "grad_norm": 0.000485502264331188, "learning_rate": 3.7644110275689226e-06, "loss": 0.0, "step": 10267 }, { "epoch": 0.6617258490687633, "grad_norm": 0.0014131418138240244, "learning_rate": 3.763694951664877e-06, "loss": 0.0, "step": 10268 }, { "epoch": 0.6617902945156925, "grad_norm": 0.0012902404209281864, "learning_rate": 3.762978875760831e-06, "loss": 0.0, "step": 10269 }, { "epoch": 0.6618547399626217, "grad_norm": 0.5734556928030273, "learning_rate": 3.762262799856785e-06, "loss": 0.0018, "step": 10270 }, { "epoch": 0.6619191854095509, "grad_norm": 0.016855807832785976, "learning_rate": 3.7615467239527394e-06, "loss": 0.0, "step": 10271 }, { "epoch": 0.66198363085648, "grad_norm": 0.06585408753475606, "learning_rate": 3.7608306480486932e-06, "loss": 0.0001, "step": 10272 }, { "epoch": 0.6620480763034091, "grad_norm": 0.0023837119585305598, "learning_rate": 3.7601145721446475e-06, "loss": 0.0, "step": 10273 }, { "epoch": 0.6621125217503383, "grad_norm": 0.03470923425898855, "learning_rate": 3.7593984962406014e-06, "loss": 0.0, "step": 10274 }, { "epoch": 0.6621769671972675, "grad_norm": 0.004313669130485635, "learning_rate": 3.7586824203365557e-06, "loss": 0.0, "step": 10275 }, { "epoch": 0.6622414126441967, "grad_norm": 0.21375927750213114, "learning_rate": 3.7579663444325104e-06, "loss": 0.0007, "step": 10276 }, { "epoch": 0.6623058580911259, "grad_norm": 0.00922187225142663, "learning_rate": 3.7572502685284647e-06, "loss": 0.0, "step": 10277 }, { "epoch": 0.662370303538055, "grad_norm": 0.0019563550661400313, "learning_rate": 3.7565341926244186e-06, "loss": 0.0, "step": 10278 }, { "epoch": 0.6624347489849842, "grad_norm": 0.012985174580154232, "learning_rate": 3.755818116720373e-06, "loss": 0.0, "step": 10279 }, { "epoch": 0.6624991944319134, "grad_norm": 0.1419093634249794, "learning_rate": 3.7551020408163268e-06, "loss": 0.0015, "step": 10280 }, { "epoch": 0.6625636398788426, "grad_norm": 7.844805718374012e-05, "learning_rate": 3.754385964912281e-06, "loss": 0.0, "step": 10281 }, { "epoch": 0.6626280853257718, "grad_norm": 0.000882768606512407, "learning_rate": 3.753669889008235e-06, "loss": 0.0, "step": 10282 }, { "epoch": 0.662692530772701, "grad_norm": 0.0002696189077644942, "learning_rate": 3.7529538131041892e-06, "loss": 0.0, "step": 10283 }, { "epoch": 0.66275697621963, "grad_norm": 0.064502581492965, "learning_rate": 3.7522377372001435e-06, "loss": 0.0011, "step": 10284 }, { "epoch": 0.6628214216665592, "grad_norm": 0.00019324913286995657, "learning_rate": 3.7515216612960974e-06, "loss": 0.0, "step": 10285 }, { "epoch": 0.6628858671134884, "grad_norm": 0.005003145435338595, "learning_rate": 3.7508055853920517e-06, "loss": 0.0, "step": 10286 }, { "epoch": 0.6629503125604176, "grad_norm": 0.005832136003543727, "learning_rate": 3.7500895094880056e-06, "loss": 0.0, "step": 10287 }, { "epoch": 0.6630147580073468, "grad_norm": 0.008914760200775532, "learning_rate": 3.7493734335839603e-06, "loss": 0.0, "step": 10288 }, { "epoch": 0.663079203454276, "grad_norm": 0.06776215709505574, "learning_rate": 3.7486573576799146e-06, "loss": 0.0001, "step": 10289 }, { "epoch": 0.6631436489012051, "grad_norm": 0.0010801679433969562, "learning_rate": 3.7479412817758685e-06, "loss": 0.0, "step": 10290 }, { "epoch": 0.6632080943481343, "grad_norm": 0.3396181804918623, "learning_rate": 3.747225205871823e-06, "loss": 0.0009, "step": 10291 }, { "epoch": 0.6632725397950635, "grad_norm": 0.1640217764584769, "learning_rate": 3.746509129967777e-06, "loss": 0.0024, "step": 10292 }, { "epoch": 0.6633369852419927, "grad_norm": 0.0013012118961681762, "learning_rate": 3.745793054063731e-06, "loss": 0.0, "step": 10293 }, { "epoch": 0.6634014306889219, "grad_norm": 0.05860680792265722, "learning_rate": 3.7450769781596853e-06, "loss": 0.0002, "step": 10294 }, { "epoch": 0.663465876135851, "grad_norm": 0.8549390572584554, "learning_rate": 3.744360902255639e-06, "loss": 0.0212, "step": 10295 }, { "epoch": 0.6635303215827801, "grad_norm": 0.0017172884277614458, "learning_rate": 3.7436448263515934e-06, "loss": 0.0, "step": 10296 }, { "epoch": 0.6635947670297093, "grad_norm": 0.08668761005700598, "learning_rate": 3.7429287504475477e-06, "loss": 0.0001, "step": 10297 }, { "epoch": 0.6636592124766385, "grad_norm": 0.0023396848876374708, "learning_rate": 3.7422126745435016e-06, "loss": 0.0, "step": 10298 }, { "epoch": 0.6637236579235677, "grad_norm": 0.0006625158127470572, "learning_rate": 3.7414965986394563e-06, "loss": 0.0, "step": 10299 }, { "epoch": 0.6637881033704969, "grad_norm": 0.006708177138559468, "learning_rate": 3.7407805227354106e-06, "loss": 0.0, "step": 10300 }, { "epoch": 0.6638525488174261, "grad_norm": 0.0007932890888525091, "learning_rate": 3.7400644468313645e-06, "loss": 0.0, "step": 10301 }, { "epoch": 0.6639169942643552, "grad_norm": 0.0015757591637932753, "learning_rate": 3.739348370927319e-06, "loss": 0.0, "step": 10302 }, { "epoch": 0.6639814397112844, "grad_norm": 0.002359474580749016, "learning_rate": 3.7386322950232727e-06, "loss": 0.0, "step": 10303 }, { "epoch": 0.6640458851582136, "grad_norm": 0.047649399272796086, "learning_rate": 3.737916219119227e-06, "loss": 0.0002, "step": 10304 }, { "epoch": 0.6641103306051428, "grad_norm": 0.02729586683217372, "learning_rate": 3.7372001432151813e-06, "loss": 0.0001, "step": 10305 }, { "epoch": 0.6641747760520719, "grad_norm": 0.0031866841413431637, "learning_rate": 3.736484067311135e-06, "loss": 0.0, "step": 10306 }, { "epoch": 0.664239221499001, "grad_norm": 0.0036964935657331686, "learning_rate": 3.7357679914070895e-06, "loss": 0.0, "step": 10307 }, { "epoch": 0.6643036669459302, "grad_norm": 0.003219235261201563, "learning_rate": 3.7350519155030433e-06, "loss": 0.0, "step": 10308 }, { "epoch": 0.6643681123928594, "grad_norm": 0.22426532434435242, "learning_rate": 3.7343358395989976e-06, "loss": 0.0003, "step": 10309 }, { "epoch": 0.6644325578397886, "grad_norm": 0.0018211682093739476, "learning_rate": 3.733619763694952e-06, "loss": 0.0, "step": 10310 }, { "epoch": 0.6644970032867178, "grad_norm": 0.2506031122662469, "learning_rate": 3.7329036877909062e-06, "loss": 0.0008, "step": 10311 }, { "epoch": 0.664561448733647, "grad_norm": 0.0004966368882332178, "learning_rate": 3.7321876118868605e-06, "loss": 0.0, "step": 10312 }, { "epoch": 0.6646258941805762, "grad_norm": 0.03001260390392036, "learning_rate": 3.731471535982815e-06, "loss": 0.0, "step": 10313 }, { "epoch": 0.6646903396275053, "grad_norm": 0.0038538890073602046, "learning_rate": 3.7307554600787687e-06, "loss": 0.0, "step": 10314 }, { "epoch": 0.6647547850744345, "grad_norm": 0.02009810895984538, "learning_rate": 3.730039384174723e-06, "loss": 0.0, "step": 10315 }, { "epoch": 0.6648192305213637, "grad_norm": 0.0017787021194924147, "learning_rate": 3.729323308270677e-06, "loss": 0.0, "step": 10316 }, { "epoch": 0.6648836759682928, "grad_norm": 0.2367472360801086, "learning_rate": 3.728607232366631e-06, "loss": 0.0008, "step": 10317 }, { "epoch": 0.664948121415222, "grad_norm": 0.001101869151755424, "learning_rate": 3.7278911564625855e-06, "loss": 0.0, "step": 10318 }, { "epoch": 0.6650125668621512, "grad_norm": 6.84407851724217e-05, "learning_rate": 3.7271750805585393e-06, "loss": 0.0, "step": 10319 }, { "epoch": 0.6650770123090803, "grad_norm": 0.04718631936326828, "learning_rate": 3.7264590046544936e-06, "loss": 0.0001, "step": 10320 }, { "epoch": 0.6651414577560095, "grad_norm": 0.002101132604809257, "learning_rate": 3.7257429287504475e-06, "loss": 0.0, "step": 10321 }, { "epoch": 0.6652059032029387, "grad_norm": 0.054981715372835395, "learning_rate": 3.725026852846402e-06, "loss": 0.0, "step": 10322 }, { "epoch": 0.6652703486498679, "grad_norm": 0.0002458432285043592, "learning_rate": 3.7243107769423565e-06, "loss": 0.0, "step": 10323 }, { "epoch": 0.6653347940967971, "grad_norm": 0.14842282398353007, "learning_rate": 3.7235947010383104e-06, "loss": 0.0018, "step": 10324 }, { "epoch": 0.6653992395437263, "grad_norm": 0.06344344302113403, "learning_rate": 3.7228786251342647e-06, "loss": 0.0, "step": 10325 }, { "epoch": 0.6654636849906554, "grad_norm": 0.0023448721186625793, "learning_rate": 3.722162549230219e-06, "loss": 0.0, "step": 10326 }, { "epoch": 0.6655281304375846, "grad_norm": 0.019546412006886026, "learning_rate": 3.721446473326173e-06, "loss": 0.0, "step": 10327 }, { "epoch": 0.6655925758845137, "grad_norm": 0.16962522064579758, "learning_rate": 3.720730397422127e-06, "loss": 0.0005, "step": 10328 }, { "epoch": 0.6656570213314429, "grad_norm": 0.01771669440975255, "learning_rate": 3.720014321518081e-06, "loss": 0.0, "step": 10329 }, { "epoch": 0.6657214667783721, "grad_norm": 0.00024993857282373024, "learning_rate": 3.7192982456140354e-06, "loss": 0.0, "step": 10330 }, { "epoch": 0.6657859122253013, "grad_norm": 0.3408004951727263, "learning_rate": 3.7185821697099892e-06, "loss": 0.0013, "step": 10331 }, { "epoch": 0.6658503576722304, "grad_norm": 0.0005357757778321938, "learning_rate": 3.7178660938059435e-06, "loss": 0.0, "step": 10332 }, { "epoch": 0.6659148031191596, "grad_norm": 0.03956123790735388, "learning_rate": 3.717150017901898e-06, "loss": 0.0001, "step": 10333 }, { "epoch": 0.6659792485660888, "grad_norm": 0.0036982926831180154, "learning_rate": 3.7164339419978526e-06, "loss": 0.0, "step": 10334 }, { "epoch": 0.666043694013018, "grad_norm": 0.0012340388580778937, "learning_rate": 3.7157178660938064e-06, "loss": 0.0, "step": 10335 }, { "epoch": 0.6661081394599472, "grad_norm": 0.007880068164499425, "learning_rate": 3.7150017901897607e-06, "loss": 0.0, "step": 10336 }, { "epoch": 0.6661725849068764, "grad_norm": 0.03773377443955219, "learning_rate": 3.7142857142857146e-06, "loss": 0.0004, "step": 10337 }, { "epoch": 0.6662370303538055, "grad_norm": 0.0001938463646608074, "learning_rate": 3.713569638381669e-06, "loss": 0.0, "step": 10338 }, { "epoch": 0.6663014758007347, "grad_norm": 0.0012649033419837533, "learning_rate": 3.7128535624776228e-06, "loss": 0.0, "step": 10339 }, { "epoch": 0.6663659212476638, "grad_norm": 0.0038339946928142945, "learning_rate": 3.712137486573577e-06, "loss": 0.0, "step": 10340 }, { "epoch": 0.666430366694593, "grad_norm": 0.7119580583622275, "learning_rate": 3.7114214106695314e-06, "loss": 0.0041, "step": 10341 }, { "epoch": 0.6664948121415222, "grad_norm": 0.06270795364927652, "learning_rate": 3.7107053347654852e-06, "loss": 0.0, "step": 10342 }, { "epoch": 0.6665592575884514, "grad_norm": 0.0682359125476423, "learning_rate": 3.7099892588614395e-06, "loss": 0.0017, "step": 10343 }, { "epoch": 0.6666237030353805, "grad_norm": 0.027771674137431628, "learning_rate": 3.7092731829573934e-06, "loss": 0.0003, "step": 10344 }, { "epoch": 0.6666881484823097, "grad_norm": 0.06264411062499124, "learning_rate": 3.7085571070533477e-06, "loss": 0.001, "step": 10345 }, { "epoch": 0.6667525939292389, "grad_norm": 0.25018068579057157, "learning_rate": 3.7078410311493024e-06, "loss": 0.003, "step": 10346 }, { "epoch": 0.6668170393761681, "grad_norm": 0.0006350143128432878, "learning_rate": 3.7071249552452563e-06, "loss": 0.0, "step": 10347 }, { "epoch": 0.6668814848230973, "grad_norm": 0.004212846550878931, "learning_rate": 3.7064088793412106e-06, "loss": 0.0, "step": 10348 }, { "epoch": 0.6669459302700265, "grad_norm": 0.00048795961950414513, "learning_rate": 3.705692803437165e-06, "loss": 0.0, "step": 10349 }, { "epoch": 0.6670103757169557, "grad_norm": 0.2391944801432455, "learning_rate": 3.704976727533119e-06, "loss": 0.0008, "step": 10350 }, { "epoch": 0.6670748211638847, "grad_norm": 0.000245488891439777, "learning_rate": 3.704260651629073e-06, "loss": 0.0, "step": 10351 }, { "epoch": 0.6671392666108139, "grad_norm": 2.1831801820844412e-05, "learning_rate": 3.703544575725027e-06, "loss": 0.0, "step": 10352 }, { "epoch": 0.6672037120577431, "grad_norm": 0.06861953276863456, "learning_rate": 3.7028284998209813e-06, "loss": 0.0001, "step": 10353 }, { "epoch": 0.6672681575046723, "grad_norm": 0.21130783632373465, "learning_rate": 3.7021124239169356e-06, "loss": 0.0009, "step": 10354 }, { "epoch": 0.6673326029516015, "grad_norm": 0.0006989204759455222, "learning_rate": 3.7013963480128894e-06, "loss": 0.0, "step": 10355 }, { "epoch": 0.6673970483985306, "grad_norm": 0.00032966569814352545, "learning_rate": 3.7006802721088437e-06, "loss": 0.0, "step": 10356 }, { "epoch": 0.6674614938454598, "grad_norm": 0.29515427786156617, "learning_rate": 3.6999641962047976e-06, "loss": 0.0017, "step": 10357 }, { "epoch": 0.667525939292389, "grad_norm": 0.003538618157151033, "learning_rate": 3.6992481203007523e-06, "loss": 0.0, "step": 10358 }, { "epoch": 0.6675903847393182, "grad_norm": 0.02578721018192703, "learning_rate": 3.6985320443967066e-06, "loss": 0.0, "step": 10359 }, { "epoch": 0.6676548301862474, "grad_norm": 0.000487793444415082, "learning_rate": 3.6978159684926605e-06, "loss": 0.0, "step": 10360 }, { "epoch": 0.6677192756331766, "grad_norm": 0.0018810635425694684, "learning_rate": 3.697099892588615e-06, "loss": 0.0, "step": 10361 }, { "epoch": 0.6677837210801056, "grad_norm": 0.0018085405660967183, "learning_rate": 3.696383816684569e-06, "loss": 0.0, "step": 10362 }, { "epoch": 0.6678481665270348, "grad_norm": 0.1033503724846543, "learning_rate": 3.695667740780523e-06, "loss": 0.0001, "step": 10363 }, { "epoch": 0.667912611973964, "grad_norm": 0.03163602936890037, "learning_rate": 3.6949516648764773e-06, "loss": 0.0001, "step": 10364 }, { "epoch": 0.6679770574208932, "grad_norm": 0.0005767042099623167, "learning_rate": 3.694235588972431e-06, "loss": 0.0, "step": 10365 }, { "epoch": 0.6680415028678224, "grad_norm": 0.17640823233059116, "learning_rate": 3.6935195130683855e-06, "loss": 0.0018, "step": 10366 }, { "epoch": 0.6681059483147516, "grad_norm": 0.0022090416866323596, "learning_rate": 3.6928034371643393e-06, "loss": 0.0, "step": 10367 }, { "epoch": 0.6681703937616807, "grad_norm": 0.044256572962756785, "learning_rate": 3.6920873612602936e-06, "loss": 0.0, "step": 10368 }, { "epoch": 0.6682348392086099, "grad_norm": 0.011968673310753146, "learning_rate": 3.6913712853562483e-06, "loss": 0.0001, "step": 10369 }, { "epoch": 0.6682992846555391, "grad_norm": 0.002222004775121942, "learning_rate": 3.6906552094522026e-06, "loss": 0.0, "step": 10370 }, { "epoch": 0.6683637301024683, "grad_norm": 0.003974169480094497, "learning_rate": 3.6899391335481565e-06, "loss": 0.0, "step": 10371 }, { "epoch": 0.6684281755493975, "grad_norm": 0.011160728574854355, "learning_rate": 3.689223057644111e-06, "loss": 0.0001, "step": 10372 }, { "epoch": 0.6684926209963266, "grad_norm": 0.03456428294752379, "learning_rate": 3.6885069817400647e-06, "loss": 0.0004, "step": 10373 }, { "epoch": 0.6685570664432557, "grad_norm": 0.004038779991600407, "learning_rate": 3.687790905836019e-06, "loss": 0.0, "step": 10374 }, { "epoch": 0.6686215118901849, "grad_norm": 0.000989251636449544, "learning_rate": 3.687074829931973e-06, "loss": 0.0, "step": 10375 }, { "epoch": 0.6686859573371141, "grad_norm": 0.004165436098205323, "learning_rate": 3.686358754027927e-06, "loss": 0.0, "step": 10376 }, { "epoch": 0.6687504027840433, "grad_norm": 0.14913321884813732, "learning_rate": 3.6856426781238815e-06, "loss": 0.0003, "step": 10377 }, { "epoch": 0.6688148482309725, "grad_norm": 0.00021340746164696231, "learning_rate": 3.6849266022198353e-06, "loss": 0.0, "step": 10378 }, { "epoch": 0.6688792936779017, "grad_norm": 0.001224805831090876, "learning_rate": 3.6842105263157896e-06, "loss": 0.0, "step": 10379 }, { "epoch": 0.6689437391248308, "grad_norm": 0.059466480614118154, "learning_rate": 3.6834944504117435e-06, "loss": 0.0004, "step": 10380 }, { "epoch": 0.66900818457176, "grad_norm": 0.022219569955405607, "learning_rate": 3.6827783745076982e-06, "loss": 0.0, "step": 10381 }, { "epoch": 0.6690726300186892, "grad_norm": 0.010246315911304519, "learning_rate": 3.6820622986036525e-06, "loss": 0.0001, "step": 10382 }, { "epoch": 0.6691370754656184, "grad_norm": 0.18016104909322808, "learning_rate": 3.6813462226996064e-06, "loss": 0.0015, "step": 10383 }, { "epoch": 0.6692015209125475, "grad_norm": 0.0039480985634492635, "learning_rate": 3.6806301467955607e-06, "loss": 0.0, "step": 10384 }, { "epoch": 0.6692659663594767, "grad_norm": 0.00021953914315241395, "learning_rate": 3.679914070891515e-06, "loss": 0.0, "step": 10385 }, { "epoch": 0.6693304118064058, "grad_norm": 0.00140818974422586, "learning_rate": 3.679197994987469e-06, "loss": 0.0, "step": 10386 }, { "epoch": 0.669394857253335, "grad_norm": 0.06190358518524449, "learning_rate": 3.678481919083423e-06, "loss": 0.0002, "step": 10387 }, { "epoch": 0.6694593027002642, "grad_norm": 0.00048093340199941554, "learning_rate": 3.677765843179377e-06, "loss": 0.0, "step": 10388 }, { "epoch": 0.6695237481471934, "grad_norm": 0.010102305479308519, "learning_rate": 3.6770497672753314e-06, "loss": 0.0, "step": 10389 }, { "epoch": 0.6695881935941226, "grad_norm": 0.012027537219855382, "learning_rate": 3.6763336913712857e-06, "loss": 0.0, "step": 10390 }, { "epoch": 0.6696526390410518, "grad_norm": 0.00474196388602497, "learning_rate": 3.6756176154672395e-06, "loss": 0.0, "step": 10391 }, { "epoch": 0.669717084487981, "grad_norm": 0.00038679097703264, "learning_rate": 3.674901539563194e-06, "loss": 0.0, "step": 10392 }, { "epoch": 0.6697815299349101, "grad_norm": 7.549534460174918e-05, "learning_rate": 3.6741854636591486e-06, "loss": 0.0, "step": 10393 }, { "epoch": 0.6698459753818393, "grad_norm": 0.0027491068659299105, "learning_rate": 3.6734693877551024e-06, "loss": 0.0, "step": 10394 }, { "epoch": 0.6699104208287684, "grad_norm": 0.05399843993640478, "learning_rate": 3.6727533118510567e-06, "loss": 0.0001, "step": 10395 }, { "epoch": 0.6699748662756976, "grad_norm": 0.004283962775883192, "learning_rate": 3.6720372359470106e-06, "loss": 0.0, "step": 10396 }, { "epoch": 0.6700393117226268, "grad_norm": 0.0001430948322371208, "learning_rate": 3.671321160042965e-06, "loss": 0.0, "step": 10397 }, { "epoch": 0.670103757169556, "grad_norm": 0.00017731739681702292, "learning_rate": 3.670605084138919e-06, "loss": 0.0, "step": 10398 }, { "epoch": 0.6701682026164851, "grad_norm": 0.000991244561069628, "learning_rate": 3.669889008234873e-06, "loss": 0.0, "step": 10399 }, { "epoch": 0.6702326480634143, "grad_norm": 0.0001893566769277467, "learning_rate": 3.6691729323308274e-06, "loss": 0.0, "step": 10400 }, { "epoch": 0.6702970935103435, "grad_norm": 0.004589059406335244, "learning_rate": 3.6684568564267812e-06, "loss": 0.0, "step": 10401 }, { "epoch": 0.6703615389572727, "grad_norm": 3.647468846965012e-05, "learning_rate": 3.6677407805227355e-06, "loss": 0.0, "step": 10402 }, { "epoch": 0.6704259844042019, "grad_norm": 6.37729973034846e-05, "learning_rate": 3.66702470461869e-06, "loss": 0.0, "step": 10403 }, { "epoch": 0.670490429851131, "grad_norm": 0.2196042122873555, "learning_rate": 3.666308628714644e-06, "loss": 0.0008, "step": 10404 }, { "epoch": 0.6705548752980602, "grad_norm": 0.007662267436804598, "learning_rate": 3.6655925528105984e-06, "loss": 0.0, "step": 10405 }, { "epoch": 0.6706193207449893, "grad_norm": 0.0004067778012657678, "learning_rate": 3.6648764769065527e-06, "loss": 0.0, "step": 10406 }, { "epoch": 0.6706837661919185, "grad_norm": 0.010651062407556951, "learning_rate": 3.6641604010025066e-06, "loss": 0.0, "step": 10407 }, { "epoch": 0.6707482116388477, "grad_norm": 0.0031969810332983287, "learning_rate": 3.663444325098461e-06, "loss": 0.0, "step": 10408 }, { "epoch": 0.6708126570857769, "grad_norm": 0.00044501171077525185, "learning_rate": 3.662728249194415e-06, "loss": 0.0, "step": 10409 }, { "epoch": 0.670877102532706, "grad_norm": 0.00013680105479665556, "learning_rate": 3.662012173290369e-06, "loss": 0.0, "step": 10410 }, { "epoch": 0.6709415479796352, "grad_norm": 0.025411179136745145, "learning_rate": 3.6612960973863234e-06, "loss": 0.0, "step": 10411 }, { "epoch": 0.6710059934265644, "grad_norm": 0.05550540515503469, "learning_rate": 3.6605800214822773e-06, "loss": 0.0001, "step": 10412 }, { "epoch": 0.6710704388734936, "grad_norm": 3.929251421293004e-05, "learning_rate": 3.6598639455782316e-06, "loss": 0.0, "step": 10413 }, { "epoch": 0.6711348843204228, "grad_norm": 0.09189680423013248, "learning_rate": 3.6591478696741854e-06, "loss": 0.0002, "step": 10414 }, { "epoch": 0.671199329767352, "grad_norm": 0.0006463515367147947, "learning_rate": 3.6584317937701397e-06, "loss": 0.0, "step": 10415 }, { "epoch": 0.6712637752142812, "grad_norm": 0.00046602857314811133, "learning_rate": 3.6577157178660945e-06, "loss": 0.0, "step": 10416 }, { "epoch": 0.6713282206612103, "grad_norm": 8.019380777964712e-05, "learning_rate": 3.6569996419620483e-06, "loss": 0.0, "step": 10417 }, { "epoch": 0.6713926661081394, "grad_norm": 0.035754243974769166, "learning_rate": 3.6562835660580026e-06, "loss": 0.0, "step": 10418 }, { "epoch": 0.6714571115550686, "grad_norm": 0.03264983678887585, "learning_rate": 3.655567490153957e-06, "loss": 0.0001, "step": 10419 }, { "epoch": 0.6715215570019978, "grad_norm": 0.00046047314390313494, "learning_rate": 3.654851414249911e-06, "loss": 0.0, "step": 10420 }, { "epoch": 0.671586002448927, "grad_norm": 0.14819997681575406, "learning_rate": 3.654135338345865e-06, "loss": 0.0023, "step": 10421 }, { "epoch": 0.6716504478958562, "grad_norm": 0.12865062133924046, "learning_rate": 3.653419262441819e-06, "loss": 0.0002, "step": 10422 }, { "epoch": 0.6717148933427853, "grad_norm": 0.000465925691879833, "learning_rate": 3.6527031865377733e-06, "loss": 0.0, "step": 10423 }, { "epoch": 0.6717793387897145, "grad_norm": 0.10362138383915132, "learning_rate": 3.651987110633727e-06, "loss": 0.0008, "step": 10424 }, { "epoch": 0.6718437842366437, "grad_norm": 0.11788996914326605, "learning_rate": 3.6512710347296815e-06, "loss": 0.0001, "step": 10425 }, { "epoch": 0.6719082296835729, "grad_norm": 0.0004748098370773154, "learning_rate": 3.6505549588256358e-06, "loss": 0.0, "step": 10426 }, { "epoch": 0.6719726751305021, "grad_norm": 0.06221575286604827, "learning_rate": 3.6498388829215896e-06, "loss": 0.0002, "step": 10427 }, { "epoch": 0.6720371205774313, "grad_norm": 0.0006763647039382567, "learning_rate": 3.6491228070175443e-06, "loss": 0.0, "step": 10428 }, { "epoch": 0.6721015660243603, "grad_norm": 0.013047591917105377, "learning_rate": 3.6484067311134986e-06, "loss": 0.0, "step": 10429 }, { "epoch": 0.6721660114712895, "grad_norm": 0.0018885810922089606, "learning_rate": 3.6476906552094525e-06, "loss": 0.0, "step": 10430 }, { "epoch": 0.6722304569182187, "grad_norm": 0.00018979485411064437, "learning_rate": 3.646974579305407e-06, "loss": 0.0, "step": 10431 }, { "epoch": 0.6722949023651479, "grad_norm": 0.25609584975065497, "learning_rate": 3.6462585034013607e-06, "loss": 0.0009, "step": 10432 }, { "epoch": 0.6723593478120771, "grad_norm": 0.024535905068322705, "learning_rate": 3.645542427497315e-06, "loss": 0.0001, "step": 10433 }, { "epoch": 0.6724237932590063, "grad_norm": 0.0027585886411952447, "learning_rate": 3.6448263515932693e-06, "loss": 0.0, "step": 10434 }, { "epoch": 0.6724882387059354, "grad_norm": 0.028292844046307754, "learning_rate": 3.644110275689223e-06, "loss": 0.0001, "step": 10435 }, { "epoch": 0.6725526841528646, "grad_norm": 0.027540698239601234, "learning_rate": 3.6433941997851775e-06, "loss": 0.0001, "step": 10436 }, { "epoch": 0.6726171295997938, "grad_norm": 0.00963730360489511, "learning_rate": 3.6426781238811313e-06, "loss": 0.0001, "step": 10437 }, { "epoch": 0.672681575046723, "grad_norm": 0.0018280268136798214, "learning_rate": 3.6419620479770856e-06, "loss": 0.0, "step": 10438 }, { "epoch": 0.6727460204936522, "grad_norm": 0.0001900797635635939, "learning_rate": 3.6412459720730404e-06, "loss": 0.0, "step": 10439 }, { "epoch": 0.6728104659405812, "grad_norm": 0.00023532304022595498, "learning_rate": 3.6405298961689942e-06, "loss": 0.0, "step": 10440 }, { "epoch": 0.6728749113875104, "grad_norm": 0.0019142251629258287, "learning_rate": 3.6398138202649485e-06, "loss": 0.0, "step": 10441 }, { "epoch": 0.6729393568344396, "grad_norm": 0.00029465788696560115, "learning_rate": 3.639097744360903e-06, "loss": 0.0, "step": 10442 }, { "epoch": 0.6730038022813688, "grad_norm": 0.00023300991071189333, "learning_rate": 3.6383816684568567e-06, "loss": 0.0, "step": 10443 }, { "epoch": 0.673068247728298, "grad_norm": 0.22997037699398992, "learning_rate": 3.637665592552811e-06, "loss": 0.0006, "step": 10444 }, { "epoch": 0.6731326931752272, "grad_norm": 0.41474730166085766, "learning_rate": 3.636949516648765e-06, "loss": 0.0018, "step": 10445 }, { "epoch": 0.6731971386221564, "grad_norm": 0.0018304284832990753, "learning_rate": 3.636233440744719e-06, "loss": 0.0, "step": 10446 }, { "epoch": 0.6732615840690855, "grad_norm": 0.2089151285548803, "learning_rate": 3.6355173648406735e-06, "loss": 0.0019, "step": 10447 }, { "epoch": 0.6733260295160147, "grad_norm": 0.0003634330347498069, "learning_rate": 3.6348012889366274e-06, "loss": 0.0, "step": 10448 }, { "epoch": 0.6733904749629439, "grad_norm": 0.001954738083069696, "learning_rate": 3.6340852130325817e-06, "loss": 0.0, "step": 10449 }, { "epoch": 0.6734549204098731, "grad_norm": 0.001967566134604329, "learning_rate": 3.6333691371285355e-06, "loss": 0.0, "step": 10450 }, { "epoch": 0.6735193658568022, "grad_norm": 0.01803615169688362, "learning_rate": 3.6326530612244903e-06, "loss": 0.0002, "step": 10451 }, { "epoch": 0.6735838113037314, "grad_norm": 0.18593643853341382, "learning_rate": 3.6319369853204446e-06, "loss": 0.0004, "step": 10452 }, { "epoch": 0.6736482567506605, "grad_norm": 0.0021236017655750163, "learning_rate": 3.6312209094163984e-06, "loss": 0.0, "step": 10453 }, { "epoch": 0.6737127021975897, "grad_norm": 0.0017699274242623294, "learning_rate": 3.6305048335123527e-06, "loss": 0.0, "step": 10454 }, { "epoch": 0.6737771476445189, "grad_norm": 0.0001289320386872569, "learning_rate": 3.629788757608307e-06, "loss": 0.0, "step": 10455 }, { "epoch": 0.6738415930914481, "grad_norm": 0.017909558811695953, "learning_rate": 3.629072681704261e-06, "loss": 0.0, "step": 10456 }, { "epoch": 0.6739060385383773, "grad_norm": 0.8763559597886815, "learning_rate": 3.628356605800215e-06, "loss": 0.0046, "step": 10457 }, { "epoch": 0.6739704839853065, "grad_norm": 0.00027276141796046864, "learning_rate": 3.627640529896169e-06, "loss": 0.0, "step": 10458 }, { "epoch": 0.6740349294322356, "grad_norm": 2.0164551910408486, "learning_rate": 3.6269244539921234e-06, "loss": 0.0096, "step": 10459 }, { "epoch": 0.6740993748791648, "grad_norm": 0.00017352627869547739, "learning_rate": 3.6262083780880777e-06, "loss": 0.0, "step": 10460 }, { "epoch": 0.674163820326094, "grad_norm": 0.0023188071801069775, "learning_rate": 3.6254923021840315e-06, "loss": 0.0, "step": 10461 }, { "epoch": 0.6742282657730231, "grad_norm": 0.004480081186474678, "learning_rate": 3.624776226279986e-06, "loss": 0.0, "step": 10462 }, { "epoch": 0.6742927112199523, "grad_norm": 0.003887006294521679, "learning_rate": 3.6240601503759406e-06, "loss": 0.0, "step": 10463 }, { "epoch": 0.6743571566668815, "grad_norm": 0.00026653596353573024, "learning_rate": 3.6233440744718944e-06, "loss": 0.0, "step": 10464 }, { "epoch": 0.6744216021138106, "grad_norm": 0.00012657650139262516, "learning_rate": 3.6226279985678487e-06, "loss": 0.0, "step": 10465 }, { "epoch": 0.6744860475607398, "grad_norm": 0.0031173247361041124, "learning_rate": 3.6219119226638026e-06, "loss": 0.0, "step": 10466 }, { "epoch": 0.674550493007669, "grad_norm": 0.23030574335774806, "learning_rate": 3.621195846759757e-06, "loss": 0.0005, "step": 10467 }, { "epoch": 0.6746149384545982, "grad_norm": 0.05862473026472335, "learning_rate": 3.6204797708557112e-06, "loss": 0.0006, "step": 10468 }, { "epoch": 0.6746793839015274, "grad_norm": 0.0003351622123052417, "learning_rate": 3.619763694951665e-06, "loss": 0.0, "step": 10469 }, { "epoch": 0.6747438293484566, "grad_norm": 0.00023233519721705282, "learning_rate": 3.6190476190476194e-06, "loss": 0.0, "step": 10470 }, { "epoch": 0.6748082747953857, "grad_norm": 0.0035822470136483175, "learning_rate": 3.6183315431435733e-06, "loss": 0.0, "step": 10471 }, { "epoch": 0.6748727202423149, "grad_norm": 0.0004719819756121723, "learning_rate": 3.6176154672395276e-06, "loss": 0.0, "step": 10472 }, { "epoch": 0.674937165689244, "grad_norm": 0.004377407497624656, "learning_rate": 3.6168993913354814e-06, "loss": 0.0, "step": 10473 }, { "epoch": 0.6750016111361732, "grad_norm": 0.00190075138693745, "learning_rate": 3.616183315431436e-06, "loss": 0.0, "step": 10474 }, { "epoch": 0.6750660565831024, "grad_norm": 0.004317345459707869, "learning_rate": 3.6154672395273905e-06, "loss": 0.0, "step": 10475 }, { "epoch": 0.6751305020300316, "grad_norm": 0.0014907583759944283, "learning_rate": 3.6147511636233448e-06, "loss": 0.0, "step": 10476 }, { "epoch": 0.6751949474769607, "grad_norm": 0.0018248634672141091, "learning_rate": 3.6140350877192986e-06, "loss": 0.0, "step": 10477 }, { "epoch": 0.6752593929238899, "grad_norm": 0.000946823727749897, "learning_rate": 3.613319011815253e-06, "loss": 0.0, "step": 10478 }, { "epoch": 0.6753238383708191, "grad_norm": 0.014769051121375172, "learning_rate": 3.612602935911207e-06, "loss": 0.0001, "step": 10479 }, { "epoch": 0.6753882838177483, "grad_norm": 0.0037277822552123324, "learning_rate": 3.611886860007161e-06, "loss": 0.0, "step": 10480 }, { "epoch": 0.6754527292646775, "grad_norm": 0.003775872304534592, "learning_rate": 3.611170784103115e-06, "loss": 0.0, "step": 10481 }, { "epoch": 0.6755171747116067, "grad_norm": 0.0004943616280413878, "learning_rate": 3.6104547081990693e-06, "loss": 0.0, "step": 10482 }, { "epoch": 0.6755816201585358, "grad_norm": 1.1965923065115471, "learning_rate": 3.6097386322950236e-06, "loss": 0.0035, "step": 10483 }, { "epoch": 0.675646065605465, "grad_norm": 0.04179766004707495, "learning_rate": 3.6090225563909775e-06, "loss": 0.0001, "step": 10484 }, { "epoch": 0.6757105110523941, "grad_norm": 0.006248109641524533, "learning_rate": 3.6083064804869318e-06, "loss": 0.0, "step": 10485 }, { "epoch": 0.6757749564993233, "grad_norm": 0.0003839978250398899, "learning_rate": 3.6075904045828865e-06, "loss": 0.0, "step": 10486 }, { "epoch": 0.6758394019462525, "grad_norm": 0.006934538377296952, "learning_rate": 3.6068743286788403e-06, "loss": 0.0, "step": 10487 }, { "epoch": 0.6759038473931817, "grad_norm": 0.00016873140626715315, "learning_rate": 3.6061582527747946e-06, "loss": 0.0, "step": 10488 }, { "epoch": 0.6759682928401108, "grad_norm": 0.01762084273842604, "learning_rate": 3.6054421768707485e-06, "loss": 0.0, "step": 10489 }, { "epoch": 0.67603273828704, "grad_norm": 0.46920061704573907, "learning_rate": 3.604726100966703e-06, "loss": 0.0009, "step": 10490 }, { "epoch": 0.6760971837339692, "grad_norm": 0.05753273553305687, "learning_rate": 3.604010025062657e-06, "loss": 0.0004, "step": 10491 }, { "epoch": 0.6761616291808984, "grad_norm": 0.0003436129141898001, "learning_rate": 3.603293949158611e-06, "loss": 0.0, "step": 10492 }, { "epoch": 0.6762260746278276, "grad_norm": 0.0037809435500818145, "learning_rate": 3.6025778732545653e-06, "loss": 0.0, "step": 10493 }, { "epoch": 0.6762905200747568, "grad_norm": 0.0003521812249406442, "learning_rate": 3.601861797350519e-06, "loss": 0.0, "step": 10494 }, { "epoch": 0.676354965521686, "grad_norm": 0.00025709992147468773, "learning_rate": 3.6011457214464735e-06, "loss": 0.0, "step": 10495 }, { "epoch": 0.676419410968615, "grad_norm": 0.005573763881191372, "learning_rate": 3.6004296455424278e-06, "loss": 0.0, "step": 10496 }, { "epoch": 0.6764838564155442, "grad_norm": 0.02430756228068615, "learning_rate": 3.5997135696383816e-06, "loss": 0.0, "step": 10497 }, { "epoch": 0.6765483018624734, "grad_norm": 0.017898788840930383, "learning_rate": 3.5989974937343364e-06, "loss": 0.0, "step": 10498 }, { "epoch": 0.6766127473094026, "grad_norm": 0.008393354787299115, "learning_rate": 3.5982814178302907e-06, "loss": 0.0, "step": 10499 }, { "epoch": 0.6766771927563318, "grad_norm": 0.003671404471434068, "learning_rate": 3.5975653419262445e-06, "loss": 0.0, "step": 10500 }, { "epoch": 0.6767416382032609, "grad_norm": 0.0050457432757222654, "learning_rate": 3.596849266022199e-06, "loss": 0.0, "step": 10501 }, { "epoch": 0.6768060836501901, "grad_norm": 0.00114763842394845, "learning_rate": 3.5961331901181527e-06, "loss": 0.0, "step": 10502 }, { "epoch": 0.6768705290971193, "grad_norm": 0.0008545279761466147, "learning_rate": 3.595417114214107e-06, "loss": 0.0, "step": 10503 }, { "epoch": 0.6769349745440485, "grad_norm": 0.00034751624162334137, "learning_rate": 3.5947010383100613e-06, "loss": 0.0, "step": 10504 }, { "epoch": 0.6769994199909777, "grad_norm": 0.015582994439986578, "learning_rate": 3.593984962406015e-06, "loss": 0.0, "step": 10505 }, { "epoch": 0.6770638654379069, "grad_norm": 0.040964320145442906, "learning_rate": 3.5932688865019695e-06, "loss": 0.0001, "step": 10506 }, { "epoch": 0.6771283108848359, "grad_norm": 0.010853452613408077, "learning_rate": 3.5925528105979234e-06, "loss": 0.0001, "step": 10507 }, { "epoch": 0.6771927563317651, "grad_norm": 0.02525209498935422, "learning_rate": 3.5918367346938777e-06, "loss": 0.0001, "step": 10508 }, { "epoch": 0.6772572017786943, "grad_norm": 0.022178140745299144, "learning_rate": 3.5911206587898324e-06, "loss": 0.0, "step": 10509 }, { "epoch": 0.6773216472256235, "grad_norm": 0.008376311699932986, "learning_rate": 3.5904045828857863e-06, "loss": 0.0001, "step": 10510 }, { "epoch": 0.6773860926725527, "grad_norm": 0.001207447161221422, "learning_rate": 3.5896885069817406e-06, "loss": 0.0, "step": 10511 }, { "epoch": 0.6774505381194819, "grad_norm": 0.0029356591079545027, "learning_rate": 3.588972431077695e-06, "loss": 0.0, "step": 10512 }, { "epoch": 0.677514983566411, "grad_norm": 0.017201078670219305, "learning_rate": 3.5882563551736487e-06, "loss": 0.0, "step": 10513 }, { "epoch": 0.6775794290133402, "grad_norm": 0.00014996079096439788, "learning_rate": 3.587540279269603e-06, "loss": 0.0, "step": 10514 }, { "epoch": 0.6776438744602694, "grad_norm": 0.01063396315857339, "learning_rate": 3.586824203365557e-06, "loss": 0.0001, "step": 10515 }, { "epoch": 0.6777083199071986, "grad_norm": 0.000691680774477271, "learning_rate": 3.586108127461511e-06, "loss": 0.0, "step": 10516 }, { "epoch": 0.6777727653541278, "grad_norm": 0.057157545980472034, "learning_rate": 3.585392051557465e-06, "loss": 0.0002, "step": 10517 }, { "epoch": 0.6778372108010569, "grad_norm": 0.00029040852215511486, "learning_rate": 3.5846759756534194e-06, "loss": 0.0, "step": 10518 }, { "epoch": 0.677901656247986, "grad_norm": 0.003804059215826307, "learning_rate": 3.5839598997493737e-06, "loss": 0.0, "step": 10519 }, { "epoch": 0.6779661016949152, "grad_norm": 0.009584564853263446, "learning_rate": 3.5832438238453275e-06, "loss": 0.0001, "step": 10520 }, { "epoch": 0.6780305471418444, "grad_norm": 0.0015142631223244963, "learning_rate": 3.5825277479412823e-06, "loss": 0.0, "step": 10521 }, { "epoch": 0.6780949925887736, "grad_norm": 0.0005718388729823543, "learning_rate": 3.5818116720372366e-06, "loss": 0.0, "step": 10522 }, { "epoch": 0.6781594380357028, "grad_norm": 0.012310340151570972, "learning_rate": 3.5810955961331904e-06, "loss": 0.0, "step": 10523 }, { "epoch": 0.678223883482632, "grad_norm": 0.00043170374826357976, "learning_rate": 3.5803795202291447e-06, "loss": 0.0, "step": 10524 }, { "epoch": 0.6782883289295611, "grad_norm": 0.027632992934753346, "learning_rate": 3.5796634443250986e-06, "loss": 0.0, "step": 10525 }, { "epoch": 0.6783527743764903, "grad_norm": 0.001345522934734624, "learning_rate": 3.578947368421053e-06, "loss": 0.0, "step": 10526 }, { "epoch": 0.6784172198234195, "grad_norm": 0.0008807635063323086, "learning_rate": 3.5782312925170072e-06, "loss": 0.0, "step": 10527 }, { "epoch": 0.6784816652703487, "grad_norm": 0.002128459962488065, "learning_rate": 3.577515216612961e-06, "loss": 0.0, "step": 10528 }, { "epoch": 0.6785461107172778, "grad_norm": 0.0006520468088653372, "learning_rate": 3.5767991407089154e-06, "loss": 0.0, "step": 10529 }, { "epoch": 0.678610556164207, "grad_norm": 0.02735517573911248, "learning_rate": 3.5760830648048693e-06, "loss": 0.0, "step": 10530 }, { "epoch": 0.6786750016111361, "grad_norm": 0.4512681734435724, "learning_rate": 3.5753669889008236e-06, "loss": 0.0032, "step": 10531 }, { "epoch": 0.6787394470580653, "grad_norm": 0.0036808522723295995, "learning_rate": 3.574650912996778e-06, "loss": 0.0, "step": 10532 }, { "epoch": 0.6788038925049945, "grad_norm": 7.517311356908132e-05, "learning_rate": 3.573934837092732e-06, "loss": 0.0, "step": 10533 }, { "epoch": 0.6788683379519237, "grad_norm": 0.11774557663446128, "learning_rate": 3.5732187611886865e-06, "loss": 0.0018, "step": 10534 }, { "epoch": 0.6789327833988529, "grad_norm": 0.02647121095954798, "learning_rate": 3.5725026852846408e-06, "loss": 0.0002, "step": 10535 }, { "epoch": 0.6789972288457821, "grad_norm": 0.035277858321916856, "learning_rate": 3.5717866093805946e-06, "loss": 0.0, "step": 10536 }, { "epoch": 0.6790616742927112, "grad_norm": 0.41236802431897623, "learning_rate": 3.571070533476549e-06, "loss": 0.0012, "step": 10537 }, { "epoch": 0.6791261197396404, "grad_norm": 0.0002817103551778652, "learning_rate": 3.570354457572503e-06, "loss": 0.0, "step": 10538 }, { "epoch": 0.6791905651865696, "grad_norm": 0.0067230817027691785, "learning_rate": 3.569638381668457e-06, "loss": 0.0, "step": 10539 }, { "epoch": 0.6792550106334987, "grad_norm": 0.0019842109186937367, "learning_rate": 3.5689223057644114e-06, "loss": 0.0, "step": 10540 }, { "epoch": 0.6793194560804279, "grad_norm": 0.0008768804890963424, "learning_rate": 3.5682062298603653e-06, "loss": 0.0, "step": 10541 }, { "epoch": 0.6793839015273571, "grad_norm": 0.052039909390406425, "learning_rate": 3.5674901539563196e-06, "loss": 0.0, "step": 10542 }, { "epoch": 0.6794483469742862, "grad_norm": 0.021989190979891722, "learning_rate": 3.5667740780522735e-06, "loss": 0.0, "step": 10543 }, { "epoch": 0.6795127924212154, "grad_norm": 0.0007638219482915713, "learning_rate": 3.5660580021482278e-06, "loss": 0.0, "step": 10544 }, { "epoch": 0.6795772378681446, "grad_norm": 1.0232287477942066, "learning_rate": 3.5653419262441825e-06, "loss": 0.0047, "step": 10545 }, { "epoch": 0.6796416833150738, "grad_norm": 0.001587062494048603, "learning_rate": 3.5646258503401363e-06, "loss": 0.0, "step": 10546 }, { "epoch": 0.679706128762003, "grad_norm": 0.018462534965671778, "learning_rate": 3.5639097744360906e-06, "loss": 0.0, "step": 10547 }, { "epoch": 0.6797705742089322, "grad_norm": 0.013377734314796297, "learning_rate": 3.563193698532045e-06, "loss": 0.0, "step": 10548 }, { "epoch": 0.6798350196558614, "grad_norm": 0.9623252028734142, "learning_rate": 3.562477622627999e-06, "loss": 0.0018, "step": 10549 }, { "epoch": 0.6798994651027905, "grad_norm": 0.0017705625053728948, "learning_rate": 3.561761546723953e-06, "loss": 0.0, "step": 10550 }, { "epoch": 0.6799639105497196, "grad_norm": 0.00017713197781208631, "learning_rate": 3.561045470819907e-06, "loss": 0.0, "step": 10551 }, { "epoch": 0.6800283559966488, "grad_norm": 0.020427811779944585, "learning_rate": 3.5603293949158613e-06, "loss": 0.0, "step": 10552 }, { "epoch": 0.680092801443578, "grad_norm": 2.4890916304191784, "learning_rate": 3.5596133190118156e-06, "loss": 0.0203, "step": 10553 }, { "epoch": 0.6801572468905072, "grad_norm": 0.001329291279453643, "learning_rate": 3.5588972431077695e-06, "loss": 0.0, "step": 10554 }, { "epoch": 0.6802216923374363, "grad_norm": 0.1980014711359068, "learning_rate": 3.5581811672037238e-06, "loss": 0.0014, "step": 10555 }, { "epoch": 0.6802861377843655, "grad_norm": 0.0051990412329180015, "learning_rate": 3.5574650912996785e-06, "loss": 0.0, "step": 10556 }, { "epoch": 0.6803505832312947, "grad_norm": 0.017665047112924433, "learning_rate": 3.5567490153956324e-06, "loss": 0.0, "step": 10557 }, { "epoch": 0.6804150286782239, "grad_norm": 0.001657024012985856, "learning_rate": 3.5560329394915867e-06, "loss": 0.0, "step": 10558 }, { "epoch": 0.6804794741251531, "grad_norm": 0.0008793134489039452, "learning_rate": 3.5553168635875405e-06, "loss": 0.0, "step": 10559 }, { "epoch": 0.6805439195720823, "grad_norm": 0.0007335920395671671, "learning_rate": 3.554600787683495e-06, "loss": 0.0, "step": 10560 }, { "epoch": 0.6806083650190115, "grad_norm": 0.0045903698338804495, "learning_rate": 3.553884711779449e-06, "loss": 0.0, "step": 10561 }, { "epoch": 0.6806728104659406, "grad_norm": 0.000662578100139206, "learning_rate": 3.553168635875403e-06, "loss": 0.0, "step": 10562 }, { "epoch": 0.6807372559128697, "grad_norm": 0.0009688112778837717, "learning_rate": 3.5524525599713573e-06, "loss": 0.0, "step": 10563 }, { "epoch": 0.6808017013597989, "grad_norm": 0.0014027335978677121, "learning_rate": 3.551736484067311e-06, "loss": 0.0, "step": 10564 }, { "epoch": 0.6808661468067281, "grad_norm": 6.014109765455335e-05, "learning_rate": 3.5510204081632655e-06, "loss": 0.0, "step": 10565 }, { "epoch": 0.6809305922536573, "grad_norm": 0.0012696360857282496, "learning_rate": 3.5503043322592194e-06, "loss": 0.0, "step": 10566 }, { "epoch": 0.6809950377005864, "grad_norm": 0.0020264019625621287, "learning_rate": 3.5495882563551737e-06, "loss": 0.0, "step": 10567 }, { "epoch": 0.6810594831475156, "grad_norm": 0.0001300260332313125, "learning_rate": 3.5488721804511284e-06, "loss": 0.0, "step": 10568 }, { "epoch": 0.6811239285944448, "grad_norm": 0.11144132046079638, "learning_rate": 3.5481561045470827e-06, "loss": 0.0001, "step": 10569 }, { "epoch": 0.681188374041374, "grad_norm": 0.0040325540813843305, "learning_rate": 3.5474400286430366e-06, "loss": 0.0, "step": 10570 }, { "epoch": 0.6812528194883032, "grad_norm": 0.00011549701540412366, "learning_rate": 3.546723952738991e-06, "loss": 0.0, "step": 10571 }, { "epoch": 0.6813172649352324, "grad_norm": 0.011268333555497053, "learning_rate": 3.5460078768349447e-06, "loss": 0.0001, "step": 10572 }, { "epoch": 0.6813817103821616, "grad_norm": 0.0006847428079941398, "learning_rate": 3.545291800930899e-06, "loss": 0.0, "step": 10573 }, { "epoch": 0.6814461558290906, "grad_norm": 0.009468708355515205, "learning_rate": 3.544575725026853e-06, "loss": 0.0, "step": 10574 }, { "epoch": 0.6815106012760198, "grad_norm": 0.006315914786455601, "learning_rate": 3.543859649122807e-06, "loss": 0.0, "step": 10575 }, { "epoch": 0.681575046722949, "grad_norm": 0.006827846703311107, "learning_rate": 3.5431435732187615e-06, "loss": 0.0, "step": 10576 }, { "epoch": 0.6816394921698782, "grad_norm": 0.029604628539176416, "learning_rate": 3.5424274973147154e-06, "loss": 0.0002, "step": 10577 }, { "epoch": 0.6817039376168074, "grad_norm": 0.051215081592810865, "learning_rate": 3.5417114214106697e-06, "loss": 0.0002, "step": 10578 }, { "epoch": 0.6817683830637365, "grad_norm": 0.016480723087823855, "learning_rate": 3.5409953455066235e-06, "loss": 0.0001, "step": 10579 }, { "epoch": 0.6818328285106657, "grad_norm": 0.0006766804201587516, "learning_rate": 3.5402792696025783e-06, "loss": 0.0, "step": 10580 }, { "epoch": 0.6818972739575949, "grad_norm": 0.0019866411763065327, "learning_rate": 3.5395631936985326e-06, "loss": 0.0, "step": 10581 }, { "epoch": 0.6819617194045241, "grad_norm": 0.0004252701808851787, "learning_rate": 3.5388471177944864e-06, "loss": 0.0, "step": 10582 }, { "epoch": 0.6820261648514533, "grad_norm": 0.00031296475467953647, "learning_rate": 3.5381310418904407e-06, "loss": 0.0, "step": 10583 }, { "epoch": 0.6820906102983825, "grad_norm": 0.00832683055652773, "learning_rate": 3.537414965986395e-06, "loss": 0.0, "step": 10584 }, { "epoch": 0.6821550557453115, "grad_norm": 0.021867903048846355, "learning_rate": 3.536698890082349e-06, "loss": 0.0002, "step": 10585 }, { "epoch": 0.6822195011922407, "grad_norm": 0.48420196949728816, "learning_rate": 3.5359828141783032e-06, "loss": 0.0034, "step": 10586 }, { "epoch": 0.6822839466391699, "grad_norm": 0.0024605339805774456, "learning_rate": 3.535266738274257e-06, "loss": 0.0, "step": 10587 }, { "epoch": 0.6823483920860991, "grad_norm": 0.011360759604440345, "learning_rate": 3.5345506623702114e-06, "loss": 0.0, "step": 10588 }, { "epoch": 0.6824128375330283, "grad_norm": 0.0009775462378554197, "learning_rate": 3.5338345864661657e-06, "loss": 0.0, "step": 10589 }, { "epoch": 0.6824772829799575, "grad_norm": 0.001092895735309418, "learning_rate": 3.5331185105621196e-06, "loss": 0.0, "step": 10590 }, { "epoch": 0.6825417284268867, "grad_norm": 0.0027246687183617134, "learning_rate": 3.5324024346580743e-06, "loss": 0.0, "step": 10591 }, { "epoch": 0.6826061738738158, "grad_norm": 0.0007618257843385489, "learning_rate": 3.5316863587540286e-06, "loss": 0.0, "step": 10592 }, { "epoch": 0.682670619320745, "grad_norm": 0.18731919153917567, "learning_rate": 3.5309702828499825e-06, "loss": 0.0006, "step": 10593 }, { "epoch": 0.6827350647676742, "grad_norm": 0.00033679473596064693, "learning_rate": 3.5302542069459368e-06, "loss": 0.0, "step": 10594 }, { "epoch": 0.6827995102146034, "grad_norm": 0.3369809402210824, "learning_rate": 3.5295381310418906e-06, "loss": 0.0029, "step": 10595 }, { "epoch": 0.6828639556615325, "grad_norm": 0.007815817597920173, "learning_rate": 3.528822055137845e-06, "loss": 0.0, "step": 10596 }, { "epoch": 0.6829284011084616, "grad_norm": 0.01382235643620594, "learning_rate": 3.5281059792337992e-06, "loss": 0.0, "step": 10597 }, { "epoch": 0.6829928465553908, "grad_norm": 0.0020800628314902774, "learning_rate": 3.527389903329753e-06, "loss": 0.0, "step": 10598 }, { "epoch": 0.68305729200232, "grad_norm": 0.014260892033320325, "learning_rate": 3.5266738274257074e-06, "loss": 0.0, "step": 10599 }, { "epoch": 0.6831217374492492, "grad_norm": 0.0028445670215190093, "learning_rate": 3.5259577515216613e-06, "loss": 0.0, "step": 10600 }, { "epoch": 0.6831861828961784, "grad_norm": 0.0008771312919833636, "learning_rate": 3.5252416756176156e-06, "loss": 0.0, "step": 10601 }, { "epoch": 0.6832506283431076, "grad_norm": 0.00018122247011554844, "learning_rate": 3.52452559971357e-06, "loss": 0.0, "step": 10602 }, { "epoch": 0.6833150737900368, "grad_norm": 0.0008672090977392164, "learning_rate": 3.523809523809524e-06, "loss": 0.0, "step": 10603 }, { "epoch": 0.6833795192369659, "grad_norm": 0.0035352795664838213, "learning_rate": 3.5230934479054785e-06, "loss": 0.0, "step": 10604 }, { "epoch": 0.6834439646838951, "grad_norm": 0.0002724304476376662, "learning_rate": 3.5223773720014328e-06, "loss": 0.0, "step": 10605 }, { "epoch": 0.6835084101308243, "grad_norm": 0.5370559665829012, "learning_rate": 3.5216612960973866e-06, "loss": 0.0063, "step": 10606 }, { "epoch": 0.6835728555777534, "grad_norm": 0.0007439298862113308, "learning_rate": 3.520945220193341e-06, "loss": 0.0, "step": 10607 }, { "epoch": 0.6836373010246826, "grad_norm": 0.009702558574781294, "learning_rate": 3.520229144289295e-06, "loss": 0.0, "step": 10608 }, { "epoch": 0.6837017464716117, "grad_norm": 0.0011016534404801282, "learning_rate": 3.519513068385249e-06, "loss": 0.0, "step": 10609 }, { "epoch": 0.6837661919185409, "grad_norm": 0.017491655293344472, "learning_rate": 3.5187969924812034e-06, "loss": 0.0015, "step": 10610 }, { "epoch": 0.6838306373654701, "grad_norm": 0.173947823491574, "learning_rate": 3.5180809165771573e-06, "loss": 0.0015, "step": 10611 }, { "epoch": 0.6838950828123993, "grad_norm": 0.0004411864460499935, "learning_rate": 3.5173648406731116e-06, "loss": 0.0, "step": 10612 }, { "epoch": 0.6839595282593285, "grad_norm": 0.05571853331626954, "learning_rate": 3.5166487647690655e-06, "loss": 0.0004, "step": 10613 }, { "epoch": 0.6840239737062577, "grad_norm": 0.0010041793745665642, "learning_rate": 3.5159326888650198e-06, "loss": 0.0, "step": 10614 }, { "epoch": 0.6840884191531869, "grad_norm": 0.0983773975207399, "learning_rate": 3.5152166129609745e-06, "loss": 0.0002, "step": 10615 }, { "epoch": 0.684152864600116, "grad_norm": 0.028895436137705906, "learning_rate": 3.5145005370569284e-06, "loss": 0.0, "step": 10616 }, { "epoch": 0.6842173100470452, "grad_norm": 0.03385962597373734, "learning_rate": 3.5137844611528827e-06, "loss": 0.0, "step": 10617 }, { "epoch": 0.6842817554939743, "grad_norm": 0.023507120372402705, "learning_rate": 3.513068385248837e-06, "loss": 0.0002, "step": 10618 }, { "epoch": 0.6843462009409035, "grad_norm": 0.024889008607317492, "learning_rate": 3.512352309344791e-06, "loss": 0.0, "step": 10619 }, { "epoch": 0.6844106463878327, "grad_norm": 0.0014179847128868335, "learning_rate": 3.511636233440745e-06, "loss": 0.0, "step": 10620 }, { "epoch": 0.6844750918347619, "grad_norm": 0.013406477162943287, "learning_rate": 3.510920157536699e-06, "loss": 0.0, "step": 10621 }, { "epoch": 0.684539537281691, "grad_norm": 0.28027808953122846, "learning_rate": 3.5102040816326533e-06, "loss": 0.0023, "step": 10622 }, { "epoch": 0.6846039827286202, "grad_norm": 0.00861103756734106, "learning_rate": 3.509488005728607e-06, "loss": 0.0, "step": 10623 }, { "epoch": 0.6846684281755494, "grad_norm": 0.09979447762360293, "learning_rate": 3.5087719298245615e-06, "loss": 0.0017, "step": 10624 }, { "epoch": 0.6847328736224786, "grad_norm": 0.08866655842139125, "learning_rate": 3.5080558539205158e-06, "loss": 0.0017, "step": 10625 }, { "epoch": 0.6847973190694078, "grad_norm": 0.09441540926634437, "learning_rate": 3.5073397780164705e-06, "loss": 0.0024, "step": 10626 }, { "epoch": 0.684861764516337, "grad_norm": 0.10564041736905677, "learning_rate": 3.5066237021124244e-06, "loss": 0.0009, "step": 10627 }, { "epoch": 0.6849262099632661, "grad_norm": 0.00668683870026609, "learning_rate": 3.5059076262083787e-06, "loss": 0.0, "step": 10628 }, { "epoch": 0.6849906554101952, "grad_norm": 0.007881264256661852, "learning_rate": 3.5051915503043326e-06, "loss": 0.0, "step": 10629 }, { "epoch": 0.6850551008571244, "grad_norm": 0.009405162217880085, "learning_rate": 3.504475474400287e-06, "loss": 0.0, "step": 10630 }, { "epoch": 0.6851195463040536, "grad_norm": 0.002327160651186275, "learning_rate": 3.5037593984962407e-06, "loss": 0.0, "step": 10631 }, { "epoch": 0.6851839917509828, "grad_norm": 0.02192513837913387, "learning_rate": 3.503043322592195e-06, "loss": 0.0, "step": 10632 }, { "epoch": 0.685248437197912, "grad_norm": 4.1137151297167716e-05, "learning_rate": 3.5023272466881493e-06, "loss": 0.0, "step": 10633 }, { "epoch": 0.6853128826448411, "grad_norm": 0.26734327883653797, "learning_rate": 3.501611170784103e-06, "loss": 0.0002, "step": 10634 }, { "epoch": 0.6853773280917703, "grad_norm": 0.03286824451174531, "learning_rate": 3.5008950948800575e-06, "loss": 0.0001, "step": 10635 }, { "epoch": 0.6854417735386995, "grad_norm": 0.03350731756160981, "learning_rate": 3.5001790189760114e-06, "loss": 0.0001, "step": 10636 }, { "epoch": 0.6855062189856287, "grad_norm": 0.003075251300806112, "learning_rate": 3.4994629430719657e-06, "loss": 0.0, "step": 10637 }, { "epoch": 0.6855706644325579, "grad_norm": 0.010674466356599537, "learning_rate": 3.4987468671679204e-06, "loss": 0.0, "step": 10638 }, { "epoch": 0.6856351098794871, "grad_norm": 0.013242521425976893, "learning_rate": 3.4980307912638743e-06, "loss": 0.0, "step": 10639 }, { "epoch": 0.6856995553264162, "grad_norm": 0.23402005179956256, "learning_rate": 3.4973147153598286e-06, "loss": 0.0005, "step": 10640 }, { "epoch": 0.6857640007733453, "grad_norm": 0.006823108867421351, "learning_rate": 3.496598639455783e-06, "loss": 0.0, "step": 10641 }, { "epoch": 0.6858284462202745, "grad_norm": 0.005193235394536426, "learning_rate": 3.4958825635517367e-06, "loss": 0.0, "step": 10642 }, { "epoch": 0.6858928916672037, "grad_norm": 0.05758601897356446, "learning_rate": 3.495166487647691e-06, "loss": 0.0001, "step": 10643 }, { "epoch": 0.6859573371141329, "grad_norm": 0.0002420785467362924, "learning_rate": 3.494450411743645e-06, "loss": 0.0, "step": 10644 }, { "epoch": 0.686021782561062, "grad_norm": 0.013903106800684462, "learning_rate": 3.4937343358395992e-06, "loss": 0.0, "step": 10645 }, { "epoch": 0.6860862280079912, "grad_norm": 0.0013727656492007864, "learning_rate": 3.4930182599355535e-06, "loss": 0.0, "step": 10646 }, { "epoch": 0.6861506734549204, "grad_norm": 0.005639060991519206, "learning_rate": 3.4923021840315074e-06, "loss": 0.0, "step": 10647 }, { "epoch": 0.6862151189018496, "grad_norm": 0.00012059895211766672, "learning_rate": 3.4915861081274617e-06, "loss": 0.0, "step": 10648 }, { "epoch": 0.6862795643487788, "grad_norm": 0.000295021304383494, "learning_rate": 3.4908700322234156e-06, "loss": 0.0, "step": 10649 }, { "epoch": 0.686344009795708, "grad_norm": 0.006692717847118855, "learning_rate": 3.4901539563193703e-06, "loss": 0.0, "step": 10650 }, { "epoch": 0.6864084552426372, "grad_norm": 0.0008308969576633938, "learning_rate": 3.4894378804153246e-06, "loss": 0.0, "step": 10651 }, { "epoch": 0.6864729006895662, "grad_norm": 0.02864746925349982, "learning_rate": 3.4887218045112785e-06, "loss": 0.0001, "step": 10652 }, { "epoch": 0.6865373461364954, "grad_norm": 0.0004295749878789247, "learning_rate": 3.4880057286072328e-06, "loss": 0.0, "step": 10653 }, { "epoch": 0.6866017915834246, "grad_norm": 0.0003032108960250809, "learning_rate": 3.487289652703187e-06, "loss": 0.0, "step": 10654 }, { "epoch": 0.6866662370303538, "grad_norm": 0.0032508764152708682, "learning_rate": 3.486573576799141e-06, "loss": 0.0, "step": 10655 }, { "epoch": 0.686730682477283, "grad_norm": 0.3218022338317263, "learning_rate": 3.4858575008950952e-06, "loss": 0.0021, "step": 10656 }, { "epoch": 0.6867951279242122, "grad_norm": 0.013413374439972325, "learning_rate": 3.485141424991049e-06, "loss": 0.0001, "step": 10657 }, { "epoch": 0.6868595733711413, "grad_norm": 0.0022459060523140166, "learning_rate": 3.4844253490870034e-06, "loss": 0.0, "step": 10658 }, { "epoch": 0.6869240188180705, "grad_norm": 0.0025463195329138914, "learning_rate": 3.4837092731829573e-06, "loss": 0.0, "step": 10659 }, { "epoch": 0.6869884642649997, "grad_norm": 0.07051437260233849, "learning_rate": 3.4829931972789116e-06, "loss": 0.0017, "step": 10660 }, { "epoch": 0.6870529097119289, "grad_norm": 0.002290644215445257, "learning_rate": 3.4822771213748663e-06, "loss": 0.0, "step": 10661 }, { "epoch": 0.6871173551588581, "grad_norm": 0.00039243013710876817, "learning_rate": 3.4815610454708206e-06, "loss": 0.0, "step": 10662 }, { "epoch": 0.6871818006057872, "grad_norm": 0.001251349809574215, "learning_rate": 3.4808449695667745e-06, "loss": 0.0, "step": 10663 }, { "epoch": 0.6872462460527163, "grad_norm": 0.02047947219666481, "learning_rate": 3.4801288936627288e-06, "loss": 0.0001, "step": 10664 }, { "epoch": 0.6873106914996455, "grad_norm": 0.030119726822941218, "learning_rate": 3.4794128177586826e-06, "loss": 0.0001, "step": 10665 }, { "epoch": 0.6873751369465747, "grad_norm": 0.005128161573191081, "learning_rate": 3.478696741854637e-06, "loss": 0.0, "step": 10666 }, { "epoch": 0.6874395823935039, "grad_norm": 0.06837941625274693, "learning_rate": 3.477980665950591e-06, "loss": 0.0001, "step": 10667 }, { "epoch": 0.6875040278404331, "grad_norm": 0.03753905404742778, "learning_rate": 3.477264590046545e-06, "loss": 0.0, "step": 10668 }, { "epoch": 0.6875684732873623, "grad_norm": 0.0034440145309386637, "learning_rate": 3.4765485141424994e-06, "loss": 0.0, "step": 10669 }, { "epoch": 0.6876329187342914, "grad_norm": 0.14563323382401655, "learning_rate": 3.4758324382384533e-06, "loss": 0.0002, "step": 10670 }, { "epoch": 0.6876973641812206, "grad_norm": 0.007722652524045524, "learning_rate": 3.4751163623344076e-06, "loss": 0.0, "step": 10671 }, { "epoch": 0.6877618096281498, "grad_norm": 0.0032592152884733613, "learning_rate": 3.4744002864303615e-06, "loss": 0.0, "step": 10672 }, { "epoch": 0.687826255075079, "grad_norm": 0.008455749548395619, "learning_rate": 3.473684210526316e-06, "loss": 0.0, "step": 10673 }, { "epoch": 0.6878907005220081, "grad_norm": 0.09676174091931598, "learning_rate": 3.4729681346222705e-06, "loss": 0.0003, "step": 10674 }, { "epoch": 0.6879551459689373, "grad_norm": 0.0010924824758405294, "learning_rate": 3.4722520587182244e-06, "loss": 0.0, "step": 10675 }, { "epoch": 0.6880195914158664, "grad_norm": 0.010774355308097124, "learning_rate": 3.4715359828141787e-06, "loss": 0.0016, "step": 10676 }, { "epoch": 0.6880840368627956, "grad_norm": 0.01795689798363324, "learning_rate": 3.470819906910133e-06, "loss": 0.0, "step": 10677 }, { "epoch": 0.6881484823097248, "grad_norm": 0.0027896545235474085, "learning_rate": 3.470103831006087e-06, "loss": 0.0, "step": 10678 }, { "epoch": 0.688212927756654, "grad_norm": 0.22212767183159907, "learning_rate": 3.469387755102041e-06, "loss": 0.0005, "step": 10679 }, { "epoch": 0.6882773732035832, "grad_norm": 0.0003912980966343014, "learning_rate": 3.468671679197995e-06, "loss": 0.0, "step": 10680 }, { "epoch": 0.6883418186505124, "grad_norm": 0.0005156493416187653, "learning_rate": 3.4679556032939493e-06, "loss": 0.0, "step": 10681 }, { "epoch": 0.6884062640974415, "grad_norm": 0.021590134423169444, "learning_rate": 3.4672395273899036e-06, "loss": 0.0, "step": 10682 }, { "epoch": 0.6884707095443707, "grad_norm": 0.023141501828001405, "learning_rate": 3.4665234514858575e-06, "loss": 0.0, "step": 10683 }, { "epoch": 0.6885351549912999, "grad_norm": 0.007118016829719887, "learning_rate": 3.4658073755818118e-06, "loss": 0.0001, "step": 10684 }, { "epoch": 0.688599600438229, "grad_norm": 0.25918209531883035, "learning_rate": 3.4650912996777665e-06, "loss": 0.0005, "step": 10685 }, { "epoch": 0.6886640458851582, "grad_norm": 0.004237713823522767, "learning_rate": 3.4643752237737204e-06, "loss": 0.0, "step": 10686 }, { "epoch": 0.6887284913320874, "grad_norm": 0.038278050860711475, "learning_rate": 3.4636591478696747e-06, "loss": 0.0, "step": 10687 }, { "epoch": 0.6887929367790165, "grad_norm": 0.000426746379782578, "learning_rate": 3.4629430719656286e-06, "loss": 0.0, "step": 10688 }, { "epoch": 0.6888573822259457, "grad_norm": 0.0034051439268017562, "learning_rate": 3.462226996061583e-06, "loss": 0.0, "step": 10689 }, { "epoch": 0.6889218276728749, "grad_norm": 0.018312988913133918, "learning_rate": 3.461510920157537e-06, "loss": 0.0, "step": 10690 }, { "epoch": 0.6889862731198041, "grad_norm": 0.0008704069031677928, "learning_rate": 3.460794844253491e-06, "loss": 0.0, "step": 10691 }, { "epoch": 0.6890507185667333, "grad_norm": 0.00011138326842831115, "learning_rate": 3.4600787683494453e-06, "loss": 0.0, "step": 10692 }, { "epoch": 0.6891151640136625, "grad_norm": 6.343770674378296e-05, "learning_rate": 3.459362692445399e-06, "loss": 0.0, "step": 10693 }, { "epoch": 0.6891796094605916, "grad_norm": 0.000850738362306896, "learning_rate": 3.4586466165413535e-06, "loss": 0.0, "step": 10694 }, { "epoch": 0.6892440549075208, "grad_norm": 0.15241558875373135, "learning_rate": 3.457930540637308e-06, "loss": 0.0005, "step": 10695 }, { "epoch": 0.6893085003544499, "grad_norm": 0.0037602512408664913, "learning_rate": 3.457214464733262e-06, "loss": 0.0, "step": 10696 }, { "epoch": 0.6893729458013791, "grad_norm": 0.16921284611583368, "learning_rate": 3.4564983888292164e-06, "loss": 0.0014, "step": 10697 }, { "epoch": 0.6894373912483083, "grad_norm": 0.04126671726871299, "learning_rate": 3.4557823129251707e-06, "loss": 0.0001, "step": 10698 }, { "epoch": 0.6895018366952375, "grad_norm": 0.24241588960094448, "learning_rate": 3.4550662370211246e-06, "loss": 0.0006, "step": 10699 }, { "epoch": 0.6895662821421666, "grad_norm": 0.0011653326035232546, "learning_rate": 3.454350161117079e-06, "loss": 0.0, "step": 10700 }, { "epoch": 0.6896307275890958, "grad_norm": 0.0053412736709698705, "learning_rate": 3.4536340852130327e-06, "loss": 0.0001, "step": 10701 }, { "epoch": 0.689695173036025, "grad_norm": 0.0027695711960786054, "learning_rate": 3.452918009308987e-06, "loss": 0.0, "step": 10702 }, { "epoch": 0.6897596184829542, "grad_norm": 0.000903931335369121, "learning_rate": 3.4522019334049413e-06, "loss": 0.0, "step": 10703 }, { "epoch": 0.6898240639298834, "grad_norm": 0.0032606382890580307, "learning_rate": 3.4514858575008952e-06, "loss": 0.0, "step": 10704 }, { "epoch": 0.6898885093768126, "grad_norm": 0.0019891495099166236, "learning_rate": 3.4507697815968495e-06, "loss": 0.0, "step": 10705 }, { "epoch": 0.6899529548237417, "grad_norm": 0.014469597284214894, "learning_rate": 3.4500537056928034e-06, "loss": 0.0001, "step": 10706 }, { "epoch": 0.6900174002706708, "grad_norm": 0.0036697474437936265, "learning_rate": 3.4493376297887577e-06, "loss": 0.0001, "step": 10707 }, { "epoch": 0.6900818457176, "grad_norm": 0.001639690389296713, "learning_rate": 3.4486215538847124e-06, "loss": 0.0, "step": 10708 }, { "epoch": 0.6901462911645292, "grad_norm": 0.001005555053943566, "learning_rate": 3.4479054779806663e-06, "loss": 0.0, "step": 10709 }, { "epoch": 0.6902107366114584, "grad_norm": 0.09674031832156237, "learning_rate": 3.4471894020766206e-06, "loss": 0.0002, "step": 10710 }, { "epoch": 0.6902751820583876, "grad_norm": 0.17584697766118745, "learning_rate": 3.446473326172575e-06, "loss": 0.0001, "step": 10711 }, { "epoch": 0.6903396275053167, "grad_norm": 0.0003299470982010914, "learning_rate": 3.4457572502685288e-06, "loss": 0.0, "step": 10712 }, { "epoch": 0.6904040729522459, "grad_norm": 0.0012676769435483054, "learning_rate": 3.445041174364483e-06, "loss": 0.0, "step": 10713 }, { "epoch": 0.6904685183991751, "grad_norm": 0.0001250973702220075, "learning_rate": 3.444325098460437e-06, "loss": 0.0, "step": 10714 }, { "epoch": 0.6905329638461043, "grad_norm": 0.0016230453999726533, "learning_rate": 3.4436090225563912e-06, "loss": 0.0, "step": 10715 }, { "epoch": 0.6905974092930335, "grad_norm": 0.7712714178984845, "learning_rate": 3.442892946652345e-06, "loss": 0.0016, "step": 10716 }, { "epoch": 0.6906618547399627, "grad_norm": 0.012430636333727142, "learning_rate": 3.4421768707482994e-06, "loss": 0.0, "step": 10717 }, { "epoch": 0.6907263001868919, "grad_norm": 0.00038744205676761773, "learning_rate": 3.4414607948442537e-06, "loss": 0.0, "step": 10718 }, { "epoch": 0.6907907456338209, "grad_norm": 0.001783591718322277, "learning_rate": 3.4407447189402076e-06, "loss": 0.0, "step": 10719 }, { "epoch": 0.6908551910807501, "grad_norm": 0.00012095653921781966, "learning_rate": 3.4400286430361623e-06, "loss": 0.0, "step": 10720 }, { "epoch": 0.6909196365276793, "grad_norm": 1.0810774582805374, "learning_rate": 3.4393125671321166e-06, "loss": 0.0064, "step": 10721 }, { "epoch": 0.6909840819746085, "grad_norm": 0.006299809688504188, "learning_rate": 3.4385964912280705e-06, "loss": 0.0, "step": 10722 }, { "epoch": 0.6910485274215377, "grad_norm": 0.002780990180928392, "learning_rate": 3.4378804153240248e-06, "loss": 0.0, "step": 10723 }, { "epoch": 0.6911129728684668, "grad_norm": 0.057636782983209595, "learning_rate": 3.4371643394199786e-06, "loss": 0.0001, "step": 10724 }, { "epoch": 0.691177418315396, "grad_norm": 0.0008725221319251466, "learning_rate": 3.436448263515933e-06, "loss": 0.0, "step": 10725 }, { "epoch": 0.6912418637623252, "grad_norm": 0.026326179625309587, "learning_rate": 3.4357321876118872e-06, "loss": 0.0002, "step": 10726 }, { "epoch": 0.6913063092092544, "grad_norm": 0.521066305189868, "learning_rate": 3.435016111707841e-06, "loss": 0.0036, "step": 10727 }, { "epoch": 0.6913707546561836, "grad_norm": 0.00021508146730564562, "learning_rate": 3.4343000358037954e-06, "loss": 0.0, "step": 10728 }, { "epoch": 0.6914352001031128, "grad_norm": 5.2332481397573e-05, "learning_rate": 3.4335839598997493e-06, "loss": 0.0, "step": 10729 }, { "epoch": 0.6914996455500418, "grad_norm": 0.0025788359544813106, "learning_rate": 3.4328678839957036e-06, "loss": 0.0, "step": 10730 }, { "epoch": 0.691564090996971, "grad_norm": 0.7060573678077907, "learning_rate": 3.4321518080916583e-06, "loss": 0.0049, "step": 10731 }, { "epoch": 0.6916285364439002, "grad_norm": 0.44814332690464703, "learning_rate": 3.431435732187612e-06, "loss": 0.0036, "step": 10732 }, { "epoch": 0.6916929818908294, "grad_norm": 0.3525222816594591, "learning_rate": 3.4307196562835665e-06, "loss": 0.0026, "step": 10733 }, { "epoch": 0.6917574273377586, "grad_norm": 0.10370206238152077, "learning_rate": 3.4300035803795208e-06, "loss": 0.0001, "step": 10734 }, { "epoch": 0.6918218727846878, "grad_norm": 0.00335698601833503, "learning_rate": 3.4292875044754747e-06, "loss": 0.0, "step": 10735 }, { "epoch": 0.691886318231617, "grad_norm": 0.006926952458820839, "learning_rate": 3.428571428571429e-06, "loss": 0.0001, "step": 10736 }, { "epoch": 0.6919507636785461, "grad_norm": 0.0003903578948580782, "learning_rate": 3.427855352667383e-06, "loss": 0.0, "step": 10737 }, { "epoch": 0.6920152091254753, "grad_norm": 0.06039492128259943, "learning_rate": 3.427139276763337e-06, "loss": 0.0016, "step": 10738 }, { "epoch": 0.6920796545724045, "grad_norm": 0.21314970053730722, "learning_rate": 3.4264232008592914e-06, "loss": 0.0006, "step": 10739 }, { "epoch": 0.6921441000193337, "grad_norm": 0.009887192339525711, "learning_rate": 3.4257071249552453e-06, "loss": 0.0001, "step": 10740 }, { "epoch": 0.6922085454662628, "grad_norm": 0.0023324859903385378, "learning_rate": 3.4249910490511996e-06, "loss": 0.0, "step": 10741 }, { "epoch": 0.6922729909131919, "grad_norm": 0.0017754964193178054, "learning_rate": 3.4242749731471535e-06, "loss": 0.0, "step": 10742 }, { "epoch": 0.6923374363601211, "grad_norm": 0.11862733365427512, "learning_rate": 3.423558897243108e-06, "loss": 0.0002, "step": 10743 }, { "epoch": 0.6924018818070503, "grad_norm": 0.12538965409042613, "learning_rate": 3.4228428213390625e-06, "loss": 0.0017, "step": 10744 }, { "epoch": 0.6924663272539795, "grad_norm": 0.1790398036759841, "learning_rate": 3.4221267454350164e-06, "loss": 0.0012, "step": 10745 }, { "epoch": 0.6925307727009087, "grad_norm": 0.016628649121499365, "learning_rate": 3.4214106695309707e-06, "loss": 0.0001, "step": 10746 }, { "epoch": 0.6925952181478379, "grad_norm": 0.003371619886892593, "learning_rate": 3.420694593626925e-06, "loss": 0.0, "step": 10747 }, { "epoch": 0.692659663594767, "grad_norm": 0.24389006154688553, "learning_rate": 3.419978517722879e-06, "loss": 0.0006, "step": 10748 }, { "epoch": 0.6927241090416962, "grad_norm": 0.14232802337674683, "learning_rate": 3.419262441818833e-06, "loss": 0.0003, "step": 10749 }, { "epoch": 0.6927885544886254, "grad_norm": 0.019736813074936816, "learning_rate": 3.418546365914787e-06, "loss": 0.0, "step": 10750 }, { "epoch": 0.6928529999355546, "grad_norm": 0.05097154578008529, "learning_rate": 3.4178302900107413e-06, "loss": 0.0, "step": 10751 }, { "epoch": 0.6929174453824837, "grad_norm": 0.00107446008995132, "learning_rate": 3.4171142141066956e-06, "loss": 0.0, "step": 10752 }, { "epoch": 0.6929818908294129, "grad_norm": 0.012435354325548929, "learning_rate": 3.4163981382026495e-06, "loss": 0.0, "step": 10753 }, { "epoch": 0.693046336276342, "grad_norm": 0.021957133028454156, "learning_rate": 3.415682062298604e-06, "loss": 0.0, "step": 10754 }, { "epoch": 0.6931107817232712, "grad_norm": 0.0002480213227601337, "learning_rate": 3.4149659863945585e-06, "loss": 0.0, "step": 10755 }, { "epoch": 0.6931752271702004, "grad_norm": 0.0623140810818036, "learning_rate": 3.4142499104905124e-06, "loss": 0.0, "step": 10756 }, { "epoch": 0.6932396726171296, "grad_norm": 0.0017141559011994213, "learning_rate": 3.4135338345864667e-06, "loss": 0.0, "step": 10757 }, { "epoch": 0.6933041180640588, "grad_norm": 0.014615192039343805, "learning_rate": 3.4128177586824206e-06, "loss": 0.0, "step": 10758 }, { "epoch": 0.693368563510988, "grad_norm": 0.0017439914087220587, "learning_rate": 3.412101682778375e-06, "loss": 0.0, "step": 10759 }, { "epoch": 0.6934330089579172, "grad_norm": 0.0012153270549224956, "learning_rate": 3.411385606874329e-06, "loss": 0.0, "step": 10760 }, { "epoch": 0.6934974544048463, "grad_norm": 0.014303665596357573, "learning_rate": 3.410669530970283e-06, "loss": 0.0, "step": 10761 }, { "epoch": 0.6935618998517755, "grad_norm": 7.439093973990937e-05, "learning_rate": 3.4099534550662373e-06, "loss": 0.0, "step": 10762 }, { "epoch": 0.6936263452987046, "grad_norm": 0.19960271073953648, "learning_rate": 3.4092373791621912e-06, "loss": 0.0003, "step": 10763 }, { "epoch": 0.6936907907456338, "grad_norm": 0.018784679469903552, "learning_rate": 3.4085213032581455e-06, "loss": 0.0001, "step": 10764 }, { "epoch": 0.693755236192563, "grad_norm": 0.06285531773678558, "learning_rate": 3.4078052273540994e-06, "loss": 0.0003, "step": 10765 }, { "epoch": 0.6938196816394921, "grad_norm": 0.02711289526911007, "learning_rate": 3.407089151450054e-06, "loss": 0.0, "step": 10766 }, { "epoch": 0.6938841270864213, "grad_norm": 0.005589801722090483, "learning_rate": 3.4063730755460084e-06, "loss": 0.0, "step": 10767 }, { "epoch": 0.6939485725333505, "grad_norm": 0.012897300775219056, "learning_rate": 3.4056569996419627e-06, "loss": 0.0001, "step": 10768 }, { "epoch": 0.6940130179802797, "grad_norm": 0.01099284164409652, "learning_rate": 3.4049409237379166e-06, "loss": 0.0001, "step": 10769 }, { "epoch": 0.6940774634272089, "grad_norm": 0.0016891558399507187, "learning_rate": 3.404224847833871e-06, "loss": 0.0, "step": 10770 }, { "epoch": 0.6941419088741381, "grad_norm": 0.006634654576318617, "learning_rate": 3.4035087719298248e-06, "loss": 0.0, "step": 10771 }, { "epoch": 0.6942063543210673, "grad_norm": 0.13746926170005874, "learning_rate": 3.402792696025779e-06, "loss": 0.0001, "step": 10772 }, { "epoch": 0.6942707997679964, "grad_norm": 0.02645937380972612, "learning_rate": 3.402076620121733e-06, "loss": 0.0001, "step": 10773 }, { "epoch": 0.6943352452149255, "grad_norm": 0.0033664323788412985, "learning_rate": 3.4013605442176872e-06, "loss": 0.0, "step": 10774 }, { "epoch": 0.6943996906618547, "grad_norm": 0.0022188361754276117, "learning_rate": 3.4006444683136415e-06, "loss": 0.0, "step": 10775 }, { "epoch": 0.6944641361087839, "grad_norm": 0.0004594087068661742, "learning_rate": 3.3999283924095954e-06, "loss": 0.0, "step": 10776 }, { "epoch": 0.6945285815557131, "grad_norm": 0.0032890205841026223, "learning_rate": 3.3992123165055497e-06, "loss": 0.0, "step": 10777 }, { "epoch": 0.6945930270026422, "grad_norm": 0.07355642935563832, "learning_rate": 3.3984962406015044e-06, "loss": 0.0001, "step": 10778 }, { "epoch": 0.6946574724495714, "grad_norm": 0.019127169783181307, "learning_rate": 3.3977801646974583e-06, "loss": 0.0, "step": 10779 }, { "epoch": 0.6947219178965006, "grad_norm": 0.0010884441376112783, "learning_rate": 3.3970640887934126e-06, "loss": 0.0, "step": 10780 }, { "epoch": 0.6947863633434298, "grad_norm": 0.0023750329760530457, "learning_rate": 3.3963480128893665e-06, "loss": 0.0, "step": 10781 }, { "epoch": 0.694850808790359, "grad_norm": 0.0007830149276151957, "learning_rate": 3.3956319369853208e-06, "loss": 0.0, "step": 10782 }, { "epoch": 0.6949152542372882, "grad_norm": 0.04364455540834853, "learning_rate": 3.394915861081275e-06, "loss": 0.0001, "step": 10783 }, { "epoch": 0.6949796996842174, "grad_norm": 0.016637574609427337, "learning_rate": 3.394199785177229e-06, "loss": 0.0002, "step": 10784 }, { "epoch": 0.6950441451311464, "grad_norm": 0.002434681407404538, "learning_rate": 3.3934837092731832e-06, "loss": 0.0, "step": 10785 }, { "epoch": 0.6951085905780756, "grad_norm": 0.006948871061763611, "learning_rate": 3.392767633369137e-06, "loss": 0.0, "step": 10786 }, { "epoch": 0.6951730360250048, "grad_norm": 0.004640961971091515, "learning_rate": 3.3920515574650914e-06, "loss": 0.0, "step": 10787 }, { "epoch": 0.695237481471934, "grad_norm": 0.00041903898312994556, "learning_rate": 3.3913354815610457e-06, "loss": 0.0, "step": 10788 }, { "epoch": 0.6953019269188632, "grad_norm": 0.053400623619324995, "learning_rate": 3.3906194056569996e-06, "loss": 0.0001, "step": 10789 }, { "epoch": 0.6953663723657924, "grad_norm": 0.0006922881994538185, "learning_rate": 3.3899033297529543e-06, "loss": 0.0, "step": 10790 }, { "epoch": 0.6954308178127215, "grad_norm": 0.009405572997517235, "learning_rate": 3.3891872538489086e-06, "loss": 0.0, "step": 10791 }, { "epoch": 0.6954952632596507, "grad_norm": 0.00023536262169622017, "learning_rate": 3.3884711779448625e-06, "loss": 0.0, "step": 10792 }, { "epoch": 0.6955597087065799, "grad_norm": 0.000701178056448638, "learning_rate": 3.3877551020408168e-06, "loss": 0.0, "step": 10793 }, { "epoch": 0.6956241541535091, "grad_norm": 0.04493621669798482, "learning_rate": 3.3870390261367707e-06, "loss": 0.0001, "step": 10794 }, { "epoch": 0.6956885996004383, "grad_norm": 0.03422308278740534, "learning_rate": 3.386322950232725e-06, "loss": 0.0, "step": 10795 }, { "epoch": 0.6957530450473675, "grad_norm": 0.0007628641240363434, "learning_rate": 3.3856068743286793e-06, "loss": 0.0, "step": 10796 }, { "epoch": 0.6958174904942965, "grad_norm": 9.180779913812038e-05, "learning_rate": 3.384890798424633e-06, "loss": 0.0, "step": 10797 }, { "epoch": 0.6958819359412257, "grad_norm": 0.10470937724474787, "learning_rate": 3.3841747225205874e-06, "loss": 0.0004, "step": 10798 }, { "epoch": 0.6959463813881549, "grad_norm": 0.05540156375470692, "learning_rate": 3.3834586466165413e-06, "loss": 0.0002, "step": 10799 }, { "epoch": 0.6960108268350841, "grad_norm": 0.0035984583674340755, "learning_rate": 3.3827425707124956e-06, "loss": 0.0, "step": 10800 }, { "epoch": 0.6960752722820133, "grad_norm": 0.0015088185612061752, "learning_rate": 3.3820264948084503e-06, "loss": 0.0, "step": 10801 }, { "epoch": 0.6961397177289425, "grad_norm": 0.04853284722866676, "learning_rate": 3.381310418904404e-06, "loss": 0.0001, "step": 10802 }, { "epoch": 0.6962041631758716, "grad_norm": 0.0012557955823111358, "learning_rate": 3.3805943430003585e-06, "loss": 0.0, "step": 10803 }, { "epoch": 0.6962686086228008, "grad_norm": 0.01631435553766888, "learning_rate": 3.379878267096313e-06, "loss": 0.0, "step": 10804 }, { "epoch": 0.69633305406973, "grad_norm": 0.003211445832762659, "learning_rate": 3.3791621911922667e-06, "loss": 0.0, "step": 10805 }, { "epoch": 0.6963974995166592, "grad_norm": 0.0006773092919922552, "learning_rate": 3.378446115288221e-06, "loss": 0.0, "step": 10806 }, { "epoch": 0.6964619449635884, "grad_norm": 0.097851148669192, "learning_rate": 3.377730039384175e-06, "loss": 0.0002, "step": 10807 }, { "epoch": 0.6965263904105174, "grad_norm": 0.000700090125051526, "learning_rate": 3.377013963480129e-06, "loss": 0.0, "step": 10808 }, { "epoch": 0.6965908358574466, "grad_norm": 0.01792088747671209, "learning_rate": 3.376297887576083e-06, "loss": 0.0001, "step": 10809 }, { "epoch": 0.6966552813043758, "grad_norm": 0.004783641588541656, "learning_rate": 3.3755818116720373e-06, "loss": 0.0, "step": 10810 }, { "epoch": 0.696719726751305, "grad_norm": 0.0020453910864311918, "learning_rate": 3.3748657357679916e-06, "loss": 0.0, "step": 10811 }, { "epoch": 0.6967841721982342, "grad_norm": 0.0832148634326752, "learning_rate": 3.3741496598639455e-06, "loss": 0.0005, "step": 10812 }, { "epoch": 0.6968486176451634, "grad_norm": 0.0001007275952549603, "learning_rate": 3.3734335839599002e-06, "loss": 0.0, "step": 10813 }, { "epoch": 0.6969130630920926, "grad_norm": 0.005044496693702052, "learning_rate": 3.3727175080558545e-06, "loss": 0.0, "step": 10814 }, { "epoch": 0.6969775085390217, "grad_norm": 0.17172673180819398, "learning_rate": 3.3720014321518084e-06, "loss": 0.0019, "step": 10815 }, { "epoch": 0.6970419539859509, "grad_norm": 0.0009186030854356705, "learning_rate": 3.3712853562477627e-06, "loss": 0.0, "step": 10816 }, { "epoch": 0.6971063994328801, "grad_norm": 0.012453707151726626, "learning_rate": 3.3705692803437166e-06, "loss": 0.0, "step": 10817 }, { "epoch": 0.6971708448798093, "grad_norm": 0.0009751430932883996, "learning_rate": 3.369853204439671e-06, "loss": 0.0, "step": 10818 }, { "epoch": 0.6972352903267384, "grad_norm": 0.0004079109002148108, "learning_rate": 3.369137128535625e-06, "loss": 0.0, "step": 10819 }, { "epoch": 0.6972997357736676, "grad_norm": 0.2528612640796825, "learning_rate": 3.368421052631579e-06, "loss": 0.0003, "step": 10820 }, { "epoch": 0.6973641812205967, "grad_norm": 0.1139363960253434, "learning_rate": 3.3677049767275333e-06, "loss": 0.0013, "step": 10821 }, { "epoch": 0.6974286266675259, "grad_norm": 0.005500716077919033, "learning_rate": 3.3669889008234872e-06, "loss": 0.0, "step": 10822 }, { "epoch": 0.6974930721144551, "grad_norm": 0.08116801795662411, "learning_rate": 3.3662728249194415e-06, "loss": 0.0001, "step": 10823 }, { "epoch": 0.6975575175613843, "grad_norm": 0.0035024566652384916, "learning_rate": 3.365556749015396e-06, "loss": 0.0, "step": 10824 }, { "epoch": 0.6976219630083135, "grad_norm": 0.015370411768365658, "learning_rate": 3.36484067311135e-06, "loss": 0.0001, "step": 10825 }, { "epoch": 0.6976864084552427, "grad_norm": 0.06178277284897271, "learning_rate": 3.3641245972073044e-06, "loss": 0.0002, "step": 10826 }, { "epoch": 0.6977508539021718, "grad_norm": 0.00037744400225636377, "learning_rate": 3.3634085213032587e-06, "loss": 0.0, "step": 10827 }, { "epoch": 0.697815299349101, "grad_norm": 0.005827135498293183, "learning_rate": 3.3626924453992126e-06, "loss": 0.0, "step": 10828 }, { "epoch": 0.6978797447960302, "grad_norm": 0.22508163560634026, "learning_rate": 3.361976369495167e-06, "loss": 0.0002, "step": 10829 }, { "epoch": 0.6979441902429593, "grad_norm": 0.00026929893020132337, "learning_rate": 3.3612602935911208e-06, "loss": 0.0, "step": 10830 }, { "epoch": 0.6980086356898885, "grad_norm": 0.0020361584289357894, "learning_rate": 3.360544217687075e-06, "loss": 0.0, "step": 10831 }, { "epoch": 0.6980730811368177, "grad_norm": 0.0004221505435830251, "learning_rate": 3.3598281417830294e-06, "loss": 0.0, "step": 10832 }, { "epoch": 0.6981375265837468, "grad_norm": 0.05349353661611049, "learning_rate": 3.3591120658789832e-06, "loss": 0.0001, "step": 10833 }, { "epoch": 0.698201972030676, "grad_norm": 0.05797448742304965, "learning_rate": 3.3583959899749375e-06, "loss": 0.0008, "step": 10834 }, { "epoch": 0.6982664174776052, "grad_norm": 0.0008506722447421768, "learning_rate": 3.3576799140708914e-06, "loss": 0.0, "step": 10835 }, { "epoch": 0.6983308629245344, "grad_norm": 0.00021320946359430795, "learning_rate": 3.356963838166846e-06, "loss": 0.0, "step": 10836 }, { "epoch": 0.6983953083714636, "grad_norm": 0.39711350760972824, "learning_rate": 3.3562477622628004e-06, "loss": 0.0011, "step": 10837 }, { "epoch": 0.6984597538183928, "grad_norm": 0.04127152969316703, "learning_rate": 3.3555316863587543e-06, "loss": 0.0001, "step": 10838 }, { "epoch": 0.6985241992653219, "grad_norm": 0.006618943598043729, "learning_rate": 3.3548156104547086e-06, "loss": 0.0, "step": 10839 }, { "epoch": 0.6985886447122511, "grad_norm": 0.04176915064793223, "learning_rate": 3.354099534550663e-06, "loss": 0.0001, "step": 10840 }, { "epoch": 0.6986530901591802, "grad_norm": 0.0001714131345175587, "learning_rate": 3.3533834586466168e-06, "loss": 0.0, "step": 10841 }, { "epoch": 0.6987175356061094, "grad_norm": 0.0010024964582433073, "learning_rate": 3.352667382742571e-06, "loss": 0.0, "step": 10842 }, { "epoch": 0.6987819810530386, "grad_norm": 0.00040623033050484755, "learning_rate": 3.351951306838525e-06, "loss": 0.0, "step": 10843 }, { "epoch": 0.6988464264999678, "grad_norm": 0.00402618346109113, "learning_rate": 3.3512352309344792e-06, "loss": 0.0, "step": 10844 }, { "epoch": 0.6989108719468969, "grad_norm": 0.22191415897871572, "learning_rate": 3.3505191550304335e-06, "loss": 0.0002, "step": 10845 }, { "epoch": 0.6989753173938261, "grad_norm": 0.014029645455560074, "learning_rate": 3.3498030791263874e-06, "loss": 0.0015, "step": 10846 }, { "epoch": 0.6990397628407553, "grad_norm": 8.324158896226437e-05, "learning_rate": 3.3490870032223417e-06, "loss": 0.0, "step": 10847 }, { "epoch": 0.6991042082876845, "grad_norm": 0.0006412068556455778, "learning_rate": 3.3483709273182964e-06, "loss": 0.0, "step": 10848 }, { "epoch": 0.6991686537346137, "grad_norm": 4.787087265715539e-05, "learning_rate": 3.3476548514142503e-06, "loss": 0.0, "step": 10849 }, { "epoch": 0.6992330991815429, "grad_norm": 0.00030164510962597276, "learning_rate": 3.3469387755102046e-06, "loss": 0.0, "step": 10850 }, { "epoch": 0.699297544628472, "grad_norm": 0.30012512478553754, "learning_rate": 3.3462226996061585e-06, "loss": 0.0024, "step": 10851 }, { "epoch": 0.6993619900754011, "grad_norm": 0.0059113082282672945, "learning_rate": 3.3455066237021128e-06, "loss": 0.0001, "step": 10852 }, { "epoch": 0.6994264355223303, "grad_norm": 0.000438918178855173, "learning_rate": 3.344790547798067e-06, "loss": 0.0, "step": 10853 }, { "epoch": 0.6994908809692595, "grad_norm": 0.0004580738369121127, "learning_rate": 3.344074471894021e-06, "loss": 0.0, "step": 10854 }, { "epoch": 0.6995553264161887, "grad_norm": 0.0008452896284506048, "learning_rate": 3.3433583959899753e-06, "loss": 0.0, "step": 10855 }, { "epoch": 0.6996197718631179, "grad_norm": 0.0020973779835961976, "learning_rate": 3.342642320085929e-06, "loss": 0.0, "step": 10856 }, { "epoch": 0.699684217310047, "grad_norm": 0.0008297177221649112, "learning_rate": 3.3419262441818834e-06, "loss": 0.0, "step": 10857 }, { "epoch": 0.6997486627569762, "grad_norm": 0.0027529704284585976, "learning_rate": 3.3412101682778373e-06, "loss": 0.0, "step": 10858 }, { "epoch": 0.6998131082039054, "grad_norm": 0.0008741259795175414, "learning_rate": 3.3404940923737916e-06, "loss": 0.0, "step": 10859 }, { "epoch": 0.6998775536508346, "grad_norm": 0.0035309142146040173, "learning_rate": 3.3397780164697463e-06, "loss": 0.0, "step": 10860 }, { "epoch": 0.6999419990977638, "grad_norm": 0.4958865867061118, "learning_rate": 3.3390619405657006e-06, "loss": 0.0028, "step": 10861 }, { "epoch": 0.700006444544693, "grad_norm": 0.0072535462915399975, "learning_rate": 3.3383458646616545e-06, "loss": 0.0, "step": 10862 }, { "epoch": 0.7000708899916221, "grad_norm": 0.02617736765886348, "learning_rate": 3.337629788757609e-06, "loss": 0.0, "step": 10863 }, { "epoch": 0.7001353354385512, "grad_norm": 0.025339570434611148, "learning_rate": 3.3369137128535627e-06, "loss": 0.0, "step": 10864 }, { "epoch": 0.7001997808854804, "grad_norm": 0.0001549518610992421, "learning_rate": 3.336197636949517e-06, "loss": 0.0, "step": 10865 }, { "epoch": 0.7002642263324096, "grad_norm": 0.0006099219318847603, "learning_rate": 3.335481561045471e-06, "loss": 0.0, "step": 10866 }, { "epoch": 0.7003286717793388, "grad_norm": 0.00019973177616691002, "learning_rate": 3.334765485141425e-06, "loss": 0.0, "step": 10867 }, { "epoch": 0.700393117226268, "grad_norm": 0.0203871412425311, "learning_rate": 3.3340494092373794e-06, "loss": 0.0, "step": 10868 }, { "epoch": 0.7004575626731971, "grad_norm": 0.00012341835821285828, "learning_rate": 3.3333333333333333e-06, "loss": 0.0, "step": 10869 }, { "epoch": 0.7005220081201263, "grad_norm": 3.691681563023068e-05, "learning_rate": 3.3326172574292876e-06, "loss": 0.0, "step": 10870 }, { "epoch": 0.7005864535670555, "grad_norm": 0.041492466191393934, "learning_rate": 3.3319011815252423e-06, "loss": 0.0002, "step": 10871 }, { "epoch": 0.7006508990139847, "grad_norm": 0.16212739040050764, "learning_rate": 3.3311851056211962e-06, "loss": 0.0005, "step": 10872 }, { "epoch": 0.7007153444609139, "grad_norm": 0.001993052236712686, "learning_rate": 3.3304690297171505e-06, "loss": 0.0, "step": 10873 }, { "epoch": 0.7007797899078431, "grad_norm": 0.0021670367114100574, "learning_rate": 3.3297529538131044e-06, "loss": 0.0, "step": 10874 }, { "epoch": 0.7008442353547721, "grad_norm": 0.0013950064005321817, "learning_rate": 3.3290368779090587e-06, "loss": 0.0, "step": 10875 }, { "epoch": 0.7009086808017013, "grad_norm": 0.0007154109309629973, "learning_rate": 3.328320802005013e-06, "loss": 0.0, "step": 10876 }, { "epoch": 0.7009731262486305, "grad_norm": 0.03324730647386008, "learning_rate": 3.327604726100967e-06, "loss": 0.0003, "step": 10877 }, { "epoch": 0.7010375716955597, "grad_norm": 0.001851781740293082, "learning_rate": 3.326888650196921e-06, "loss": 0.0, "step": 10878 }, { "epoch": 0.7011020171424889, "grad_norm": 0.47544556412340017, "learning_rate": 3.326172574292875e-06, "loss": 0.0027, "step": 10879 }, { "epoch": 0.7011664625894181, "grad_norm": 0.03185670473705229, "learning_rate": 3.3254564983888293e-06, "loss": 0.0, "step": 10880 }, { "epoch": 0.7012309080363472, "grad_norm": 7.054423748793358e-05, "learning_rate": 3.3247404224847836e-06, "loss": 0.0, "step": 10881 }, { "epoch": 0.7012953534832764, "grad_norm": 0.006720652727550893, "learning_rate": 3.3240243465807375e-06, "loss": 0.0001, "step": 10882 }, { "epoch": 0.7013597989302056, "grad_norm": 0.00926893960988273, "learning_rate": 3.3233082706766922e-06, "loss": 0.0, "step": 10883 }, { "epoch": 0.7014242443771348, "grad_norm": 0.0037271138191373935, "learning_rate": 3.3225921947726465e-06, "loss": 0.0, "step": 10884 }, { "epoch": 0.701488689824064, "grad_norm": 0.26028400697964355, "learning_rate": 3.3218761188686004e-06, "loss": 0.0001, "step": 10885 }, { "epoch": 0.7015531352709931, "grad_norm": 0.008150517832203049, "learning_rate": 3.3211600429645547e-06, "loss": 0.0, "step": 10886 }, { "epoch": 0.7016175807179222, "grad_norm": 0.016123935218947682, "learning_rate": 3.3204439670605086e-06, "loss": 0.0, "step": 10887 }, { "epoch": 0.7016820261648514, "grad_norm": 0.0025847379419570832, "learning_rate": 3.319727891156463e-06, "loss": 0.0, "step": 10888 }, { "epoch": 0.7017464716117806, "grad_norm": 0.02264905884294404, "learning_rate": 3.319011815252417e-06, "loss": 0.0, "step": 10889 }, { "epoch": 0.7018109170587098, "grad_norm": 0.012890156146756319, "learning_rate": 3.318295739348371e-06, "loss": 0.0, "step": 10890 }, { "epoch": 0.701875362505639, "grad_norm": 0.0005745350547465109, "learning_rate": 3.3175796634443254e-06, "loss": 0.0, "step": 10891 }, { "epoch": 0.7019398079525682, "grad_norm": 0.015874661805627098, "learning_rate": 3.3168635875402792e-06, "loss": 0.0, "step": 10892 }, { "epoch": 0.7020042533994973, "grad_norm": 0.0008491966764384103, "learning_rate": 3.3161475116362335e-06, "loss": 0.0, "step": 10893 }, { "epoch": 0.7020686988464265, "grad_norm": 0.004415929786913111, "learning_rate": 3.315431435732188e-06, "loss": 0.0, "step": 10894 }, { "epoch": 0.7021331442933557, "grad_norm": 0.04686867154635354, "learning_rate": 3.314715359828142e-06, "loss": 0.0001, "step": 10895 }, { "epoch": 0.7021975897402849, "grad_norm": 0.026635296990076995, "learning_rate": 3.3139992839240964e-06, "loss": 0.0001, "step": 10896 }, { "epoch": 0.702262035187214, "grad_norm": 0.0002269513361057032, "learning_rate": 3.3132832080200507e-06, "loss": 0.0, "step": 10897 }, { "epoch": 0.7023264806341432, "grad_norm": 0.0008907517858752973, "learning_rate": 3.3125671321160046e-06, "loss": 0.0, "step": 10898 }, { "epoch": 0.7023909260810723, "grad_norm": 0.13432873522726482, "learning_rate": 3.311851056211959e-06, "loss": 0.0003, "step": 10899 }, { "epoch": 0.7024553715280015, "grad_norm": 0.005984730876438511, "learning_rate": 3.3111349803079128e-06, "loss": 0.0, "step": 10900 }, { "epoch": 0.7025198169749307, "grad_norm": 0.17898883995504597, "learning_rate": 3.310418904403867e-06, "loss": 0.0002, "step": 10901 }, { "epoch": 0.7025842624218599, "grad_norm": 0.00810524339822239, "learning_rate": 3.3097028284998214e-06, "loss": 0.0, "step": 10902 }, { "epoch": 0.7026487078687891, "grad_norm": 0.002444304481964341, "learning_rate": 3.3089867525957752e-06, "loss": 0.0, "step": 10903 }, { "epoch": 0.7027131533157183, "grad_norm": 0.0017770346103979729, "learning_rate": 3.3082706766917295e-06, "loss": 0.0, "step": 10904 }, { "epoch": 0.7027775987626474, "grad_norm": 0.001104809674803772, "learning_rate": 3.3075546007876834e-06, "loss": 0.0, "step": 10905 }, { "epoch": 0.7028420442095766, "grad_norm": 0.009800180749657906, "learning_rate": 3.306838524883638e-06, "loss": 0.0, "step": 10906 }, { "epoch": 0.7029064896565058, "grad_norm": 0.0026749097474474373, "learning_rate": 3.3061224489795924e-06, "loss": 0.0, "step": 10907 }, { "epoch": 0.7029709351034349, "grad_norm": 0.01313033705099765, "learning_rate": 3.3054063730755463e-06, "loss": 0.0001, "step": 10908 }, { "epoch": 0.7030353805503641, "grad_norm": 0.00436655342451471, "learning_rate": 3.3046902971715006e-06, "loss": 0.0, "step": 10909 }, { "epoch": 0.7030998259972933, "grad_norm": 0.007957100060090051, "learning_rate": 3.303974221267455e-06, "loss": 0.0, "step": 10910 }, { "epoch": 0.7031642714442224, "grad_norm": 0.6992021270992388, "learning_rate": 3.3032581453634088e-06, "loss": 0.0096, "step": 10911 }, { "epoch": 0.7032287168911516, "grad_norm": 0.004975517335532034, "learning_rate": 3.302542069459363e-06, "loss": 0.0, "step": 10912 }, { "epoch": 0.7032931623380808, "grad_norm": 0.013508631034990674, "learning_rate": 3.301825993555317e-06, "loss": 0.0, "step": 10913 }, { "epoch": 0.70335760778501, "grad_norm": 0.8323221906874271, "learning_rate": 3.3011099176512713e-06, "loss": 0.0153, "step": 10914 }, { "epoch": 0.7034220532319392, "grad_norm": 0.0002831184487363148, "learning_rate": 3.300393841747225e-06, "loss": 0.0, "step": 10915 }, { "epoch": 0.7034864986788684, "grad_norm": 0.014769501888290858, "learning_rate": 3.2996777658431794e-06, "loss": 0.0, "step": 10916 }, { "epoch": 0.7035509441257976, "grad_norm": 0.4236323875892939, "learning_rate": 3.2989616899391337e-06, "loss": 0.0031, "step": 10917 }, { "epoch": 0.7036153895727267, "grad_norm": 0.011189639856460357, "learning_rate": 3.2982456140350885e-06, "loss": 0.0, "step": 10918 }, { "epoch": 0.7036798350196558, "grad_norm": 0.00829430680010469, "learning_rate": 3.2975295381310423e-06, "loss": 0.0, "step": 10919 }, { "epoch": 0.703744280466585, "grad_norm": 0.0039546963976976515, "learning_rate": 3.2968134622269966e-06, "loss": 0.0, "step": 10920 }, { "epoch": 0.7038087259135142, "grad_norm": 0.01591126280223268, "learning_rate": 3.2960973863229505e-06, "loss": 0.0002, "step": 10921 }, { "epoch": 0.7038731713604434, "grad_norm": 0.012323091831577818, "learning_rate": 3.295381310418905e-06, "loss": 0.0, "step": 10922 }, { "epoch": 0.7039376168073725, "grad_norm": 0.0007462944453321257, "learning_rate": 3.2946652345148587e-06, "loss": 0.0, "step": 10923 }, { "epoch": 0.7040020622543017, "grad_norm": 0.0035263945187977517, "learning_rate": 3.293949158610813e-06, "loss": 0.0, "step": 10924 }, { "epoch": 0.7040665077012309, "grad_norm": 0.006692252532699696, "learning_rate": 3.2932330827067673e-06, "loss": 0.0, "step": 10925 }, { "epoch": 0.7041309531481601, "grad_norm": 0.011862744356579047, "learning_rate": 3.292517006802721e-06, "loss": 0.0, "step": 10926 }, { "epoch": 0.7041953985950893, "grad_norm": 0.0009588384878591355, "learning_rate": 3.2918009308986754e-06, "loss": 0.0, "step": 10927 }, { "epoch": 0.7042598440420185, "grad_norm": 0.06882280888848988, "learning_rate": 3.2910848549946293e-06, "loss": 0.0002, "step": 10928 }, { "epoch": 0.7043242894889477, "grad_norm": 0.03040901424031329, "learning_rate": 3.2903687790905836e-06, "loss": 0.0, "step": 10929 }, { "epoch": 0.7043887349358767, "grad_norm": 0.003522038167252063, "learning_rate": 3.2896527031865383e-06, "loss": 0.0, "step": 10930 }, { "epoch": 0.7044531803828059, "grad_norm": 0.15972103322031145, "learning_rate": 3.2889366272824922e-06, "loss": 0.0018, "step": 10931 }, { "epoch": 0.7045176258297351, "grad_norm": 0.06893447717889657, "learning_rate": 3.2882205513784465e-06, "loss": 0.0001, "step": 10932 }, { "epoch": 0.7045820712766643, "grad_norm": 3.0882135037847522, "learning_rate": 3.287504475474401e-06, "loss": 0.0268, "step": 10933 }, { "epoch": 0.7046465167235935, "grad_norm": 0.139790129637713, "learning_rate": 3.2867883995703547e-06, "loss": 0.0003, "step": 10934 }, { "epoch": 0.7047109621705226, "grad_norm": 0.01926119261248183, "learning_rate": 3.286072323666309e-06, "loss": 0.0, "step": 10935 }, { "epoch": 0.7047754076174518, "grad_norm": 0.003360752891597998, "learning_rate": 3.285356247762263e-06, "loss": 0.0, "step": 10936 }, { "epoch": 0.704839853064381, "grad_norm": 0.02846292441579501, "learning_rate": 3.284640171858217e-06, "loss": 0.0, "step": 10937 }, { "epoch": 0.7049042985113102, "grad_norm": 0.010289595574202861, "learning_rate": 3.2839240959541715e-06, "loss": 0.0, "step": 10938 }, { "epoch": 0.7049687439582394, "grad_norm": 0.022801437024493606, "learning_rate": 3.2832080200501253e-06, "loss": 0.0, "step": 10939 }, { "epoch": 0.7050331894051686, "grad_norm": 0.00020393405837112934, "learning_rate": 3.2824919441460796e-06, "loss": 0.0, "step": 10940 }, { "epoch": 0.7050976348520978, "grad_norm": 0.012538392880569913, "learning_rate": 3.2817758682420335e-06, "loss": 0.0, "step": 10941 }, { "epoch": 0.7051620802990268, "grad_norm": 0.02402381009882897, "learning_rate": 3.2810597923379882e-06, "loss": 0.0, "step": 10942 }, { "epoch": 0.705226525745956, "grad_norm": 0.03254472748774419, "learning_rate": 3.2803437164339425e-06, "loss": 0.0, "step": 10943 }, { "epoch": 0.7052909711928852, "grad_norm": 0.004207380008318214, "learning_rate": 3.2796276405298964e-06, "loss": 0.0, "step": 10944 }, { "epoch": 0.7053554166398144, "grad_norm": 0.020072217846481624, "learning_rate": 3.2789115646258507e-06, "loss": 0.0, "step": 10945 }, { "epoch": 0.7054198620867436, "grad_norm": 0.016132958636072284, "learning_rate": 3.278195488721805e-06, "loss": 0.0, "step": 10946 }, { "epoch": 0.7054843075336727, "grad_norm": 0.04939703204957824, "learning_rate": 3.277479412817759e-06, "loss": 0.0001, "step": 10947 }, { "epoch": 0.7055487529806019, "grad_norm": 0.0008245602376543607, "learning_rate": 3.276763336913713e-06, "loss": 0.0, "step": 10948 }, { "epoch": 0.7056131984275311, "grad_norm": 0.0005412021396897577, "learning_rate": 3.276047261009667e-06, "loss": 0.0, "step": 10949 }, { "epoch": 0.7056776438744603, "grad_norm": 0.0034232693202888597, "learning_rate": 3.2753311851056214e-06, "loss": 0.0, "step": 10950 }, { "epoch": 0.7057420893213895, "grad_norm": 0.005485021994327029, "learning_rate": 3.2746151092015752e-06, "loss": 0.0, "step": 10951 }, { "epoch": 0.7058065347683187, "grad_norm": 0.017868621028440626, "learning_rate": 3.2738990332975295e-06, "loss": 0.0001, "step": 10952 }, { "epoch": 0.7058709802152477, "grad_norm": 0.002559377594809387, "learning_rate": 3.2731829573934843e-06, "loss": 0.0, "step": 10953 }, { "epoch": 0.7059354256621769, "grad_norm": 0.10073643138781171, "learning_rate": 3.2724668814894385e-06, "loss": 0.0001, "step": 10954 }, { "epoch": 0.7059998711091061, "grad_norm": 0.005557456868990858, "learning_rate": 3.2717508055853924e-06, "loss": 0.0, "step": 10955 }, { "epoch": 0.7060643165560353, "grad_norm": 0.018022847987917433, "learning_rate": 3.2710347296813467e-06, "loss": 0.0, "step": 10956 }, { "epoch": 0.7061287620029645, "grad_norm": 0.0010845042955528442, "learning_rate": 3.2703186537773006e-06, "loss": 0.0, "step": 10957 }, { "epoch": 0.7061932074498937, "grad_norm": 0.006678590974714848, "learning_rate": 3.269602577873255e-06, "loss": 0.0, "step": 10958 }, { "epoch": 0.7062576528968229, "grad_norm": 0.24238967916520135, "learning_rate": 3.2688865019692088e-06, "loss": 0.0003, "step": 10959 }, { "epoch": 0.706322098343752, "grad_norm": 0.0005615450731238256, "learning_rate": 3.268170426065163e-06, "loss": 0.0, "step": 10960 }, { "epoch": 0.7063865437906812, "grad_norm": 0.001237721068779436, "learning_rate": 3.2674543501611174e-06, "loss": 0.0, "step": 10961 }, { "epoch": 0.7064509892376104, "grad_norm": 0.0022154238217625168, "learning_rate": 3.2667382742570712e-06, "loss": 0.0, "step": 10962 }, { "epoch": 0.7065154346845396, "grad_norm": 0.00036969182107827645, "learning_rate": 3.2660221983530255e-06, "loss": 0.0, "step": 10963 }, { "epoch": 0.7065798801314687, "grad_norm": 0.14243230155867326, "learning_rate": 3.2653061224489794e-06, "loss": 0.0003, "step": 10964 }, { "epoch": 0.7066443255783978, "grad_norm": 0.005058644391544198, "learning_rate": 3.264590046544934e-06, "loss": 0.0, "step": 10965 }, { "epoch": 0.706708771025327, "grad_norm": 0.0032059561848995183, "learning_rate": 3.2638739706408884e-06, "loss": 0.0, "step": 10966 }, { "epoch": 0.7067732164722562, "grad_norm": 0.013179660912521117, "learning_rate": 3.2631578947368423e-06, "loss": 0.0001, "step": 10967 }, { "epoch": 0.7068376619191854, "grad_norm": 0.11744684687956847, "learning_rate": 3.2624418188327966e-06, "loss": 0.0002, "step": 10968 }, { "epoch": 0.7069021073661146, "grad_norm": 0.004042410681713448, "learning_rate": 3.261725742928751e-06, "loss": 0.0, "step": 10969 }, { "epoch": 0.7069665528130438, "grad_norm": 4.283922259766499e-05, "learning_rate": 3.2610096670247048e-06, "loss": 0.0, "step": 10970 }, { "epoch": 0.707030998259973, "grad_norm": 0.9901557003270737, "learning_rate": 3.260293591120659e-06, "loss": 0.006, "step": 10971 }, { "epoch": 0.7070954437069021, "grad_norm": 0.037523802452588115, "learning_rate": 3.259577515216613e-06, "loss": 0.0, "step": 10972 }, { "epoch": 0.7071598891538313, "grad_norm": 0.015195530897357019, "learning_rate": 3.2588614393125673e-06, "loss": 0.0, "step": 10973 }, { "epoch": 0.7072243346007605, "grad_norm": 0.0022295571638922154, "learning_rate": 3.2581453634085216e-06, "loss": 0.0, "step": 10974 }, { "epoch": 0.7072887800476896, "grad_norm": 0.8156517229050663, "learning_rate": 3.2574292875044754e-06, "loss": 0.0035, "step": 10975 }, { "epoch": 0.7073532254946188, "grad_norm": 0.004237609517313484, "learning_rate": 3.2567132116004297e-06, "loss": 0.0, "step": 10976 }, { "epoch": 0.707417670941548, "grad_norm": 0.005571668982925098, "learning_rate": 3.2559971356963845e-06, "loss": 0.0, "step": 10977 }, { "epoch": 0.7074821163884771, "grad_norm": 0.0036520828137762886, "learning_rate": 3.2552810597923383e-06, "loss": 0.0, "step": 10978 }, { "epoch": 0.7075465618354063, "grad_norm": 0.016016935812411235, "learning_rate": 3.2545649838882926e-06, "loss": 0.0, "step": 10979 }, { "epoch": 0.7076110072823355, "grad_norm": 0.3980306343157112, "learning_rate": 3.2538489079842465e-06, "loss": 0.0041, "step": 10980 }, { "epoch": 0.7076754527292647, "grad_norm": 0.0009197268556172491, "learning_rate": 3.253132832080201e-06, "loss": 0.0, "step": 10981 }, { "epoch": 0.7077398981761939, "grad_norm": 0.00043302879034731197, "learning_rate": 3.252416756176155e-06, "loss": 0.0, "step": 10982 }, { "epoch": 0.7078043436231231, "grad_norm": 0.004272269559729301, "learning_rate": 3.251700680272109e-06, "loss": 0.0, "step": 10983 }, { "epoch": 0.7078687890700522, "grad_norm": 0.0024739338398368465, "learning_rate": 3.2509846043680633e-06, "loss": 0.0, "step": 10984 }, { "epoch": 0.7079332345169814, "grad_norm": 0.1845882851059872, "learning_rate": 3.250268528464017e-06, "loss": 0.0028, "step": 10985 }, { "epoch": 0.7079976799639105, "grad_norm": 0.08929369829022431, "learning_rate": 3.2495524525599714e-06, "loss": 0.0002, "step": 10986 }, { "epoch": 0.7080621254108397, "grad_norm": 0.0027870529419560435, "learning_rate": 3.2488363766559257e-06, "loss": 0.0, "step": 10987 }, { "epoch": 0.7081265708577689, "grad_norm": 0.0017530348572803891, "learning_rate": 3.24812030075188e-06, "loss": 0.0, "step": 10988 }, { "epoch": 0.708191016304698, "grad_norm": 0.006516597776420439, "learning_rate": 3.2474042248478343e-06, "loss": 0.0001, "step": 10989 }, { "epoch": 0.7082554617516272, "grad_norm": 0.05491665317800124, "learning_rate": 3.2466881489437886e-06, "loss": 0.0006, "step": 10990 }, { "epoch": 0.7083199071985564, "grad_norm": 0.007344119285479627, "learning_rate": 3.2459720730397425e-06, "loss": 0.0, "step": 10991 }, { "epoch": 0.7083843526454856, "grad_norm": 0.004823036421794768, "learning_rate": 3.245255997135697e-06, "loss": 0.0, "step": 10992 }, { "epoch": 0.7084487980924148, "grad_norm": 0.001133053437795517, "learning_rate": 3.2445399212316507e-06, "loss": 0.0, "step": 10993 }, { "epoch": 0.708513243539344, "grad_norm": 0.010246483494655423, "learning_rate": 3.243823845327605e-06, "loss": 0.0001, "step": 10994 }, { "epoch": 0.7085776889862732, "grad_norm": 0.00031286769307990606, "learning_rate": 3.2431077694235593e-06, "loss": 0.0, "step": 10995 }, { "epoch": 0.7086421344332023, "grad_norm": 0.025983279433569137, "learning_rate": 3.242391693519513e-06, "loss": 0.0003, "step": 10996 }, { "epoch": 0.7087065798801314, "grad_norm": 0.002991161752113345, "learning_rate": 3.2416756176154675e-06, "loss": 0.0, "step": 10997 }, { "epoch": 0.7087710253270606, "grad_norm": 0.00019414406743275953, "learning_rate": 3.2409595417114213e-06, "loss": 0.0, "step": 10998 }, { "epoch": 0.7088354707739898, "grad_norm": 0.0051728910727569175, "learning_rate": 3.2402434658073756e-06, "loss": 0.0, "step": 10999 }, { "epoch": 0.708899916220919, "grad_norm": 0.032033257450606374, "learning_rate": 3.2395273899033304e-06, "loss": 0.0, "step": 11000 }, { "epoch": 0.7089643616678482, "grad_norm": 0.0013291294574410593, "learning_rate": 3.2388113139992842e-06, "loss": 0.0, "step": 11001 }, { "epoch": 0.7090288071147773, "grad_norm": 0.0030029524874015485, "learning_rate": 3.2380952380952385e-06, "loss": 0.0, "step": 11002 }, { "epoch": 0.7090932525617065, "grad_norm": 0.018705699350202824, "learning_rate": 3.237379162191193e-06, "loss": 0.0, "step": 11003 }, { "epoch": 0.7091576980086357, "grad_norm": 0.12145905230771745, "learning_rate": 3.2366630862871467e-06, "loss": 0.0004, "step": 11004 }, { "epoch": 0.7092221434555649, "grad_norm": 0.002445939444972882, "learning_rate": 3.235947010383101e-06, "loss": 0.0, "step": 11005 }, { "epoch": 0.7092865889024941, "grad_norm": 0.0030465461515954964, "learning_rate": 3.235230934479055e-06, "loss": 0.0, "step": 11006 }, { "epoch": 0.7093510343494233, "grad_norm": 0.0022634303186151597, "learning_rate": 3.234514858575009e-06, "loss": 0.0, "step": 11007 }, { "epoch": 0.7094154797963523, "grad_norm": 0.1156427131436469, "learning_rate": 3.233798782670963e-06, "loss": 0.0002, "step": 11008 }, { "epoch": 0.7094799252432815, "grad_norm": 0.002947625272147337, "learning_rate": 3.2330827067669174e-06, "loss": 0.0, "step": 11009 }, { "epoch": 0.7095443706902107, "grad_norm": 0.04937201848564881, "learning_rate": 3.2323666308628717e-06, "loss": 0.0001, "step": 11010 }, { "epoch": 0.7096088161371399, "grad_norm": 0.011345745946028784, "learning_rate": 3.2316505549588255e-06, "loss": 0.0, "step": 11011 }, { "epoch": 0.7096732615840691, "grad_norm": 0.13372508837535974, "learning_rate": 3.2309344790547803e-06, "loss": 0.0005, "step": 11012 }, { "epoch": 0.7097377070309983, "grad_norm": 0.02591297992435188, "learning_rate": 3.2302184031507345e-06, "loss": 0.0, "step": 11013 }, { "epoch": 0.7098021524779274, "grad_norm": 0.0005216657897116044, "learning_rate": 3.2295023272466884e-06, "loss": 0.0, "step": 11014 }, { "epoch": 0.7098665979248566, "grad_norm": 0.001228820551302051, "learning_rate": 3.2287862513426427e-06, "loss": 0.0, "step": 11015 }, { "epoch": 0.7099310433717858, "grad_norm": 0.004049530687622813, "learning_rate": 3.2280701754385966e-06, "loss": 0.0, "step": 11016 }, { "epoch": 0.709995488818715, "grad_norm": 0.01042890442965104, "learning_rate": 3.227354099534551e-06, "loss": 0.0, "step": 11017 }, { "epoch": 0.7100599342656442, "grad_norm": 0.20473263111957016, "learning_rate": 3.226638023630505e-06, "loss": 0.0013, "step": 11018 }, { "epoch": 0.7101243797125734, "grad_norm": 0.003618323682590707, "learning_rate": 3.225921947726459e-06, "loss": 0.0, "step": 11019 }, { "epoch": 0.7101888251595024, "grad_norm": 0.00013886623571113355, "learning_rate": 3.2252058718224134e-06, "loss": 0.0, "step": 11020 }, { "epoch": 0.7102532706064316, "grad_norm": 0.00031239600076566764, "learning_rate": 3.2244897959183672e-06, "loss": 0.0, "step": 11021 }, { "epoch": 0.7103177160533608, "grad_norm": 0.015242570841153947, "learning_rate": 3.2237737200143215e-06, "loss": 0.0001, "step": 11022 }, { "epoch": 0.71038216150029, "grad_norm": 5.747915247272944e-05, "learning_rate": 3.2230576441102763e-06, "loss": 0.0, "step": 11023 }, { "epoch": 0.7104466069472192, "grad_norm": 0.0062475296909040865, "learning_rate": 3.22234156820623e-06, "loss": 0.0, "step": 11024 }, { "epoch": 0.7105110523941484, "grad_norm": 0.0017142098867375052, "learning_rate": 3.2216254923021844e-06, "loss": 0.0, "step": 11025 }, { "epoch": 0.7105754978410775, "grad_norm": 0.0011041837520175915, "learning_rate": 3.2209094163981387e-06, "loss": 0.0, "step": 11026 }, { "epoch": 0.7106399432880067, "grad_norm": 0.00122298031984066, "learning_rate": 3.2201933404940926e-06, "loss": 0.0, "step": 11027 }, { "epoch": 0.7107043887349359, "grad_norm": 0.00011782227449732348, "learning_rate": 3.219477264590047e-06, "loss": 0.0, "step": 11028 }, { "epoch": 0.7107688341818651, "grad_norm": 0.000478511478482095, "learning_rate": 3.2187611886860008e-06, "loss": 0.0, "step": 11029 }, { "epoch": 0.7108332796287943, "grad_norm": 0.0075666354507896, "learning_rate": 3.218045112781955e-06, "loss": 0.0, "step": 11030 }, { "epoch": 0.7108977250757234, "grad_norm": 0.040470238174043854, "learning_rate": 3.2173290368779094e-06, "loss": 0.0003, "step": 11031 }, { "epoch": 0.7109621705226525, "grad_norm": 0.049252596260576695, "learning_rate": 3.2166129609738633e-06, "loss": 0.0005, "step": 11032 }, { "epoch": 0.7110266159695817, "grad_norm": 0.5214163060714664, "learning_rate": 3.2158968850698176e-06, "loss": 0.0017, "step": 11033 }, { "epoch": 0.7110910614165109, "grad_norm": 0.004091547353671431, "learning_rate": 3.2151808091657714e-06, "loss": 0.0, "step": 11034 }, { "epoch": 0.7111555068634401, "grad_norm": 0.8728484540582238, "learning_rate": 3.214464733261726e-06, "loss": 0.0037, "step": 11035 }, { "epoch": 0.7112199523103693, "grad_norm": 0.002028648083962951, "learning_rate": 3.2137486573576805e-06, "loss": 0.0, "step": 11036 }, { "epoch": 0.7112843977572985, "grad_norm": 0.0017844158187033214, "learning_rate": 3.2130325814536343e-06, "loss": 0.0, "step": 11037 }, { "epoch": 0.7113488432042276, "grad_norm": 0.29092505665033047, "learning_rate": 3.2123165055495886e-06, "loss": 0.0003, "step": 11038 }, { "epoch": 0.7114132886511568, "grad_norm": 0.03781926653402004, "learning_rate": 3.211600429645543e-06, "loss": 0.0001, "step": 11039 }, { "epoch": 0.711477734098086, "grad_norm": 0.023017320635540486, "learning_rate": 3.210884353741497e-06, "loss": 0.0, "step": 11040 }, { "epoch": 0.7115421795450152, "grad_norm": 0.2787249919125269, "learning_rate": 3.210168277837451e-06, "loss": 0.001, "step": 11041 }, { "epoch": 0.7116066249919443, "grad_norm": 0.0002637866973357019, "learning_rate": 3.209452201933405e-06, "loss": 0.0, "step": 11042 }, { "epoch": 0.7116710704388735, "grad_norm": 0.003413300700603889, "learning_rate": 3.2087361260293593e-06, "loss": 0.0, "step": 11043 }, { "epoch": 0.7117355158858026, "grad_norm": 0.008288206540353161, "learning_rate": 3.2080200501253136e-06, "loss": 0.0, "step": 11044 }, { "epoch": 0.7117999613327318, "grad_norm": 0.0009564137452828161, "learning_rate": 3.2073039742212674e-06, "loss": 0.0, "step": 11045 }, { "epoch": 0.711864406779661, "grad_norm": 0.011072656092947879, "learning_rate": 3.2065878983172217e-06, "loss": 0.0, "step": 11046 }, { "epoch": 0.7119288522265902, "grad_norm": 0.1796167379448892, "learning_rate": 3.2058718224131765e-06, "loss": 0.0003, "step": 11047 }, { "epoch": 0.7119932976735194, "grad_norm": 0.07329979538349857, "learning_rate": 3.2051557465091303e-06, "loss": 0.0001, "step": 11048 }, { "epoch": 0.7120577431204486, "grad_norm": 0.1903261245429498, "learning_rate": 3.2044396706050846e-06, "loss": 0.002, "step": 11049 }, { "epoch": 0.7121221885673777, "grad_norm": 0.0020960073602700443, "learning_rate": 3.2037235947010385e-06, "loss": 0.0, "step": 11050 }, { "epoch": 0.7121866340143069, "grad_norm": 0.15443077795960122, "learning_rate": 3.203007518796993e-06, "loss": 0.002, "step": 11051 }, { "epoch": 0.7122510794612361, "grad_norm": 0.006115973097704426, "learning_rate": 3.202291442892947e-06, "loss": 0.0, "step": 11052 }, { "epoch": 0.7123155249081652, "grad_norm": 0.00581395781401189, "learning_rate": 3.201575366988901e-06, "loss": 0.0, "step": 11053 }, { "epoch": 0.7123799703550944, "grad_norm": 0.0022123917383665612, "learning_rate": 3.2008592910848553e-06, "loss": 0.0, "step": 11054 }, { "epoch": 0.7124444158020236, "grad_norm": 0.00022742322294811237, "learning_rate": 3.200143215180809e-06, "loss": 0.0, "step": 11055 }, { "epoch": 0.7125088612489527, "grad_norm": 0.030732401054467395, "learning_rate": 3.1994271392767635e-06, "loss": 0.0, "step": 11056 }, { "epoch": 0.7125733066958819, "grad_norm": 0.0022031101795498163, "learning_rate": 3.1987110633727173e-06, "loss": 0.0, "step": 11057 }, { "epoch": 0.7126377521428111, "grad_norm": 0.05268874666893051, "learning_rate": 3.197994987468672e-06, "loss": 0.0001, "step": 11058 }, { "epoch": 0.7127021975897403, "grad_norm": 0.000596779920373873, "learning_rate": 3.1972789115646264e-06, "loss": 0.0, "step": 11059 }, { "epoch": 0.7127666430366695, "grad_norm": 0.015269021418092311, "learning_rate": 3.1965628356605807e-06, "loss": 0.0, "step": 11060 }, { "epoch": 0.7128310884835987, "grad_norm": 0.0007433832035599262, "learning_rate": 3.1958467597565345e-06, "loss": 0.0, "step": 11061 }, { "epoch": 0.7128955339305278, "grad_norm": 0.00024632248124767153, "learning_rate": 3.195130683852489e-06, "loss": 0.0, "step": 11062 }, { "epoch": 0.712959979377457, "grad_norm": 6.792495538918946e-05, "learning_rate": 3.1944146079484427e-06, "loss": 0.0, "step": 11063 }, { "epoch": 0.7130244248243861, "grad_norm": 0.0007723524883490751, "learning_rate": 3.193698532044397e-06, "loss": 0.0, "step": 11064 }, { "epoch": 0.7130888702713153, "grad_norm": 0.00046314913899469146, "learning_rate": 3.192982456140351e-06, "loss": 0.0, "step": 11065 }, { "epoch": 0.7131533157182445, "grad_norm": 0.0002443464187616548, "learning_rate": 3.192266380236305e-06, "loss": 0.0, "step": 11066 }, { "epoch": 0.7132177611651737, "grad_norm": 0.03511749555454775, "learning_rate": 3.1915503043322595e-06, "loss": 0.0, "step": 11067 }, { "epoch": 0.7132822066121028, "grad_norm": 0.08750064745305847, "learning_rate": 3.1908342284282134e-06, "loss": 0.0001, "step": 11068 }, { "epoch": 0.713346652059032, "grad_norm": 0.0007083306043677525, "learning_rate": 3.1901181525241677e-06, "loss": 0.0, "step": 11069 }, { "epoch": 0.7134110975059612, "grad_norm": 0.004821037321704811, "learning_rate": 3.1894020766201224e-06, "loss": 0.0, "step": 11070 }, { "epoch": 0.7134755429528904, "grad_norm": 6.842244899806434e-05, "learning_rate": 3.1886860007160762e-06, "loss": 0.0, "step": 11071 }, { "epoch": 0.7135399883998196, "grad_norm": 0.19849096388992776, "learning_rate": 3.1879699248120305e-06, "loss": 0.0017, "step": 11072 }, { "epoch": 0.7136044338467488, "grad_norm": 0.001744117216499498, "learning_rate": 3.1872538489079844e-06, "loss": 0.0, "step": 11073 }, { "epoch": 0.713668879293678, "grad_norm": 0.0007771459762669267, "learning_rate": 3.1865377730039387e-06, "loss": 0.0, "step": 11074 }, { "epoch": 0.713733324740607, "grad_norm": 0.0016081975587549844, "learning_rate": 3.185821697099893e-06, "loss": 0.0, "step": 11075 }, { "epoch": 0.7137977701875362, "grad_norm": 0.09325613134932795, "learning_rate": 3.185105621195847e-06, "loss": 0.0002, "step": 11076 }, { "epoch": 0.7138622156344654, "grad_norm": 0.332418115417522, "learning_rate": 3.184389545291801e-06, "loss": 0.0014, "step": 11077 }, { "epoch": 0.7139266610813946, "grad_norm": 0.04041215457695259, "learning_rate": 3.183673469387755e-06, "loss": 0.0001, "step": 11078 }, { "epoch": 0.7139911065283238, "grad_norm": 0.08855809440335718, "learning_rate": 3.1829573934837094e-06, "loss": 0.0016, "step": 11079 }, { "epoch": 0.714055551975253, "grad_norm": 0.037115998367209524, "learning_rate": 3.1822413175796637e-06, "loss": 0.0001, "step": 11080 }, { "epoch": 0.7141199974221821, "grad_norm": 0.0009066699956542551, "learning_rate": 3.1815252416756175e-06, "loss": 0.0, "step": 11081 }, { "epoch": 0.7141844428691113, "grad_norm": 0.019780705026331263, "learning_rate": 3.1808091657715723e-06, "loss": 0.0, "step": 11082 }, { "epoch": 0.7142488883160405, "grad_norm": 0.00261057496389828, "learning_rate": 3.1800930898675266e-06, "loss": 0.0, "step": 11083 }, { "epoch": 0.7143133337629697, "grad_norm": 0.004270109738808761, "learning_rate": 3.1793770139634804e-06, "loss": 0.0, "step": 11084 }, { "epoch": 0.7143777792098989, "grad_norm": 0.00031775442414158883, "learning_rate": 3.1786609380594347e-06, "loss": 0.0, "step": 11085 }, { "epoch": 0.7144422246568279, "grad_norm": 0.12453918901981105, "learning_rate": 3.1779448621553886e-06, "loss": 0.0002, "step": 11086 }, { "epoch": 0.7145066701037571, "grad_norm": 0.02119295088790315, "learning_rate": 3.177228786251343e-06, "loss": 0.0, "step": 11087 }, { "epoch": 0.7145711155506863, "grad_norm": 0.0006572972137359756, "learning_rate": 3.176512710347297e-06, "loss": 0.0, "step": 11088 }, { "epoch": 0.7146355609976155, "grad_norm": 0.0015856123993797646, "learning_rate": 3.175796634443251e-06, "loss": 0.0, "step": 11089 }, { "epoch": 0.7147000064445447, "grad_norm": 0.005097678348488337, "learning_rate": 3.1750805585392054e-06, "loss": 0.0, "step": 11090 }, { "epoch": 0.7147644518914739, "grad_norm": 0.03935784061800326, "learning_rate": 3.1743644826351593e-06, "loss": 0.0001, "step": 11091 }, { "epoch": 0.714828897338403, "grad_norm": 0.00048666767231215894, "learning_rate": 3.1736484067311136e-06, "loss": 0.0, "step": 11092 }, { "epoch": 0.7148933427853322, "grad_norm": 0.004039960942877218, "learning_rate": 3.1729323308270683e-06, "loss": 0.0, "step": 11093 }, { "epoch": 0.7149577882322614, "grad_norm": 0.05651111224218124, "learning_rate": 3.172216254923022e-06, "loss": 0.0001, "step": 11094 }, { "epoch": 0.7150222336791906, "grad_norm": 0.0012988435437589815, "learning_rate": 3.1715001790189765e-06, "loss": 0.0, "step": 11095 }, { "epoch": 0.7150866791261198, "grad_norm": 0.0006677970549519835, "learning_rate": 3.1707841031149308e-06, "loss": 0.0, "step": 11096 }, { "epoch": 0.715151124573049, "grad_norm": 0.0030971611143586165, "learning_rate": 3.1700680272108846e-06, "loss": 0.0, "step": 11097 }, { "epoch": 0.715215570019978, "grad_norm": 0.05772528771724839, "learning_rate": 3.169351951306839e-06, "loss": 0.0001, "step": 11098 }, { "epoch": 0.7152800154669072, "grad_norm": 0.0017996675808251306, "learning_rate": 3.168635875402793e-06, "loss": 0.0, "step": 11099 }, { "epoch": 0.7153444609138364, "grad_norm": 0.0022183191323614845, "learning_rate": 3.167919799498747e-06, "loss": 0.0, "step": 11100 }, { "epoch": 0.7154089063607656, "grad_norm": 0.23467454047809808, "learning_rate": 3.167203723594701e-06, "loss": 0.0058, "step": 11101 }, { "epoch": 0.7154733518076948, "grad_norm": 0.002101152624454598, "learning_rate": 3.1664876476906553e-06, "loss": 0.0, "step": 11102 }, { "epoch": 0.715537797254624, "grad_norm": 0.10132137891567196, "learning_rate": 3.1657715717866096e-06, "loss": 0.0002, "step": 11103 }, { "epoch": 0.7156022427015531, "grad_norm": 0.0013626302825582128, "learning_rate": 3.1650554958825634e-06, "loss": 0.0, "step": 11104 }, { "epoch": 0.7156666881484823, "grad_norm": 0.013922212605411412, "learning_rate": 3.164339419978518e-06, "loss": 0.0, "step": 11105 }, { "epoch": 0.7157311335954115, "grad_norm": 0.5795495550330169, "learning_rate": 3.1636233440744725e-06, "loss": 0.0019, "step": 11106 }, { "epoch": 0.7157955790423407, "grad_norm": 0.019280375892706227, "learning_rate": 3.1629072681704263e-06, "loss": 0.0, "step": 11107 }, { "epoch": 0.7158600244892699, "grad_norm": 3.448348121566918e-05, "learning_rate": 3.1621911922663806e-06, "loss": 0.0, "step": 11108 }, { "epoch": 0.715924469936199, "grad_norm": 0.37455556282125935, "learning_rate": 3.1614751163623345e-06, "loss": 0.0016, "step": 11109 }, { "epoch": 0.7159889153831281, "grad_norm": 0.002586349361368294, "learning_rate": 3.160759040458289e-06, "loss": 0.0, "step": 11110 }, { "epoch": 0.7160533608300573, "grad_norm": 0.008448127471088534, "learning_rate": 3.160042964554243e-06, "loss": 0.0, "step": 11111 }, { "epoch": 0.7161178062769865, "grad_norm": 0.0010937285941726932, "learning_rate": 3.159326888650197e-06, "loss": 0.0, "step": 11112 }, { "epoch": 0.7161822517239157, "grad_norm": 0.0012277663763215767, "learning_rate": 3.1586108127461513e-06, "loss": 0.0, "step": 11113 }, { "epoch": 0.7162466971708449, "grad_norm": 0.0018488032473446409, "learning_rate": 3.157894736842105e-06, "loss": 0.0, "step": 11114 }, { "epoch": 0.7163111426177741, "grad_norm": 0.08111634839048355, "learning_rate": 3.1571786609380595e-06, "loss": 0.0001, "step": 11115 }, { "epoch": 0.7163755880647033, "grad_norm": 0.0031572357244101274, "learning_rate": 3.1564625850340138e-06, "loss": 0.0, "step": 11116 }, { "epoch": 0.7164400335116324, "grad_norm": 0.0014554546438801737, "learning_rate": 3.155746509129968e-06, "loss": 0.0, "step": 11117 }, { "epoch": 0.7165044789585616, "grad_norm": 0.028527429420330017, "learning_rate": 3.1550304332259224e-06, "loss": 0.0, "step": 11118 }, { "epoch": 0.7165689244054908, "grad_norm": 0.0013177336847458132, "learning_rate": 3.1543143573218767e-06, "loss": 0.0, "step": 11119 }, { "epoch": 0.7166333698524199, "grad_norm": 0.02391522038213578, "learning_rate": 3.1535982814178305e-06, "loss": 0.0001, "step": 11120 }, { "epoch": 0.7166978152993491, "grad_norm": 0.001059440528141261, "learning_rate": 3.152882205513785e-06, "loss": 0.0, "step": 11121 }, { "epoch": 0.7167622607462782, "grad_norm": 0.001678356185974784, "learning_rate": 3.1521661296097387e-06, "loss": 0.0, "step": 11122 }, { "epoch": 0.7168267061932074, "grad_norm": 0.00032899135591734686, "learning_rate": 3.151450053705693e-06, "loss": 0.0, "step": 11123 }, { "epoch": 0.7168911516401366, "grad_norm": 0.0042420537766143995, "learning_rate": 3.1507339778016473e-06, "loss": 0.0, "step": 11124 }, { "epoch": 0.7169555970870658, "grad_norm": 0.06737302971743062, "learning_rate": 3.150017901897601e-06, "loss": 0.0001, "step": 11125 }, { "epoch": 0.717020042533995, "grad_norm": 0.10045379921951969, "learning_rate": 3.1493018259935555e-06, "loss": 0.0002, "step": 11126 }, { "epoch": 0.7170844879809242, "grad_norm": 0.03187420971150182, "learning_rate": 3.1485857500895094e-06, "loss": 0.0002, "step": 11127 }, { "epoch": 0.7171489334278534, "grad_norm": 0.001211750648026592, "learning_rate": 3.147869674185464e-06, "loss": 0.0, "step": 11128 }, { "epoch": 0.7172133788747825, "grad_norm": 0.008495406179175139, "learning_rate": 3.1471535982814184e-06, "loss": 0.0, "step": 11129 }, { "epoch": 0.7172778243217117, "grad_norm": 0.0019832293144138556, "learning_rate": 3.1464375223773722e-06, "loss": 0.0, "step": 11130 }, { "epoch": 0.7173422697686408, "grad_norm": 0.001240237132151751, "learning_rate": 3.1457214464733265e-06, "loss": 0.0, "step": 11131 }, { "epoch": 0.71740671521557, "grad_norm": 0.0025074924361898395, "learning_rate": 3.145005370569281e-06, "loss": 0.0, "step": 11132 }, { "epoch": 0.7174711606624992, "grad_norm": 0.0027266612771010145, "learning_rate": 3.1442892946652347e-06, "loss": 0.0, "step": 11133 }, { "epoch": 0.7175356061094283, "grad_norm": 0.05183947841060524, "learning_rate": 3.143573218761189e-06, "loss": 0.0001, "step": 11134 }, { "epoch": 0.7176000515563575, "grad_norm": 0.00013604534610558102, "learning_rate": 3.142857142857143e-06, "loss": 0.0, "step": 11135 }, { "epoch": 0.7176644970032867, "grad_norm": 0.004464684547835725, "learning_rate": 3.142141066953097e-06, "loss": 0.0, "step": 11136 }, { "epoch": 0.7177289424502159, "grad_norm": 0.043388709463875526, "learning_rate": 3.1414249910490515e-06, "loss": 0.0002, "step": 11137 }, { "epoch": 0.7177933878971451, "grad_norm": 0.563788289759631, "learning_rate": 3.1407089151450054e-06, "loss": 0.0048, "step": 11138 }, { "epoch": 0.7178578333440743, "grad_norm": 0.00031376440967617104, "learning_rate": 3.1399928392409597e-06, "loss": 0.0, "step": 11139 }, { "epoch": 0.7179222787910035, "grad_norm": 0.0009322943557485454, "learning_rate": 3.1392767633369144e-06, "loss": 0.0, "step": 11140 }, { "epoch": 0.7179867242379326, "grad_norm": 0.2445635070487584, "learning_rate": 3.1385606874328683e-06, "loss": 0.0011, "step": 11141 }, { "epoch": 0.7180511696848617, "grad_norm": 0.0009303590616047754, "learning_rate": 3.1378446115288226e-06, "loss": 0.0, "step": 11142 }, { "epoch": 0.7181156151317909, "grad_norm": 0.002144644973596857, "learning_rate": 3.1371285356247764e-06, "loss": 0.0, "step": 11143 }, { "epoch": 0.7181800605787201, "grad_norm": 0.24049082303382727, "learning_rate": 3.1364124597207307e-06, "loss": 0.0005, "step": 11144 }, { "epoch": 0.7182445060256493, "grad_norm": 0.010361287644884275, "learning_rate": 3.135696383816685e-06, "loss": 0.0001, "step": 11145 }, { "epoch": 0.7183089514725784, "grad_norm": 0.00045569104002404434, "learning_rate": 3.134980307912639e-06, "loss": 0.0, "step": 11146 }, { "epoch": 0.7183733969195076, "grad_norm": 0.1184363656128077, "learning_rate": 3.134264232008593e-06, "loss": 0.0013, "step": 11147 }, { "epoch": 0.7184378423664368, "grad_norm": 0.0018075368988239252, "learning_rate": 3.133548156104547e-06, "loss": 0.0, "step": 11148 }, { "epoch": 0.718502287813366, "grad_norm": 0.00025330471947959053, "learning_rate": 3.1328320802005014e-06, "loss": 0.0, "step": 11149 }, { "epoch": 0.7185667332602952, "grad_norm": 0.0018817507730820291, "learning_rate": 3.1321160042964553e-06, "loss": 0.0, "step": 11150 }, { "epoch": 0.7186311787072244, "grad_norm": 0.14699625858626997, "learning_rate": 3.1313999283924096e-06, "loss": 0.001, "step": 11151 }, { "epoch": 0.7186956241541536, "grad_norm": 0.010185056320389333, "learning_rate": 3.1306838524883643e-06, "loss": 0.0015, "step": 11152 }, { "epoch": 0.7187600696010826, "grad_norm": 0.00017482119536334955, "learning_rate": 3.1299677765843186e-06, "loss": 0.0, "step": 11153 }, { "epoch": 0.7188245150480118, "grad_norm": 0.008587165702478385, "learning_rate": 3.1292517006802725e-06, "loss": 0.0, "step": 11154 }, { "epoch": 0.718888960494941, "grad_norm": 0.0009752747086168306, "learning_rate": 3.1285356247762268e-06, "loss": 0.0, "step": 11155 }, { "epoch": 0.7189534059418702, "grad_norm": 0.00273619332827515, "learning_rate": 3.1278195488721806e-06, "loss": 0.0, "step": 11156 }, { "epoch": 0.7190178513887994, "grad_norm": 0.0003457374189358038, "learning_rate": 3.127103472968135e-06, "loss": 0.0, "step": 11157 }, { "epoch": 0.7190822968357286, "grad_norm": 0.001673470044109342, "learning_rate": 3.126387397064089e-06, "loss": 0.0, "step": 11158 }, { "epoch": 0.7191467422826577, "grad_norm": 0.00021614544359388704, "learning_rate": 3.125671321160043e-06, "loss": 0.0, "step": 11159 }, { "epoch": 0.7192111877295869, "grad_norm": 0.0036123325054397515, "learning_rate": 3.1249552452559974e-06, "loss": 0.0, "step": 11160 }, { "epoch": 0.7192756331765161, "grad_norm": 0.01439849637720543, "learning_rate": 3.1242391693519513e-06, "loss": 0.0, "step": 11161 }, { "epoch": 0.7193400786234453, "grad_norm": 0.0011721288297011038, "learning_rate": 3.1235230934479056e-06, "loss": 0.0, "step": 11162 }, { "epoch": 0.7194045240703745, "grad_norm": 0.004723643909055465, "learning_rate": 3.1228070175438603e-06, "loss": 0.0, "step": 11163 }, { "epoch": 0.7194689695173035, "grad_norm": 0.0035602629306015162, "learning_rate": 3.122090941639814e-06, "loss": 0.0, "step": 11164 }, { "epoch": 0.7195334149642327, "grad_norm": 0.0003956756912840429, "learning_rate": 3.1213748657357685e-06, "loss": 0.0, "step": 11165 }, { "epoch": 0.7195978604111619, "grad_norm": 0.001660037121350968, "learning_rate": 3.1206587898317223e-06, "loss": 0.0, "step": 11166 }, { "epoch": 0.7196623058580911, "grad_norm": 0.00039859561483115464, "learning_rate": 3.1199427139276766e-06, "loss": 0.0, "step": 11167 }, { "epoch": 0.7197267513050203, "grad_norm": 0.0060584983061866965, "learning_rate": 3.119226638023631e-06, "loss": 0.0001, "step": 11168 }, { "epoch": 0.7197911967519495, "grad_norm": 0.037109889478630684, "learning_rate": 3.118510562119585e-06, "loss": 0.0, "step": 11169 }, { "epoch": 0.7198556421988787, "grad_norm": 0.00727883548794619, "learning_rate": 3.117794486215539e-06, "loss": 0.0001, "step": 11170 }, { "epoch": 0.7199200876458078, "grad_norm": 0.03501727862860073, "learning_rate": 3.117078410311493e-06, "loss": 0.0, "step": 11171 }, { "epoch": 0.719984533092737, "grad_norm": 8.61872471854203e-05, "learning_rate": 3.1163623344074473e-06, "loss": 0.0, "step": 11172 }, { "epoch": 0.7200489785396662, "grad_norm": 0.045333885416277295, "learning_rate": 3.1156462585034016e-06, "loss": 0.0001, "step": 11173 }, { "epoch": 0.7201134239865954, "grad_norm": 0.0004629402319689133, "learning_rate": 3.1149301825993555e-06, "loss": 0.0, "step": 11174 }, { "epoch": 0.7201778694335246, "grad_norm": 0.001063566655592568, "learning_rate": 3.11421410669531e-06, "loss": 0.0, "step": 11175 }, { "epoch": 0.7202423148804536, "grad_norm": 0.0014662245610195432, "learning_rate": 3.1134980307912645e-06, "loss": 0.0, "step": 11176 }, { "epoch": 0.7203067603273828, "grad_norm": 0.01931647362060871, "learning_rate": 3.1127819548872184e-06, "loss": 0.0, "step": 11177 }, { "epoch": 0.720371205774312, "grad_norm": 0.0004281946405760085, "learning_rate": 3.1120658789831727e-06, "loss": 0.0, "step": 11178 }, { "epoch": 0.7204356512212412, "grad_norm": 7.321520534062253e-05, "learning_rate": 3.1113498030791265e-06, "loss": 0.0, "step": 11179 }, { "epoch": 0.7205000966681704, "grad_norm": 8.563287008322675e-05, "learning_rate": 3.110633727175081e-06, "loss": 0.0, "step": 11180 }, { "epoch": 0.7205645421150996, "grad_norm": 0.00019715790289080482, "learning_rate": 3.109917651271035e-06, "loss": 0.0, "step": 11181 }, { "epoch": 0.7206289875620288, "grad_norm": 0.002634901520461059, "learning_rate": 3.109201575366989e-06, "loss": 0.0, "step": 11182 }, { "epoch": 0.7206934330089579, "grad_norm": 0.0008865863108786099, "learning_rate": 3.1084854994629433e-06, "loss": 0.0, "step": 11183 }, { "epoch": 0.7207578784558871, "grad_norm": 0.025815671687311366, "learning_rate": 3.107769423558897e-06, "loss": 0.0, "step": 11184 }, { "epoch": 0.7208223239028163, "grad_norm": 0.0032002885997212812, "learning_rate": 3.1070533476548515e-06, "loss": 0.0, "step": 11185 }, { "epoch": 0.7208867693497455, "grad_norm": 0.0004901728854833662, "learning_rate": 3.1063372717508058e-06, "loss": 0.0, "step": 11186 }, { "epoch": 0.7209512147966746, "grad_norm": 0.00753785072550071, "learning_rate": 3.10562119584676e-06, "loss": 0.0, "step": 11187 }, { "epoch": 0.7210156602436038, "grad_norm": 0.001151215509133265, "learning_rate": 3.1049051199427144e-06, "loss": 0.0, "step": 11188 }, { "epoch": 0.7210801056905329, "grad_norm": 0.00012703756509971546, "learning_rate": 3.1041890440386687e-06, "loss": 0.0, "step": 11189 }, { "epoch": 0.7211445511374621, "grad_norm": 0.00027725036286674646, "learning_rate": 3.1034729681346225e-06, "loss": 0.0, "step": 11190 }, { "epoch": 0.7212089965843913, "grad_norm": 0.00032789403026681533, "learning_rate": 3.102756892230577e-06, "loss": 0.0, "step": 11191 }, { "epoch": 0.7212734420313205, "grad_norm": 0.5477644907254653, "learning_rate": 3.1020408163265307e-06, "loss": 0.0013, "step": 11192 }, { "epoch": 0.7213378874782497, "grad_norm": 0.022567781263925505, "learning_rate": 3.101324740422485e-06, "loss": 0.0003, "step": 11193 }, { "epoch": 0.7214023329251789, "grad_norm": 0.00021412049244247774, "learning_rate": 3.1006086645184393e-06, "loss": 0.0, "step": 11194 }, { "epoch": 0.721466778372108, "grad_norm": 0.04202279788293058, "learning_rate": 3.099892588614393e-06, "loss": 0.0001, "step": 11195 }, { "epoch": 0.7215312238190372, "grad_norm": 0.004843800760419462, "learning_rate": 3.0991765127103475e-06, "loss": 0.0, "step": 11196 }, { "epoch": 0.7215956692659664, "grad_norm": 0.002510341090575078, "learning_rate": 3.0984604368063014e-06, "loss": 0.0, "step": 11197 }, { "epoch": 0.7216601147128955, "grad_norm": 0.01219112790806607, "learning_rate": 3.097744360902256e-06, "loss": 0.0001, "step": 11198 }, { "epoch": 0.7217245601598247, "grad_norm": 6.304089065724722e-05, "learning_rate": 3.0970282849982104e-06, "loss": 0.0, "step": 11199 }, { "epoch": 0.7217890056067539, "grad_norm": 0.5504341113723744, "learning_rate": 3.0963122090941643e-06, "loss": 0.002, "step": 11200 }, { "epoch": 0.721853451053683, "grad_norm": 0.01023833832935001, "learning_rate": 3.0955961331901186e-06, "loss": 0.0001, "step": 11201 }, { "epoch": 0.7219178965006122, "grad_norm": 0.1783995912772252, "learning_rate": 3.094880057286073e-06, "loss": 0.0037, "step": 11202 }, { "epoch": 0.7219823419475414, "grad_norm": 0.0008514347736365616, "learning_rate": 3.0941639813820267e-06, "loss": 0.0, "step": 11203 }, { "epoch": 0.7220467873944706, "grad_norm": 0.0005467954296606304, "learning_rate": 3.093447905477981e-06, "loss": 0.0, "step": 11204 }, { "epoch": 0.7221112328413998, "grad_norm": 0.12932414173611667, "learning_rate": 3.092731829573935e-06, "loss": 0.0003, "step": 11205 }, { "epoch": 0.722175678288329, "grad_norm": 0.00013929640553021798, "learning_rate": 3.092015753669889e-06, "loss": 0.0, "step": 11206 }, { "epoch": 0.7222401237352581, "grad_norm": 0.14328180054046125, "learning_rate": 3.091299677765843e-06, "loss": 0.0006, "step": 11207 }, { "epoch": 0.7223045691821873, "grad_norm": 0.0005452546115663261, "learning_rate": 3.0905836018617974e-06, "loss": 0.0, "step": 11208 }, { "epoch": 0.7223690146291164, "grad_norm": 0.00024169194708091167, "learning_rate": 3.0898675259577517e-06, "loss": 0.0, "step": 11209 }, { "epoch": 0.7224334600760456, "grad_norm": 0.004371841536006174, "learning_rate": 3.0891514500537064e-06, "loss": 0.0, "step": 11210 }, { "epoch": 0.7224979055229748, "grad_norm": 0.0039695361977091495, "learning_rate": 3.0884353741496603e-06, "loss": 0.0, "step": 11211 }, { "epoch": 0.722562350969904, "grad_norm": 0.001597297508301907, "learning_rate": 3.0877192982456146e-06, "loss": 0.0, "step": 11212 }, { "epoch": 0.7226267964168331, "grad_norm": 0.0149499392375087, "learning_rate": 3.0870032223415685e-06, "loss": 0.0001, "step": 11213 }, { "epoch": 0.7226912418637623, "grad_norm": 0.0009184210155997939, "learning_rate": 3.0862871464375228e-06, "loss": 0.0, "step": 11214 }, { "epoch": 0.7227556873106915, "grad_norm": 0.011828507087317438, "learning_rate": 3.0855710705334766e-06, "loss": 0.0, "step": 11215 }, { "epoch": 0.7228201327576207, "grad_norm": 0.004237269538513051, "learning_rate": 3.084854994629431e-06, "loss": 0.0, "step": 11216 }, { "epoch": 0.7228845782045499, "grad_norm": 0.000772409782086021, "learning_rate": 3.0841389187253852e-06, "loss": 0.0, "step": 11217 }, { "epoch": 0.7229490236514791, "grad_norm": 0.00760526489143851, "learning_rate": 3.083422842821339e-06, "loss": 0.0001, "step": 11218 }, { "epoch": 0.7230134690984082, "grad_norm": 0.03619062946544116, "learning_rate": 3.0827067669172934e-06, "loss": 0.0003, "step": 11219 }, { "epoch": 0.7230779145453373, "grad_norm": 0.0007644458517121425, "learning_rate": 3.0819906910132473e-06, "loss": 0.0, "step": 11220 }, { "epoch": 0.7231423599922665, "grad_norm": 0.0006187374178444931, "learning_rate": 3.0812746151092016e-06, "loss": 0.0, "step": 11221 }, { "epoch": 0.7232068054391957, "grad_norm": 0.008779720301754806, "learning_rate": 3.0805585392051563e-06, "loss": 0.0, "step": 11222 }, { "epoch": 0.7232712508861249, "grad_norm": 0.33317532619060475, "learning_rate": 3.07984246330111e-06, "loss": 0.0028, "step": 11223 }, { "epoch": 0.7233356963330541, "grad_norm": 0.008626626856931026, "learning_rate": 3.0791263873970645e-06, "loss": 0.0, "step": 11224 }, { "epoch": 0.7234001417799832, "grad_norm": 0.0015988935817651294, "learning_rate": 3.0784103114930188e-06, "loss": 0.0, "step": 11225 }, { "epoch": 0.7234645872269124, "grad_norm": 0.019773563400669943, "learning_rate": 3.0776942355889726e-06, "loss": 0.0, "step": 11226 }, { "epoch": 0.7235290326738416, "grad_norm": 0.0044985562525182805, "learning_rate": 3.076978159684927e-06, "loss": 0.0, "step": 11227 }, { "epoch": 0.7235934781207708, "grad_norm": 0.001497283255147728, "learning_rate": 3.076262083780881e-06, "loss": 0.0, "step": 11228 }, { "epoch": 0.7236579235677, "grad_norm": 0.00011553391408470788, "learning_rate": 3.075546007876835e-06, "loss": 0.0, "step": 11229 }, { "epoch": 0.7237223690146292, "grad_norm": 0.0025702919683202085, "learning_rate": 3.0748299319727894e-06, "loss": 0.0, "step": 11230 }, { "epoch": 0.7237868144615582, "grad_norm": 3.5742902256819365e-05, "learning_rate": 3.0741138560687433e-06, "loss": 0.0, "step": 11231 }, { "epoch": 0.7238512599084874, "grad_norm": 0.0008341897101581062, "learning_rate": 3.0733977801646976e-06, "loss": 0.0, "step": 11232 }, { "epoch": 0.7239157053554166, "grad_norm": 0.0004687527850827818, "learning_rate": 3.0726817042606523e-06, "loss": 0.0, "step": 11233 }, { "epoch": 0.7239801508023458, "grad_norm": 0.06597886275171361, "learning_rate": 3.071965628356606e-06, "loss": 0.0002, "step": 11234 }, { "epoch": 0.724044596249275, "grad_norm": 0.006935657033462403, "learning_rate": 3.0712495524525605e-06, "loss": 0.0, "step": 11235 }, { "epoch": 0.7241090416962042, "grad_norm": 0.08593750812790572, "learning_rate": 3.0705334765485144e-06, "loss": 0.0005, "step": 11236 }, { "epoch": 0.7241734871431333, "grad_norm": 0.002378809302678618, "learning_rate": 3.0698174006444687e-06, "loss": 0.0, "step": 11237 }, { "epoch": 0.7242379325900625, "grad_norm": 0.45399070124829183, "learning_rate": 3.069101324740423e-06, "loss": 0.0022, "step": 11238 }, { "epoch": 0.7243023780369917, "grad_norm": 0.26184424907155546, "learning_rate": 3.068385248836377e-06, "loss": 0.0009, "step": 11239 }, { "epoch": 0.7243668234839209, "grad_norm": 0.0033589809308402887, "learning_rate": 3.067669172932331e-06, "loss": 0.0, "step": 11240 }, { "epoch": 0.7244312689308501, "grad_norm": 0.005790182700251807, "learning_rate": 3.066953097028285e-06, "loss": 0.0, "step": 11241 }, { "epoch": 0.7244957143777793, "grad_norm": 0.0002524290695434493, "learning_rate": 3.0662370211242393e-06, "loss": 0.0, "step": 11242 }, { "epoch": 0.7245601598247083, "grad_norm": 0.0005162926392841873, "learning_rate": 3.065520945220193e-06, "loss": 0.0, "step": 11243 }, { "epoch": 0.7246246052716375, "grad_norm": 0.0017082764232077385, "learning_rate": 3.0648048693161475e-06, "loss": 0.0, "step": 11244 }, { "epoch": 0.7246890507185667, "grad_norm": 0.004266903234189341, "learning_rate": 3.064088793412102e-06, "loss": 0.0, "step": 11245 }, { "epoch": 0.7247534961654959, "grad_norm": 0.0001998843517710784, "learning_rate": 3.0633727175080565e-06, "loss": 0.0, "step": 11246 }, { "epoch": 0.7248179416124251, "grad_norm": 0.16482393362610753, "learning_rate": 3.0626566416040104e-06, "loss": 0.0002, "step": 11247 }, { "epoch": 0.7248823870593543, "grad_norm": 0.00958134520982764, "learning_rate": 3.0619405656999647e-06, "loss": 0.0, "step": 11248 }, { "epoch": 0.7249468325062834, "grad_norm": 0.005892490520614762, "learning_rate": 3.0612244897959185e-06, "loss": 0.0, "step": 11249 }, { "epoch": 0.7250112779532126, "grad_norm": 0.01922783843529163, "learning_rate": 3.060508413891873e-06, "loss": 0.0, "step": 11250 }, { "epoch": 0.7250757234001418, "grad_norm": 9.092396979500205e-05, "learning_rate": 3.0597923379878267e-06, "loss": 0.0, "step": 11251 }, { "epoch": 0.725140168847071, "grad_norm": 0.0004333292273602369, "learning_rate": 3.059076262083781e-06, "loss": 0.0, "step": 11252 }, { "epoch": 0.7252046142940002, "grad_norm": 0.017930935702477148, "learning_rate": 3.0583601861797353e-06, "loss": 0.0, "step": 11253 }, { "epoch": 0.7252690597409293, "grad_norm": 0.31085538834256204, "learning_rate": 3.057644110275689e-06, "loss": 0.0065, "step": 11254 }, { "epoch": 0.7253335051878584, "grad_norm": 0.014556196484398517, "learning_rate": 3.0569280343716435e-06, "loss": 0.0001, "step": 11255 }, { "epoch": 0.7253979506347876, "grad_norm": 0.00015255389938576366, "learning_rate": 3.0562119584675974e-06, "loss": 0.0, "step": 11256 }, { "epoch": 0.7254623960817168, "grad_norm": 0.00020395906453181, "learning_rate": 3.055495882563552e-06, "loss": 0.0, "step": 11257 }, { "epoch": 0.725526841528646, "grad_norm": 0.0003205706918907401, "learning_rate": 3.0547798066595064e-06, "loss": 0.0, "step": 11258 }, { "epoch": 0.7255912869755752, "grad_norm": 4.8430433611672415e-05, "learning_rate": 3.0540637307554603e-06, "loss": 0.0, "step": 11259 }, { "epoch": 0.7256557324225044, "grad_norm": 0.0034224555781167698, "learning_rate": 3.0533476548514146e-06, "loss": 0.0, "step": 11260 }, { "epoch": 0.7257201778694335, "grad_norm": 0.0007747417139003617, "learning_rate": 3.052631578947369e-06, "loss": 0.0, "step": 11261 }, { "epoch": 0.7257846233163627, "grad_norm": 0.005629201743521242, "learning_rate": 3.0519155030433227e-06, "loss": 0.0016, "step": 11262 }, { "epoch": 0.7258490687632919, "grad_norm": 0.05939066877792539, "learning_rate": 3.051199427139277e-06, "loss": 0.0002, "step": 11263 }, { "epoch": 0.7259135142102211, "grad_norm": 0.024681152626680736, "learning_rate": 3.050483351235231e-06, "loss": 0.0002, "step": 11264 }, { "epoch": 0.7259779596571502, "grad_norm": 0.16541939988288423, "learning_rate": 3.049767275331185e-06, "loss": 0.0003, "step": 11265 }, { "epoch": 0.7260424051040794, "grad_norm": 0.006080011470615735, "learning_rate": 3.0490511994271395e-06, "loss": 0.0001, "step": 11266 }, { "epoch": 0.7261068505510085, "grad_norm": 0.0047498769675451856, "learning_rate": 3.0483351235230934e-06, "loss": 0.0, "step": 11267 }, { "epoch": 0.7261712959979377, "grad_norm": 0.04948049102232195, "learning_rate": 3.047619047619048e-06, "loss": 0.0001, "step": 11268 }, { "epoch": 0.7262357414448669, "grad_norm": 0.0006189984344180252, "learning_rate": 3.0469029717150024e-06, "loss": 0.0, "step": 11269 }, { "epoch": 0.7263001868917961, "grad_norm": 0.005652951470014697, "learning_rate": 3.0461868958109563e-06, "loss": 0.0, "step": 11270 }, { "epoch": 0.7263646323387253, "grad_norm": 0.0005117226456541811, "learning_rate": 3.0454708199069106e-06, "loss": 0.0, "step": 11271 }, { "epoch": 0.7264290777856545, "grad_norm": 0.0007810068636911916, "learning_rate": 3.0447547440028645e-06, "loss": 0.0, "step": 11272 }, { "epoch": 0.7264935232325836, "grad_norm": 0.06590527306262674, "learning_rate": 3.0440386680988188e-06, "loss": 0.0004, "step": 11273 }, { "epoch": 0.7265579686795128, "grad_norm": 0.0005164033597064126, "learning_rate": 3.043322592194773e-06, "loss": 0.0, "step": 11274 }, { "epoch": 0.726622414126442, "grad_norm": 0.0003381609834740342, "learning_rate": 3.042606516290727e-06, "loss": 0.0, "step": 11275 }, { "epoch": 0.7266868595733711, "grad_norm": 0.021830852884517263, "learning_rate": 3.0418904403866812e-06, "loss": 0.0, "step": 11276 }, { "epoch": 0.7267513050203003, "grad_norm": 6.346864446019381e-05, "learning_rate": 3.041174364482635e-06, "loss": 0.0, "step": 11277 }, { "epoch": 0.7268157504672295, "grad_norm": 0.03467683947264384, "learning_rate": 3.0404582885785894e-06, "loss": 0.0, "step": 11278 }, { "epoch": 0.7268801959141586, "grad_norm": 0.004254908353937039, "learning_rate": 3.0397422126745437e-06, "loss": 0.0, "step": 11279 }, { "epoch": 0.7269446413610878, "grad_norm": 0.003063179042156029, "learning_rate": 3.039026136770498e-06, "loss": 0.0, "step": 11280 }, { "epoch": 0.727009086808017, "grad_norm": 0.0001314564867009563, "learning_rate": 3.0383100608664523e-06, "loss": 0.0, "step": 11281 }, { "epoch": 0.7270735322549462, "grad_norm": 0.004146407912709877, "learning_rate": 3.0375939849624066e-06, "loss": 0.0, "step": 11282 }, { "epoch": 0.7271379777018754, "grad_norm": 0.00010302604658438643, "learning_rate": 3.0368779090583605e-06, "loss": 0.0, "step": 11283 }, { "epoch": 0.7272024231488046, "grad_norm": 0.00013965483383936777, "learning_rate": 3.0361618331543148e-06, "loss": 0.0, "step": 11284 }, { "epoch": 0.7272668685957338, "grad_norm": 0.00018378616446001092, "learning_rate": 3.0354457572502686e-06, "loss": 0.0, "step": 11285 }, { "epoch": 0.7273313140426629, "grad_norm": 0.001957099914498733, "learning_rate": 3.034729681346223e-06, "loss": 0.0, "step": 11286 }, { "epoch": 0.727395759489592, "grad_norm": 0.0008828377932276057, "learning_rate": 3.0340136054421772e-06, "loss": 0.0, "step": 11287 }, { "epoch": 0.7274602049365212, "grad_norm": 0.0029698710575783546, "learning_rate": 3.033297529538131e-06, "loss": 0.0, "step": 11288 }, { "epoch": 0.7275246503834504, "grad_norm": 0.00019433618893119242, "learning_rate": 3.0325814536340854e-06, "loss": 0.0, "step": 11289 }, { "epoch": 0.7275890958303796, "grad_norm": 0.0006416192589271509, "learning_rate": 3.0318653777300393e-06, "loss": 0.0, "step": 11290 }, { "epoch": 0.7276535412773087, "grad_norm": 0.00838866934093486, "learning_rate": 3.0311493018259936e-06, "loss": 0.0, "step": 11291 }, { "epoch": 0.7277179867242379, "grad_norm": 0.000245455502279775, "learning_rate": 3.0304332259219483e-06, "loss": 0.0, "step": 11292 }, { "epoch": 0.7277824321711671, "grad_norm": 0.004930906444727167, "learning_rate": 3.029717150017902e-06, "loss": 0.0, "step": 11293 }, { "epoch": 0.7278468776180963, "grad_norm": 4.776988774088927e-05, "learning_rate": 3.0290010741138565e-06, "loss": 0.0, "step": 11294 }, { "epoch": 0.7279113230650255, "grad_norm": 0.27195599982658003, "learning_rate": 3.0282849982098108e-06, "loss": 0.001, "step": 11295 }, { "epoch": 0.7279757685119547, "grad_norm": 0.0030353622788795963, "learning_rate": 3.0275689223057647e-06, "loss": 0.0, "step": 11296 }, { "epoch": 0.7280402139588839, "grad_norm": 0.5768505948030167, "learning_rate": 3.026852846401719e-06, "loss": 0.003, "step": 11297 }, { "epoch": 0.7281046594058129, "grad_norm": 0.0025597431430050125, "learning_rate": 3.026136770497673e-06, "loss": 0.0, "step": 11298 }, { "epoch": 0.7281691048527421, "grad_norm": 0.0016598811747845761, "learning_rate": 3.025420694593627e-06, "loss": 0.0, "step": 11299 }, { "epoch": 0.7282335502996713, "grad_norm": 0.00014893458879150577, "learning_rate": 3.024704618689581e-06, "loss": 0.0, "step": 11300 }, { "epoch": 0.7282979957466005, "grad_norm": 0.0021818727213651556, "learning_rate": 3.0239885427855353e-06, "loss": 0.0, "step": 11301 }, { "epoch": 0.7283624411935297, "grad_norm": 0.008553871083575986, "learning_rate": 3.0232724668814896e-06, "loss": 0.0, "step": 11302 }, { "epoch": 0.7284268866404588, "grad_norm": 0.001350344278458923, "learning_rate": 3.0225563909774443e-06, "loss": 0.0, "step": 11303 }, { "epoch": 0.728491332087388, "grad_norm": 0.00034752832518681015, "learning_rate": 3.021840315073398e-06, "loss": 0.0, "step": 11304 }, { "epoch": 0.7285557775343172, "grad_norm": 0.004296723301807933, "learning_rate": 3.0211242391693525e-06, "loss": 0.0, "step": 11305 }, { "epoch": 0.7286202229812464, "grad_norm": 0.003349462100284539, "learning_rate": 3.0204081632653064e-06, "loss": 0.0, "step": 11306 }, { "epoch": 0.7286846684281756, "grad_norm": 0.13197393612356048, "learning_rate": 3.0196920873612607e-06, "loss": 0.0005, "step": 11307 }, { "epoch": 0.7287491138751048, "grad_norm": 0.000510281932853362, "learning_rate": 3.0189760114572145e-06, "loss": 0.0, "step": 11308 }, { "epoch": 0.7288135593220338, "grad_norm": 0.0011085601485967163, "learning_rate": 3.018259935553169e-06, "loss": 0.0, "step": 11309 }, { "epoch": 0.728878004768963, "grad_norm": 0.005277812025546389, "learning_rate": 3.017543859649123e-06, "loss": 0.0, "step": 11310 }, { "epoch": 0.7289424502158922, "grad_norm": 0.0002395444898397595, "learning_rate": 3.016827783745077e-06, "loss": 0.0, "step": 11311 }, { "epoch": 0.7290068956628214, "grad_norm": 0.0831582389340663, "learning_rate": 3.0161117078410313e-06, "loss": 0.0011, "step": 11312 }, { "epoch": 0.7290713411097506, "grad_norm": 0.0017825745778012954, "learning_rate": 3.015395631936985e-06, "loss": 0.0, "step": 11313 }, { "epoch": 0.7291357865566798, "grad_norm": 0.0011956396270402388, "learning_rate": 3.0146795560329395e-06, "loss": 0.0, "step": 11314 }, { "epoch": 0.729200232003609, "grad_norm": 0.00043885243242865044, "learning_rate": 3.0139634801288942e-06, "loss": 0.0, "step": 11315 }, { "epoch": 0.7292646774505381, "grad_norm": 0.0009921284919679632, "learning_rate": 3.013247404224848e-06, "loss": 0.0, "step": 11316 }, { "epoch": 0.7293291228974673, "grad_norm": 0.0023250831159840257, "learning_rate": 3.0125313283208024e-06, "loss": 0.0, "step": 11317 }, { "epoch": 0.7293935683443965, "grad_norm": 0.15707155844292203, "learning_rate": 3.0118152524167567e-06, "loss": 0.0004, "step": 11318 }, { "epoch": 0.7294580137913257, "grad_norm": 0.013931934554137165, "learning_rate": 3.0110991765127106e-06, "loss": 0.0, "step": 11319 }, { "epoch": 0.7295224592382549, "grad_norm": 0.0014724451177281826, "learning_rate": 3.010383100608665e-06, "loss": 0.0, "step": 11320 }, { "epoch": 0.729586904685184, "grad_norm": 9.713692903118522e-05, "learning_rate": 3.0096670247046187e-06, "loss": 0.0, "step": 11321 }, { "epoch": 0.7296513501321131, "grad_norm": 0.0010043175423519069, "learning_rate": 3.008950948800573e-06, "loss": 0.0, "step": 11322 }, { "epoch": 0.7297157955790423, "grad_norm": 0.003977753280873503, "learning_rate": 3.0082348728965273e-06, "loss": 0.0, "step": 11323 }, { "epoch": 0.7297802410259715, "grad_norm": 7.66866136069101e-05, "learning_rate": 3.007518796992481e-06, "loss": 0.0, "step": 11324 }, { "epoch": 0.7298446864729007, "grad_norm": 0.03245453503710955, "learning_rate": 3.0068027210884355e-06, "loss": 0.0001, "step": 11325 }, { "epoch": 0.7299091319198299, "grad_norm": 0.017230089378299205, "learning_rate": 3.0060866451843894e-06, "loss": 0.0002, "step": 11326 }, { "epoch": 0.729973577366759, "grad_norm": 0.0011237248614723047, "learning_rate": 3.005370569280344e-06, "loss": 0.0, "step": 11327 }, { "epoch": 0.7300380228136882, "grad_norm": 0.001596873892730706, "learning_rate": 3.0046544933762984e-06, "loss": 0.0, "step": 11328 }, { "epoch": 0.7301024682606174, "grad_norm": 0.0025830303630132935, "learning_rate": 3.0039384174722523e-06, "loss": 0.0, "step": 11329 }, { "epoch": 0.7301669137075466, "grad_norm": 0.0003530421299883205, "learning_rate": 3.0032223415682066e-06, "loss": 0.0, "step": 11330 }, { "epoch": 0.7302313591544758, "grad_norm": 0.002322914063727893, "learning_rate": 3.002506265664161e-06, "loss": 0.0, "step": 11331 }, { "epoch": 0.7302958046014049, "grad_norm": 0.015656113380858726, "learning_rate": 3.0017901897601148e-06, "loss": 0.0, "step": 11332 }, { "epoch": 0.730360250048334, "grad_norm": 0.002232419987401452, "learning_rate": 3.001074113856069e-06, "loss": 0.0, "step": 11333 }, { "epoch": 0.7304246954952632, "grad_norm": 0.0006180026645144517, "learning_rate": 3.000358037952023e-06, "loss": 0.0, "step": 11334 }, { "epoch": 0.7304891409421924, "grad_norm": 0.0053043607240062145, "learning_rate": 2.9996419620479772e-06, "loss": 0.0, "step": 11335 }, { "epoch": 0.7305535863891216, "grad_norm": 0.0007339025781432594, "learning_rate": 2.9989258861439315e-06, "loss": 0.0, "step": 11336 }, { "epoch": 0.7306180318360508, "grad_norm": 0.014264670100293255, "learning_rate": 2.9982098102398854e-06, "loss": 0.0, "step": 11337 }, { "epoch": 0.73068247728298, "grad_norm": 0.0012291208711675198, "learning_rate": 2.9974937343358397e-06, "loss": 0.0, "step": 11338 }, { "epoch": 0.7307469227299092, "grad_norm": 0.06005159391534337, "learning_rate": 2.9967776584317944e-06, "loss": 0.0001, "step": 11339 }, { "epoch": 0.7308113681768383, "grad_norm": 0.0003933094764526526, "learning_rate": 2.9960615825277483e-06, "loss": 0.0, "step": 11340 }, { "epoch": 0.7308758136237675, "grad_norm": 0.021090790890940164, "learning_rate": 2.9953455066237026e-06, "loss": 0.0002, "step": 11341 }, { "epoch": 0.7309402590706967, "grad_norm": 0.025931634133735976, "learning_rate": 2.9946294307196565e-06, "loss": 0.0, "step": 11342 }, { "epoch": 0.7310047045176258, "grad_norm": 0.02852376562835928, "learning_rate": 2.9939133548156108e-06, "loss": 0.0, "step": 11343 }, { "epoch": 0.731069149964555, "grad_norm": 0.015667323444314415, "learning_rate": 2.993197278911565e-06, "loss": 0.0, "step": 11344 }, { "epoch": 0.7311335954114841, "grad_norm": 0.00218994004770224, "learning_rate": 2.992481203007519e-06, "loss": 0.0, "step": 11345 }, { "epoch": 0.7311980408584133, "grad_norm": 0.16275101952445878, "learning_rate": 2.9917651271034732e-06, "loss": 0.0028, "step": 11346 }, { "epoch": 0.7312624863053425, "grad_norm": 0.0003862983051810963, "learning_rate": 2.991049051199427e-06, "loss": 0.0, "step": 11347 }, { "epoch": 0.7313269317522717, "grad_norm": 0.003829513862433199, "learning_rate": 2.9903329752953814e-06, "loss": 0.0, "step": 11348 }, { "epoch": 0.7313913771992009, "grad_norm": 0.0034865630005024286, "learning_rate": 2.9896168993913353e-06, "loss": 0.0, "step": 11349 }, { "epoch": 0.7314558226461301, "grad_norm": 0.0009932220979224295, "learning_rate": 2.98890082348729e-06, "loss": 0.0, "step": 11350 }, { "epoch": 0.7315202680930593, "grad_norm": 0.002630709002841604, "learning_rate": 2.9881847475832443e-06, "loss": 0.0, "step": 11351 }, { "epoch": 0.7315847135399884, "grad_norm": 0.008694154945653666, "learning_rate": 2.9874686716791986e-06, "loss": 0.0, "step": 11352 }, { "epoch": 0.7316491589869176, "grad_norm": 0.001717471994335099, "learning_rate": 2.9867525957751525e-06, "loss": 0.0, "step": 11353 }, { "epoch": 0.7317136044338467, "grad_norm": 0.08697505214756895, "learning_rate": 2.9860365198711068e-06, "loss": 0.0017, "step": 11354 }, { "epoch": 0.7317780498807759, "grad_norm": 0.0014773768805635785, "learning_rate": 2.9853204439670607e-06, "loss": 0.0, "step": 11355 }, { "epoch": 0.7318424953277051, "grad_norm": 0.01304011785695086, "learning_rate": 2.984604368063015e-06, "loss": 0.0, "step": 11356 }, { "epoch": 0.7319069407746343, "grad_norm": 0.006473541821742216, "learning_rate": 2.983888292158969e-06, "loss": 0.0, "step": 11357 }, { "epoch": 0.7319713862215634, "grad_norm": 0.0013719339669280896, "learning_rate": 2.983172216254923e-06, "loss": 0.0, "step": 11358 }, { "epoch": 0.7320358316684926, "grad_norm": 0.0002129689555400633, "learning_rate": 2.9824561403508774e-06, "loss": 0.0, "step": 11359 }, { "epoch": 0.7321002771154218, "grad_norm": 0.0001586731060494783, "learning_rate": 2.9817400644468313e-06, "loss": 0.0, "step": 11360 }, { "epoch": 0.732164722562351, "grad_norm": 0.013050724962506173, "learning_rate": 2.9810239885427856e-06, "loss": 0.0, "step": 11361 }, { "epoch": 0.7322291680092802, "grad_norm": 0.019111154363726266, "learning_rate": 2.9803079126387403e-06, "loss": 0.0001, "step": 11362 }, { "epoch": 0.7322936134562094, "grad_norm": 0.023404573866207774, "learning_rate": 2.979591836734694e-06, "loss": 0.0001, "step": 11363 }, { "epoch": 0.7323580589031385, "grad_norm": 0.0005499391081068312, "learning_rate": 2.9788757608306485e-06, "loss": 0.0, "step": 11364 }, { "epoch": 0.7324225043500676, "grad_norm": 0.0021230133353467673, "learning_rate": 2.9781596849266024e-06, "loss": 0.0, "step": 11365 }, { "epoch": 0.7324869497969968, "grad_norm": 0.014250397877147453, "learning_rate": 2.9774436090225567e-06, "loss": 0.0001, "step": 11366 }, { "epoch": 0.732551395243926, "grad_norm": 0.00023056297783971086, "learning_rate": 2.976727533118511e-06, "loss": 0.0, "step": 11367 }, { "epoch": 0.7326158406908552, "grad_norm": 0.073680816757015, "learning_rate": 2.976011457214465e-06, "loss": 0.0002, "step": 11368 }, { "epoch": 0.7326802861377844, "grad_norm": 0.011644553199364758, "learning_rate": 2.975295381310419e-06, "loss": 0.0, "step": 11369 }, { "epoch": 0.7327447315847135, "grad_norm": 1.2168431158448905, "learning_rate": 2.974579305406373e-06, "loss": 0.0042, "step": 11370 }, { "epoch": 0.7328091770316427, "grad_norm": 0.0011119148824099833, "learning_rate": 2.9738632295023273e-06, "loss": 0.0, "step": 11371 }, { "epoch": 0.7328736224785719, "grad_norm": 0.14873476128698843, "learning_rate": 2.9731471535982816e-06, "loss": 0.0002, "step": 11372 }, { "epoch": 0.7329380679255011, "grad_norm": 0.0005180624511212282, "learning_rate": 2.9724310776942355e-06, "loss": 0.0, "step": 11373 }, { "epoch": 0.7330025133724303, "grad_norm": 0.08457680714713561, "learning_rate": 2.9717150017901902e-06, "loss": 0.0002, "step": 11374 }, { "epoch": 0.7330669588193595, "grad_norm": 3.189354502948727e-05, "learning_rate": 2.9709989258861445e-06, "loss": 0.0, "step": 11375 }, { "epoch": 0.7331314042662885, "grad_norm": 0.00015374242569765145, "learning_rate": 2.9702828499820984e-06, "loss": 0.0, "step": 11376 }, { "epoch": 0.7331958497132177, "grad_norm": 0.004303427852003826, "learning_rate": 2.9695667740780527e-06, "loss": 0.0, "step": 11377 }, { "epoch": 0.7332602951601469, "grad_norm": 0.01627340553711377, "learning_rate": 2.9688506981740066e-06, "loss": 0.0, "step": 11378 }, { "epoch": 0.7333247406070761, "grad_norm": 0.1106417562165362, "learning_rate": 2.968134622269961e-06, "loss": 0.0001, "step": 11379 }, { "epoch": 0.7333891860540053, "grad_norm": 0.11460116547485791, "learning_rate": 2.967418546365915e-06, "loss": 0.0018, "step": 11380 }, { "epoch": 0.7334536315009345, "grad_norm": 0.0073095201484695465, "learning_rate": 2.966702470461869e-06, "loss": 0.0, "step": 11381 }, { "epoch": 0.7335180769478636, "grad_norm": 0.014872503234698634, "learning_rate": 2.9659863945578233e-06, "loss": 0.0001, "step": 11382 }, { "epoch": 0.7335825223947928, "grad_norm": 0.042749717691514845, "learning_rate": 2.965270318653777e-06, "loss": 0.0001, "step": 11383 }, { "epoch": 0.733646967841722, "grad_norm": 0.0008445524787041037, "learning_rate": 2.9645542427497315e-06, "loss": 0.0, "step": 11384 }, { "epoch": 0.7337114132886512, "grad_norm": 0.005167055323404655, "learning_rate": 2.9638381668456862e-06, "loss": 0.0, "step": 11385 }, { "epoch": 0.7337758587355804, "grad_norm": 0.026027118945750397, "learning_rate": 2.96312209094164e-06, "loss": 0.0, "step": 11386 }, { "epoch": 0.7338403041825095, "grad_norm": 0.0004145667714222251, "learning_rate": 2.9624060150375944e-06, "loss": 0.0, "step": 11387 }, { "epoch": 0.7339047496294386, "grad_norm": 5.982031954885564e-05, "learning_rate": 2.9616899391335487e-06, "loss": 0.0, "step": 11388 }, { "epoch": 0.7339691950763678, "grad_norm": 0.04745125465754833, "learning_rate": 2.9609738632295026e-06, "loss": 0.0003, "step": 11389 }, { "epoch": 0.734033640523297, "grad_norm": 0.0004990294223088284, "learning_rate": 2.960257787325457e-06, "loss": 0.0, "step": 11390 }, { "epoch": 0.7340980859702262, "grad_norm": 0.0006998216447846934, "learning_rate": 2.9595417114214108e-06, "loss": 0.0, "step": 11391 }, { "epoch": 0.7341625314171554, "grad_norm": 0.01677587625693549, "learning_rate": 2.958825635517365e-06, "loss": 0.0, "step": 11392 }, { "epoch": 0.7342269768640846, "grad_norm": 0.0003467193528171593, "learning_rate": 2.958109559613319e-06, "loss": 0.0, "step": 11393 }, { "epoch": 0.7342914223110137, "grad_norm": 0.0005840219191719138, "learning_rate": 2.9573934837092732e-06, "loss": 0.0, "step": 11394 }, { "epoch": 0.7343558677579429, "grad_norm": 0.0032256336054706956, "learning_rate": 2.9566774078052275e-06, "loss": 0.0, "step": 11395 }, { "epoch": 0.7344203132048721, "grad_norm": 0.00010625610456919803, "learning_rate": 2.9559613319011814e-06, "loss": 0.0, "step": 11396 }, { "epoch": 0.7344847586518013, "grad_norm": 0.00030531125861322296, "learning_rate": 2.955245255997136e-06, "loss": 0.0, "step": 11397 }, { "epoch": 0.7345492040987305, "grad_norm": 0.2009604092585717, "learning_rate": 2.9545291800930904e-06, "loss": 0.0005, "step": 11398 }, { "epoch": 0.7346136495456596, "grad_norm": 0.33045670282215517, "learning_rate": 2.9538131041890443e-06, "loss": 0.0024, "step": 11399 }, { "epoch": 0.7346780949925887, "grad_norm": 0.0005136405285533002, "learning_rate": 2.9530970282849986e-06, "loss": 0.0, "step": 11400 }, { "epoch": 0.7347425404395179, "grad_norm": 0.009765878316454831, "learning_rate": 2.9523809523809525e-06, "loss": 0.0, "step": 11401 }, { "epoch": 0.7348069858864471, "grad_norm": 0.33457723500160075, "learning_rate": 2.9516648764769068e-06, "loss": 0.0013, "step": 11402 }, { "epoch": 0.7348714313333763, "grad_norm": 0.08505562734413115, "learning_rate": 2.950948800572861e-06, "loss": 0.0001, "step": 11403 }, { "epoch": 0.7349358767803055, "grad_norm": 0.0005396746602388885, "learning_rate": 2.950232724668815e-06, "loss": 0.0, "step": 11404 }, { "epoch": 0.7350003222272347, "grad_norm": 0.0007219231506156975, "learning_rate": 2.9495166487647692e-06, "loss": 0.0, "step": 11405 }, { "epoch": 0.7350647676741638, "grad_norm": 0.00080918341992736, "learning_rate": 2.948800572860723e-06, "loss": 0.0, "step": 11406 }, { "epoch": 0.735129213121093, "grad_norm": 0.008291451385984977, "learning_rate": 2.9480844969566774e-06, "loss": 0.0001, "step": 11407 }, { "epoch": 0.7351936585680222, "grad_norm": 3.7420126980391375e-05, "learning_rate": 2.9473684210526317e-06, "loss": 0.0, "step": 11408 }, { "epoch": 0.7352581040149514, "grad_norm": 0.00547087372155844, "learning_rate": 2.9466523451485864e-06, "loss": 0.0, "step": 11409 }, { "epoch": 0.7353225494618805, "grad_norm": 0.0017479068818298368, "learning_rate": 2.9459362692445403e-06, "loss": 0.0, "step": 11410 }, { "epoch": 0.7353869949088097, "grad_norm": 0.00347103415906202, "learning_rate": 2.9452201933404946e-06, "loss": 0.0, "step": 11411 }, { "epoch": 0.7354514403557388, "grad_norm": 0.004133792163996183, "learning_rate": 2.9445041174364485e-06, "loss": 0.0, "step": 11412 }, { "epoch": 0.735515885802668, "grad_norm": 0.00042971752764806866, "learning_rate": 2.9437880415324028e-06, "loss": 0.0, "step": 11413 }, { "epoch": 0.7355803312495972, "grad_norm": 0.00010637823235444148, "learning_rate": 2.9430719656283567e-06, "loss": 0.0, "step": 11414 }, { "epoch": 0.7356447766965264, "grad_norm": 0.00012087427014443836, "learning_rate": 2.942355889724311e-06, "loss": 0.0, "step": 11415 }, { "epoch": 0.7357092221434556, "grad_norm": 0.00217104518436385, "learning_rate": 2.9416398138202653e-06, "loss": 0.0, "step": 11416 }, { "epoch": 0.7357736675903848, "grad_norm": 6.510885686800687e-05, "learning_rate": 2.940923737916219e-06, "loss": 0.0, "step": 11417 }, { "epoch": 0.735838113037314, "grad_norm": 0.013142905983270175, "learning_rate": 2.9402076620121734e-06, "loss": 0.0001, "step": 11418 }, { "epoch": 0.7359025584842431, "grad_norm": 0.002358491054728065, "learning_rate": 2.9394915861081273e-06, "loss": 0.0, "step": 11419 }, { "epoch": 0.7359670039311723, "grad_norm": 0.0026521542996968076, "learning_rate": 2.938775510204082e-06, "loss": 0.0, "step": 11420 }, { "epoch": 0.7360314493781014, "grad_norm": 0.003946334973341138, "learning_rate": 2.9380594343000363e-06, "loss": 0.0, "step": 11421 }, { "epoch": 0.7360958948250306, "grad_norm": 0.0088235968052502, "learning_rate": 2.93734335839599e-06, "loss": 0.0, "step": 11422 }, { "epoch": 0.7361603402719598, "grad_norm": 0.004964299051000416, "learning_rate": 2.9366272824919445e-06, "loss": 0.0, "step": 11423 }, { "epoch": 0.7362247857188889, "grad_norm": 0.008458645840074462, "learning_rate": 2.935911206587899e-06, "loss": 0.0, "step": 11424 }, { "epoch": 0.7362892311658181, "grad_norm": 0.003313165458895752, "learning_rate": 2.9351951306838527e-06, "loss": 0.0, "step": 11425 }, { "epoch": 0.7363536766127473, "grad_norm": 0.0005009463709045276, "learning_rate": 2.934479054779807e-06, "loss": 0.0, "step": 11426 }, { "epoch": 0.7364181220596765, "grad_norm": 0.0006078475946347758, "learning_rate": 2.933762978875761e-06, "loss": 0.0, "step": 11427 }, { "epoch": 0.7364825675066057, "grad_norm": 0.007668545880177946, "learning_rate": 2.933046902971715e-06, "loss": 0.0, "step": 11428 }, { "epoch": 0.7365470129535349, "grad_norm": 0.16581019987401066, "learning_rate": 2.9323308270676694e-06, "loss": 0.0004, "step": 11429 }, { "epoch": 0.736611458400464, "grad_norm": 9.086367411350327e-05, "learning_rate": 2.9316147511636233e-06, "loss": 0.0, "step": 11430 }, { "epoch": 0.7366759038473932, "grad_norm": 0.002234896439144746, "learning_rate": 2.9308986752595776e-06, "loss": 0.0, "step": 11431 }, { "epoch": 0.7367403492943223, "grad_norm": 0.005501577603166098, "learning_rate": 2.9301825993555323e-06, "loss": 0.0, "step": 11432 }, { "epoch": 0.7368047947412515, "grad_norm": 0.014186799638279296, "learning_rate": 2.9294665234514862e-06, "loss": 0.0, "step": 11433 }, { "epoch": 0.7368692401881807, "grad_norm": 0.007490603212982399, "learning_rate": 2.9287504475474405e-06, "loss": 0.0, "step": 11434 }, { "epoch": 0.7369336856351099, "grad_norm": 0.003929949417439596, "learning_rate": 2.9280343716433944e-06, "loss": 0.0, "step": 11435 }, { "epoch": 0.736998131082039, "grad_norm": 0.5977107253835082, "learning_rate": 2.9273182957393487e-06, "loss": 0.0005, "step": 11436 }, { "epoch": 0.7370625765289682, "grad_norm": 0.018352840375388126, "learning_rate": 2.926602219835303e-06, "loss": 0.0, "step": 11437 }, { "epoch": 0.7371270219758974, "grad_norm": 0.00020872345792618345, "learning_rate": 2.925886143931257e-06, "loss": 0.0, "step": 11438 }, { "epoch": 0.7371914674228266, "grad_norm": 0.00234018454065431, "learning_rate": 2.925170068027211e-06, "loss": 0.0, "step": 11439 }, { "epoch": 0.7372559128697558, "grad_norm": 0.0016579721245929928, "learning_rate": 2.924453992123165e-06, "loss": 0.0, "step": 11440 }, { "epoch": 0.737320358316685, "grad_norm": 0.08998472100608822, "learning_rate": 2.9237379162191193e-06, "loss": 0.0001, "step": 11441 }, { "epoch": 0.7373848037636141, "grad_norm": 0.0016083044416349665, "learning_rate": 2.923021840315073e-06, "loss": 0.0, "step": 11442 }, { "epoch": 0.7374492492105432, "grad_norm": 0.4860207992251301, "learning_rate": 2.9223057644110275e-06, "loss": 0.0011, "step": 11443 }, { "epoch": 0.7375136946574724, "grad_norm": 0.0023780665069745095, "learning_rate": 2.9215896885069822e-06, "loss": 0.0, "step": 11444 }, { "epoch": 0.7375781401044016, "grad_norm": 0.18297514282903246, "learning_rate": 2.9208736126029365e-06, "loss": 0.0008, "step": 11445 }, { "epoch": 0.7376425855513308, "grad_norm": 7.226204698247018e-05, "learning_rate": 2.9201575366988904e-06, "loss": 0.0, "step": 11446 }, { "epoch": 0.73770703099826, "grad_norm": 0.005099393862582168, "learning_rate": 2.9194414607948447e-06, "loss": 0.0, "step": 11447 }, { "epoch": 0.7377714764451891, "grad_norm": 0.04962834057641299, "learning_rate": 2.9187253848907986e-06, "loss": 0.0001, "step": 11448 }, { "epoch": 0.7378359218921183, "grad_norm": 0.0006057873141577277, "learning_rate": 2.918009308986753e-06, "loss": 0.0, "step": 11449 }, { "epoch": 0.7379003673390475, "grad_norm": 0.0028990582394420833, "learning_rate": 2.9172932330827068e-06, "loss": 0.0, "step": 11450 }, { "epoch": 0.7379648127859767, "grad_norm": 0.050842658381563356, "learning_rate": 2.916577157178661e-06, "loss": 0.0007, "step": 11451 }, { "epoch": 0.7380292582329059, "grad_norm": 0.0024151748871815318, "learning_rate": 2.9158610812746154e-06, "loss": 0.0, "step": 11452 }, { "epoch": 0.7380937036798351, "grad_norm": 0.0003905186066450872, "learning_rate": 2.9151450053705692e-06, "loss": 0.0, "step": 11453 }, { "epoch": 0.7381581491267641, "grad_norm": 0.09842042434272515, "learning_rate": 2.9144289294665235e-06, "loss": 0.0003, "step": 11454 }, { "epoch": 0.7382225945736933, "grad_norm": 0.03346659978636565, "learning_rate": 2.9137128535624782e-06, "loss": 0.0001, "step": 11455 }, { "epoch": 0.7382870400206225, "grad_norm": 0.006367096281842634, "learning_rate": 2.912996777658432e-06, "loss": 0.0, "step": 11456 }, { "epoch": 0.7383514854675517, "grad_norm": 0.00023461946393520187, "learning_rate": 2.9122807017543864e-06, "loss": 0.0, "step": 11457 }, { "epoch": 0.7384159309144809, "grad_norm": 0.00038369322384038427, "learning_rate": 2.9115646258503403e-06, "loss": 0.0, "step": 11458 }, { "epoch": 0.7384803763614101, "grad_norm": 0.005769438618029025, "learning_rate": 2.9108485499462946e-06, "loss": 0.0, "step": 11459 }, { "epoch": 0.7385448218083392, "grad_norm": 0.0017036441431630288, "learning_rate": 2.910132474042249e-06, "loss": 0.0, "step": 11460 }, { "epoch": 0.7386092672552684, "grad_norm": 0.0001072216240296, "learning_rate": 2.9094163981382028e-06, "loss": 0.0, "step": 11461 }, { "epoch": 0.7386737127021976, "grad_norm": 0.01801058241218933, "learning_rate": 2.908700322234157e-06, "loss": 0.0001, "step": 11462 }, { "epoch": 0.7387381581491268, "grad_norm": 0.22848583907627634, "learning_rate": 2.907984246330111e-06, "loss": 0.0015, "step": 11463 }, { "epoch": 0.738802603596056, "grad_norm": 0.01842134254047434, "learning_rate": 2.9072681704260652e-06, "loss": 0.0002, "step": 11464 }, { "epoch": 0.7388670490429851, "grad_norm": 0.1558477289825116, "learning_rate": 2.9065520945220195e-06, "loss": 0.0003, "step": 11465 }, { "epoch": 0.7389314944899142, "grad_norm": 0.0013242711822388608, "learning_rate": 2.9058360186179734e-06, "loss": 0.0, "step": 11466 }, { "epoch": 0.7389959399368434, "grad_norm": 0.00503076842425939, "learning_rate": 2.905119942713928e-06, "loss": 0.0, "step": 11467 }, { "epoch": 0.7390603853837726, "grad_norm": 0.05842548289512627, "learning_rate": 2.9044038668098824e-06, "loss": 0.0006, "step": 11468 }, { "epoch": 0.7391248308307018, "grad_norm": 0.003572431656853577, "learning_rate": 2.9036877909058363e-06, "loss": 0.0, "step": 11469 }, { "epoch": 0.739189276277631, "grad_norm": 0.0209967272130641, "learning_rate": 2.9029717150017906e-06, "loss": 0.0001, "step": 11470 }, { "epoch": 0.7392537217245602, "grad_norm": 0.0006626698904435314, "learning_rate": 2.9022556390977445e-06, "loss": 0.0, "step": 11471 }, { "epoch": 0.7393181671714893, "grad_norm": 0.0793006035205354, "learning_rate": 2.9015395631936988e-06, "loss": 0.0007, "step": 11472 }, { "epoch": 0.7393826126184185, "grad_norm": 0.0037272037398564804, "learning_rate": 2.900823487289653e-06, "loss": 0.0, "step": 11473 }, { "epoch": 0.7394470580653477, "grad_norm": 0.2464952448338863, "learning_rate": 2.900107411385607e-06, "loss": 0.0058, "step": 11474 }, { "epoch": 0.7395115035122769, "grad_norm": 4.4250068883029826e-05, "learning_rate": 2.8993913354815613e-06, "loss": 0.0, "step": 11475 }, { "epoch": 0.7395759489592061, "grad_norm": 0.00012204855908644014, "learning_rate": 2.898675259577515e-06, "loss": 0.0, "step": 11476 }, { "epoch": 0.7396403944061352, "grad_norm": 0.002904846725812882, "learning_rate": 2.8979591836734694e-06, "loss": 0.0, "step": 11477 }, { "epoch": 0.7397048398530643, "grad_norm": 0.002904846725812882, "learning_rate": 2.8979591836734694e-06, "loss": 0.0034, "step": 11478 }, { "epoch": 0.7397692852999935, "grad_norm": 0.004992793364236146, "learning_rate": 2.8972431077694237e-06, "loss": 0.0, "step": 11479 }, { "epoch": 0.7398337307469227, "grad_norm": 0.04042075975410935, "learning_rate": 2.896527031865378e-06, "loss": 0.0001, "step": 11480 }, { "epoch": 0.7398981761938519, "grad_norm": 0.006872133714897192, "learning_rate": 2.8958109559613323e-06, "loss": 0.0, "step": 11481 }, { "epoch": 0.7399626216407811, "grad_norm": 0.10795343834250004, "learning_rate": 2.8950948800572866e-06, "loss": 0.0002, "step": 11482 }, { "epoch": 0.7400270670877103, "grad_norm": 0.0005270637838438341, "learning_rate": 2.8943788041532405e-06, "loss": 0.0, "step": 11483 }, { "epoch": 0.7400915125346395, "grad_norm": 0.015242437661451919, "learning_rate": 2.893662728249195e-06, "loss": 0.0, "step": 11484 }, { "epoch": 0.7401559579815686, "grad_norm": 0.11215733937589789, "learning_rate": 2.8929466523451487e-06, "loss": 0.0001, "step": 11485 }, { "epoch": 0.7402204034284978, "grad_norm": 5.972577802370192e-05, "learning_rate": 2.892230576441103e-06, "loss": 0.0, "step": 11486 }, { "epoch": 0.740284848875427, "grad_norm": 0.0011349598866917305, "learning_rate": 2.8915145005370573e-06, "loss": 0.0, "step": 11487 }, { "epoch": 0.7403492943223561, "grad_norm": 0.0022451144249306092, "learning_rate": 2.890798424633011e-06, "loss": 0.0, "step": 11488 }, { "epoch": 0.7404137397692853, "grad_norm": 0.0007307866650342127, "learning_rate": 2.8900823487289654e-06, "loss": 0.0, "step": 11489 }, { "epoch": 0.7404781852162144, "grad_norm": 0.0025448175047036894, "learning_rate": 2.8893662728249193e-06, "loss": 0.0, "step": 11490 }, { "epoch": 0.7405426306631436, "grad_norm": 7.213562714107801e-05, "learning_rate": 2.888650196920874e-06, "loss": 0.0, "step": 11491 }, { "epoch": 0.7406070761100728, "grad_norm": 0.00037888931948071514, "learning_rate": 2.8879341210168283e-06, "loss": 0.0, "step": 11492 }, { "epoch": 0.740671521557002, "grad_norm": 0.0003706806676715015, "learning_rate": 2.8872180451127822e-06, "loss": 0.0, "step": 11493 }, { "epoch": 0.7407359670039312, "grad_norm": 0.011404830342325069, "learning_rate": 2.8865019692087365e-06, "loss": 0.0, "step": 11494 }, { "epoch": 0.7408004124508604, "grad_norm": 0.0014902201044040247, "learning_rate": 2.885785893304691e-06, "loss": 0.0, "step": 11495 }, { "epoch": 0.7408648578977896, "grad_norm": 0.00031089468463307054, "learning_rate": 2.8850698174006447e-06, "loss": 0.0, "step": 11496 }, { "epoch": 0.7409293033447187, "grad_norm": 0.0004114111374888369, "learning_rate": 2.884353741496599e-06, "loss": 0.0, "step": 11497 }, { "epoch": 0.7409937487916479, "grad_norm": 0.002545742978935913, "learning_rate": 2.883637665592553e-06, "loss": 0.0, "step": 11498 }, { "epoch": 0.741058194238577, "grad_norm": 8.01331414854357e-05, "learning_rate": 2.882921589688507e-06, "loss": 0.0, "step": 11499 }, { "epoch": 0.7411226396855062, "grad_norm": 0.826731859534718, "learning_rate": 2.882205513784461e-06, "loss": 0.0006, "step": 11500 }, { "epoch": 0.7411870851324354, "grad_norm": 0.1948683552474743, "learning_rate": 2.8814894378804153e-06, "loss": 0.0004, "step": 11501 }, { "epoch": 0.7412515305793645, "grad_norm": 0.008470631267963194, "learning_rate": 2.8807733619763696e-06, "loss": 0.0, "step": 11502 }, { "epoch": 0.7413159760262937, "grad_norm": 0.008737941093447015, "learning_rate": 2.8800572860723244e-06, "loss": 0.0001, "step": 11503 }, { "epoch": 0.7413804214732229, "grad_norm": 0.01340398639142613, "learning_rate": 2.8793412101682782e-06, "loss": 0.0002, "step": 11504 }, { "epoch": 0.7414448669201521, "grad_norm": 0.000581258970536343, "learning_rate": 2.8786251342642325e-06, "loss": 0.0, "step": 11505 }, { "epoch": 0.7415093123670813, "grad_norm": 0.004788253913425937, "learning_rate": 2.8779090583601864e-06, "loss": 0.0, "step": 11506 }, { "epoch": 0.7415737578140105, "grad_norm": 0.00012619639194410797, "learning_rate": 2.8771929824561407e-06, "loss": 0.0, "step": 11507 }, { "epoch": 0.7416382032609397, "grad_norm": 0.011955899024996343, "learning_rate": 2.8764769065520946e-06, "loss": 0.0, "step": 11508 }, { "epoch": 0.7417026487078688, "grad_norm": 0.0011804670870373755, "learning_rate": 2.875760830648049e-06, "loss": 0.0, "step": 11509 }, { "epoch": 0.7417670941547979, "grad_norm": 0.002210108798975343, "learning_rate": 2.875044754744003e-06, "loss": 0.0, "step": 11510 }, { "epoch": 0.7418315396017271, "grad_norm": 0.0004053548305945269, "learning_rate": 2.874328678839957e-06, "loss": 0.0, "step": 11511 }, { "epoch": 0.7418959850486563, "grad_norm": 0.00013120753274026113, "learning_rate": 2.8736126029359114e-06, "loss": 0.0, "step": 11512 }, { "epoch": 0.7419604304955855, "grad_norm": 0.12246954292956626, "learning_rate": 2.8728965270318652e-06, "loss": 0.0002, "step": 11513 }, { "epoch": 0.7420248759425147, "grad_norm": 0.0031773706184475043, "learning_rate": 2.8721804511278195e-06, "loss": 0.0, "step": 11514 }, { "epoch": 0.7420893213894438, "grad_norm": 0.0003136670518566507, "learning_rate": 2.8714643752237742e-06, "loss": 0.0, "step": 11515 }, { "epoch": 0.742153766836373, "grad_norm": 0.05410939153081128, "learning_rate": 2.870748299319728e-06, "loss": 0.0001, "step": 11516 }, { "epoch": 0.7422182122833022, "grad_norm": 7.286480718874506e-05, "learning_rate": 2.8700322234156824e-06, "loss": 0.0, "step": 11517 }, { "epoch": 0.7422826577302314, "grad_norm": 0.005174802579942772, "learning_rate": 2.8693161475116367e-06, "loss": 0.0, "step": 11518 }, { "epoch": 0.7423471031771606, "grad_norm": 0.001427121198095007, "learning_rate": 2.8686000716075906e-06, "loss": 0.0, "step": 11519 }, { "epoch": 0.7424115486240898, "grad_norm": 0.021820159725184658, "learning_rate": 2.867883995703545e-06, "loss": 0.0001, "step": 11520 }, { "epoch": 0.7424759940710188, "grad_norm": 0.0030847242572352293, "learning_rate": 2.8671679197994988e-06, "loss": 0.0, "step": 11521 }, { "epoch": 0.742540439517948, "grad_norm": 0.10641283012131907, "learning_rate": 2.866451843895453e-06, "loss": 0.0017, "step": 11522 }, { "epoch": 0.7426048849648772, "grad_norm": 0.009833028385451683, "learning_rate": 2.8657357679914074e-06, "loss": 0.0001, "step": 11523 }, { "epoch": 0.7426693304118064, "grad_norm": 0.003582891327112572, "learning_rate": 2.8650196920873612e-06, "loss": 0.0, "step": 11524 }, { "epoch": 0.7427337758587356, "grad_norm": 0.00897878326744134, "learning_rate": 2.8643036161833155e-06, "loss": 0.0001, "step": 11525 }, { "epoch": 0.7427982213056648, "grad_norm": 0.00011438453578087207, "learning_rate": 2.8635875402792703e-06, "loss": 0.0, "step": 11526 }, { "epoch": 0.7428626667525939, "grad_norm": 1.8637766949118943, "learning_rate": 2.862871464375224e-06, "loss": 0.0185, "step": 11527 }, { "epoch": 0.7429271121995231, "grad_norm": 8.769021280880539e-05, "learning_rate": 2.8621553884711784e-06, "loss": 0.0, "step": 11528 }, { "epoch": 0.7429915576464523, "grad_norm": 0.009708635368675824, "learning_rate": 2.8614393125671323e-06, "loss": 0.0001, "step": 11529 }, { "epoch": 0.7430560030933815, "grad_norm": 0.25208524448416436, "learning_rate": 2.8607232366630866e-06, "loss": 0.001, "step": 11530 }, { "epoch": 0.7431204485403107, "grad_norm": 0.23889954660465637, "learning_rate": 2.860007160759041e-06, "loss": 0.0013, "step": 11531 }, { "epoch": 0.7431848939872397, "grad_norm": 0.003267420010600027, "learning_rate": 2.8592910848549948e-06, "loss": 0.0, "step": 11532 }, { "epoch": 0.7432493394341689, "grad_norm": 8.740468378403458e-05, "learning_rate": 2.858575008950949e-06, "loss": 0.0, "step": 11533 }, { "epoch": 0.7433137848810981, "grad_norm": 6.484494334233853e-05, "learning_rate": 2.857858933046903e-06, "loss": 0.0, "step": 11534 }, { "epoch": 0.7433782303280273, "grad_norm": 0.33328327409046465, "learning_rate": 2.8571428571428573e-06, "loss": 0.0026, "step": 11535 }, { "epoch": 0.7434426757749565, "grad_norm": 0.03705969421250206, "learning_rate": 2.856426781238811e-06, "loss": 0.0001, "step": 11536 }, { "epoch": 0.7435071212218857, "grad_norm": 0.08214135387497098, "learning_rate": 2.8557107053347654e-06, "loss": 0.0001, "step": 11537 }, { "epoch": 0.7435715666688149, "grad_norm": 0.0009779501742388384, "learning_rate": 2.85499462943072e-06, "loss": 0.0, "step": 11538 }, { "epoch": 0.743636012115744, "grad_norm": 0.0019099608653063945, "learning_rate": 2.8542785535266745e-06, "loss": 0.0, "step": 11539 }, { "epoch": 0.7437004575626732, "grad_norm": 6.36266112464164e-05, "learning_rate": 2.8535624776226283e-06, "loss": 0.0, "step": 11540 }, { "epoch": 0.7437649030096024, "grad_norm": 0.01838611981256415, "learning_rate": 2.8528464017185826e-06, "loss": 0.0, "step": 11541 }, { "epoch": 0.7438293484565316, "grad_norm": 0.00011629883493401809, "learning_rate": 2.8521303258145365e-06, "loss": 0.0, "step": 11542 }, { "epoch": 0.7438937939034607, "grad_norm": 0.002767961297251372, "learning_rate": 2.851414249910491e-06, "loss": 0.0, "step": 11543 }, { "epoch": 0.7439582393503898, "grad_norm": 0.054474319327986465, "learning_rate": 2.8506981740064447e-06, "loss": 0.0004, "step": 11544 }, { "epoch": 0.744022684797319, "grad_norm": 0.0006248430618284908, "learning_rate": 2.849982098102399e-06, "loss": 0.0, "step": 11545 }, { "epoch": 0.7440871302442482, "grad_norm": 0.00481655779190564, "learning_rate": 2.8492660221983533e-06, "loss": 0.0, "step": 11546 }, { "epoch": 0.7441515756911774, "grad_norm": 0.015192968024787968, "learning_rate": 2.848549946294307e-06, "loss": 0.0001, "step": 11547 }, { "epoch": 0.7442160211381066, "grad_norm": 0.02535298427612439, "learning_rate": 2.8478338703902614e-06, "loss": 0.0001, "step": 11548 }, { "epoch": 0.7442804665850358, "grad_norm": 0.008441149317855135, "learning_rate": 2.8471177944862153e-06, "loss": 0.0001, "step": 11549 }, { "epoch": 0.744344912031965, "grad_norm": 0.00043440331282244664, "learning_rate": 2.84640171858217e-06, "loss": 0.0, "step": 11550 }, { "epoch": 0.7444093574788941, "grad_norm": 0.0028974444813782536, "learning_rate": 2.8456856426781243e-06, "loss": 0.0, "step": 11551 }, { "epoch": 0.7444738029258233, "grad_norm": 0.00011725375211290053, "learning_rate": 2.8449695667740786e-06, "loss": 0.0, "step": 11552 }, { "epoch": 0.7445382483727525, "grad_norm": 0.0003650860736984975, "learning_rate": 2.8442534908700325e-06, "loss": 0.0, "step": 11553 }, { "epoch": 0.7446026938196817, "grad_norm": 4.315019431511173e-05, "learning_rate": 2.843537414965987e-06, "loss": 0.0, "step": 11554 }, { "epoch": 0.7446671392666108, "grad_norm": 0.002278690880074027, "learning_rate": 2.8428213390619407e-06, "loss": 0.0, "step": 11555 }, { "epoch": 0.74473158471354, "grad_norm": 0.0001178197491432631, "learning_rate": 2.842105263157895e-06, "loss": 0.0, "step": 11556 }, { "epoch": 0.7447960301604691, "grad_norm": 0.003881942521512389, "learning_rate": 2.841389187253849e-06, "loss": 0.0, "step": 11557 }, { "epoch": 0.7448604756073983, "grad_norm": 0.0021296588472513025, "learning_rate": 2.840673111349803e-06, "loss": 0.0, "step": 11558 }, { "epoch": 0.7449249210543275, "grad_norm": 0.00046791558014023247, "learning_rate": 2.8399570354457575e-06, "loss": 0.0, "step": 11559 }, { "epoch": 0.7449893665012567, "grad_norm": 0.00020211463966043466, "learning_rate": 2.8392409595417113e-06, "loss": 0.0, "step": 11560 }, { "epoch": 0.7450538119481859, "grad_norm": 0.0003985674663379224, "learning_rate": 2.838524883637666e-06, "loss": 0.0, "step": 11561 }, { "epoch": 0.7451182573951151, "grad_norm": 0.33669691966511234, "learning_rate": 2.8378088077336204e-06, "loss": 0.0031, "step": 11562 }, { "epoch": 0.7451827028420442, "grad_norm": 0.012881644125098787, "learning_rate": 2.8370927318295742e-06, "loss": 0.0001, "step": 11563 }, { "epoch": 0.7452471482889734, "grad_norm": 0.0004095707305752869, "learning_rate": 2.8363766559255285e-06, "loss": 0.0, "step": 11564 }, { "epoch": 0.7453115937359026, "grad_norm": 0.0553894190092041, "learning_rate": 2.8356605800214824e-06, "loss": 0.0002, "step": 11565 }, { "epoch": 0.7453760391828317, "grad_norm": 0.0015646026292246813, "learning_rate": 2.8349445041174367e-06, "loss": 0.0, "step": 11566 }, { "epoch": 0.7454404846297609, "grad_norm": 0.0007490978582765871, "learning_rate": 2.834228428213391e-06, "loss": 0.0, "step": 11567 }, { "epoch": 0.74550493007669, "grad_norm": 0.0008107603181492814, "learning_rate": 2.833512352309345e-06, "loss": 0.0, "step": 11568 }, { "epoch": 0.7455693755236192, "grad_norm": 0.0005066625926412042, "learning_rate": 2.832796276405299e-06, "loss": 0.0, "step": 11569 }, { "epoch": 0.7456338209705484, "grad_norm": 0.0031491577925305417, "learning_rate": 2.832080200501253e-06, "loss": 0.0, "step": 11570 }, { "epoch": 0.7456982664174776, "grad_norm": 0.0008086278143267719, "learning_rate": 2.8313641245972074e-06, "loss": 0.0, "step": 11571 }, { "epoch": 0.7457627118644068, "grad_norm": 0.0003411885658767725, "learning_rate": 2.8306480486931616e-06, "loss": 0.0, "step": 11572 }, { "epoch": 0.745827157311336, "grad_norm": 0.0005964478571595986, "learning_rate": 2.829931972789116e-06, "loss": 0.0, "step": 11573 }, { "epoch": 0.7458916027582652, "grad_norm": 0.12147032343687166, "learning_rate": 2.8292158968850702e-06, "loss": 0.0002, "step": 11574 }, { "epoch": 0.7459560482051943, "grad_norm": 0.03803393591887575, "learning_rate": 2.8284998209810245e-06, "loss": 0.0016, "step": 11575 }, { "epoch": 0.7460204936521235, "grad_norm": 0.00016790024335527423, "learning_rate": 2.8277837450769784e-06, "loss": 0.0, "step": 11576 }, { "epoch": 0.7460849390990526, "grad_norm": 0.0024457187488064213, "learning_rate": 2.8270676691729327e-06, "loss": 0.0, "step": 11577 }, { "epoch": 0.7461493845459818, "grad_norm": 6.03264194184613e-05, "learning_rate": 2.8263515932688866e-06, "loss": 0.0, "step": 11578 }, { "epoch": 0.746213829992911, "grad_norm": 0.0009593868231441341, "learning_rate": 2.825635517364841e-06, "loss": 0.0, "step": 11579 }, { "epoch": 0.7462782754398402, "grad_norm": 0.00015227808015561272, "learning_rate": 2.824919441460795e-06, "loss": 0.0, "step": 11580 }, { "epoch": 0.7463427208867693, "grad_norm": 0.0005544944467892892, "learning_rate": 2.824203365556749e-06, "loss": 0.0, "step": 11581 }, { "epoch": 0.7464071663336985, "grad_norm": 0.14816729522203623, "learning_rate": 2.8234872896527034e-06, "loss": 0.0003, "step": 11582 }, { "epoch": 0.7464716117806277, "grad_norm": 0.12938729032013577, "learning_rate": 2.8227712137486572e-06, "loss": 0.0004, "step": 11583 }, { "epoch": 0.7465360572275569, "grad_norm": 0.0844716364187078, "learning_rate": 2.8220551378446115e-06, "loss": 0.0006, "step": 11584 }, { "epoch": 0.7466005026744861, "grad_norm": 0.002823985210489743, "learning_rate": 2.8213390619405663e-06, "loss": 0.0, "step": 11585 }, { "epoch": 0.7466649481214153, "grad_norm": 0.05066442807138474, "learning_rate": 2.82062298603652e-06, "loss": 0.0002, "step": 11586 }, { "epoch": 0.7467293935683444, "grad_norm": 0.004276016365310969, "learning_rate": 2.8199069101324744e-06, "loss": 0.0, "step": 11587 }, { "epoch": 0.7467938390152735, "grad_norm": 0.0028644723760939557, "learning_rate": 2.8191908342284287e-06, "loss": 0.0, "step": 11588 }, { "epoch": 0.7468582844622027, "grad_norm": 0.0002588828785945799, "learning_rate": 2.8184747583243826e-06, "loss": 0.0, "step": 11589 }, { "epoch": 0.7469227299091319, "grad_norm": 0.00039665747800869124, "learning_rate": 2.817758682420337e-06, "loss": 0.0, "step": 11590 }, { "epoch": 0.7469871753560611, "grad_norm": 0.0003482603860227377, "learning_rate": 2.8170426065162908e-06, "loss": 0.0, "step": 11591 }, { "epoch": 0.7470516208029903, "grad_norm": 0.0008336352241551569, "learning_rate": 2.816326530612245e-06, "loss": 0.0, "step": 11592 }, { "epoch": 0.7471160662499194, "grad_norm": 5.235047905893616e-05, "learning_rate": 2.815610454708199e-06, "loss": 0.0, "step": 11593 }, { "epoch": 0.7471805116968486, "grad_norm": 0.0014324225534880688, "learning_rate": 2.8148943788041533e-06, "loss": 0.0, "step": 11594 }, { "epoch": 0.7472449571437778, "grad_norm": 0.0017366095073440056, "learning_rate": 2.8141783029001076e-06, "loss": 0.0, "step": 11595 }, { "epoch": 0.747309402590707, "grad_norm": 0.0004592679869086307, "learning_rate": 2.8134622269960623e-06, "loss": 0.0, "step": 11596 }, { "epoch": 0.7473738480376362, "grad_norm": 0.0057053165657561505, "learning_rate": 2.812746151092016e-06, "loss": 0.0, "step": 11597 }, { "epoch": 0.7474382934845654, "grad_norm": 0.0006819042703925117, "learning_rate": 2.8120300751879705e-06, "loss": 0.0015, "step": 11598 }, { "epoch": 0.7475027389314944, "grad_norm": 0.6504672810054322, "learning_rate": 2.8113139992839243e-06, "loss": 0.002, "step": 11599 }, { "epoch": 0.7475671843784236, "grad_norm": 0.00035804196067088125, "learning_rate": 2.8105979233798786e-06, "loss": 0.0, "step": 11600 }, { "epoch": 0.7476316298253528, "grad_norm": 0.0016188882058790126, "learning_rate": 2.8098818474758325e-06, "loss": 0.0, "step": 11601 }, { "epoch": 0.747696075272282, "grad_norm": 0.00036099919084685566, "learning_rate": 2.809165771571787e-06, "loss": 0.0, "step": 11602 }, { "epoch": 0.7477605207192112, "grad_norm": 0.0029460432117195917, "learning_rate": 2.808449695667741e-06, "loss": 0.0, "step": 11603 }, { "epoch": 0.7478249661661404, "grad_norm": 0.00011651976692317094, "learning_rate": 2.807733619763695e-06, "loss": 0.0, "step": 11604 }, { "epoch": 0.7478894116130695, "grad_norm": 0.0025859682305249014, "learning_rate": 2.8070175438596493e-06, "loss": 0.0, "step": 11605 }, { "epoch": 0.7479538570599987, "grad_norm": 4.958741962410607e-05, "learning_rate": 2.806301467955603e-06, "loss": 0.0, "step": 11606 }, { "epoch": 0.7480183025069279, "grad_norm": 0.0029761885102953744, "learning_rate": 2.8055853920515574e-06, "loss": 0.0, "step": 11607 }, { "epoch": 0.7480827479538571, "grad_norm": 0.0008891637672764099, "learning_rate": 2.804869316147512e-06, "loss": 0.0, "step": 11608 }, { "epoch": 0.7481471934007863, "grad_norm": 2.754389762637177e-05, "learning_rate": 2.804153240243466e-06, "loss": 0.0, "step": 11609 }, { "epoch": 0.7482116388477154, "grad_norm": 0.006316527622726746, "learning_rate": 2.8034371643394203e-06, "loss": 0.0, "step": 11610 }, { "epoch": 0.7482760842946445, "grad_norm": 0.02277983004237064, "learning_rate": 2.8027210884353746e-06, "loss": 0.0001, "step": 11611 }, { "epoch": 0.7483405297415737, "grad_norm": 5.643851932003774e-05, "learning_rate": 2.8020050125313285e-06, "loss": 0.0, "step": 11612 }, { "epoch": 0.7484049751885029, "grad_norm": 0.00037393311065744746, "learning_rate": 2.801288936627283e-06, "loss": 0.0, "step": 11613 }, { "epoch": 0.7484694206354321, "grad_norm": 0.002534550843795288, "learning_rate": 2.8005728607232367e-06, "loss": 0.0, "step": 11614 }, { "epoch": 0.7485338660823613, "grad_norm": 0.024649051818809553, "learning_rate": 2.799856784819191e-06, "loss": 0.0002, "step": 11615 }, { "epoch": 0.7485983115292905, "grad_norm": 0.041082903201940656, "learning_rate": 2.7991407089151453e-06, "loss": 0.0002, "step": 11616 }, { "epoch": 0.7486627569762196, "grad_norm": 0.1127003747936899, "learning_rate": 2.798424633011099e-06, "loss": 0.0002, "step": 11617 }, { "epoch": 0.7487272024231488, "grad_norm": 0.0001859838946770348, "learning_rate": 2.7977085571070535e-06, "loss": 0.0, "step": 11618 }, { "epoch": 0.748791647870078, "grad_norm": 5.065890990128626e-05, "learning_rate": 2.7969924812030073e-06, "loss": 0.0, "step": 11619 }, { "epoch": 0.7488560933170072, "grad_norm": 9.265103468572512e-05, "learning_rate": 2.796276405298962e-06, "loss": 0.0, "step": 11620 }, { "epoch": 0.7489205387639364, "grad_norm": 0.0008966920858239456, "learning_rate": 2.7955603293949164e-06, "loss": 0.0, "step": 11621 }, { "epoch": 0.7489849842108655, "grad_norm": 0.00012183848043519522, "learning_rate": 2.7948442534908702e-06, "loss": 0.0, "step": 11622 }, { "epoch": 0.7490494296577946, "grad_norm": 0.0016598313811174246, "learning_rate": 2.7941281775868245e-06, "loss": 0.0, "step": 11623 }, { "epoch": 0.7491138751047238, "grad_norm": 1.1238977335514882e-05, "learning_rate": 2.793412101682779e-06, "loss": 0.0, "step": 11624 }, { "epoch": 0.749178320551653, "grad_norm": 0.00034594887045504333, "learning_rate": 2.7926960257787327e-06, "loss": 0.0, "step": 11625 }, { "epoch": 0.7492427659985822, "grad_norm": 0.04615189947607823, "learning_rate": 2.791979949874687e-06, "loss": 0.0001, "step": 11626 }, { "epoch": 0.7493072114455114, "grad_norm": 0.0002986722078818211, "learning_rate": 2.791263873970641e-06, "loss": 0.0, "step": 11627 }, { "epoch": 0.7493716568924406, "grad_norm": 0.005092092729537614, "learning_rate": 2.790547798066595e-06, "loss": 0.0, "step": 11628 }, { "epoch": 0.7494361023393697, "grad_norm": 0.1200249868569926, "learning_rate": 2.7898317221625495e-06, "loss": 0.0002, "step": 11629 }, { "epoch": 0.7495005477862989, "grad_norm": 0.0049599157186567015, "learning_rate": 2.7891156462585034e-06, "loss": 0.0, "step": 11630 }, { "epoch": 0.7495649932332281, "grad_norm": 8.649864165149563e-05, "learning_rate": 2.788399570354458e-06, "loss": 0.0, "step": 11631 }, { "epoch": 0.7496294386801573, "grad_norm": 0.0016241807887423438, "learning_rate": 2.7876834944504124e-06, "loss": 0.0, "step": 11632 }, { "epoch": 0.7496938841270864, "grad_norm": 0.000766953844427467, "learning_rate": 2.7869674185463662e-06, "loss": 0.0, "step": 11633 }, { "epoch": 0.7497583295740156, "grad_norm": 0.00042285343101394534, "learning_rate": 2.7862513426423205e-06, "loss": 0.0, "step": 11634 }, { "epoch": 0.7498227750209447, "grad_norm": 0.000205183922965492, "learning_rate": 2.7855352667382744e-06, "loss": 0.0, "step": 11635 }, { "epoch": 0.7498872204678739, "grad_norm": 5.591482784916908e-05, "learning_rate": 2.7848191908342287e-06, "loss": 0.0, "step": 11636 }, { "epoch": 0.7499516659148031, "grad_norm": 0.0005972326634359615, "learning_rate": 2.784103114930183e-06, "loss": 0.0, "step": 11637 }, { "epoch": 0.7500161113617323, "grad_norm": 0.08351007143777367, "learning_rate": 2.783387039026137e-06, "loss": 0.0016, "step": 11638 }, { "epoch": 0.7500805568086615, "grad_norm": 1.1101820253498027, "learning_rate": 2.782670963122091e-06, "loss": 0.0085, "step": 11639 }, { "epoch": 0.7501450022555907, "grad_norm": 4.642928998663188e-05, "learning_rate": 2.781954887218045e-06, "loss": 0.0, "step": 11640 }, { "epoch": 0.7502094477025198, "grad_norm": 0.003023120807805911, "learning_rate": 2.7812388113139994e-06, "loss": 0.0, "step": 11641 }, { "epoch": 0.750273893149449, "grad_norm": 0.00029685534288482725, "learning_rate": 2.7805227354099532e-06, "loss": 0.0, "step": 11642 }, { "epoch": 0.7503383385963782, "grad_norm": 0.00021844772164633, "learning_rate": 2.779806659505908e-06, "loss": 0.0, "step": 11643 }, { "epoch": 0.7504027840433073, "grad_norm": 0.00017051293991199247, "learning_rate": 2.7790905836018623e-06, "loss": 0.0, "step": 11644 }, { "epoch": 0.7504672294902365, "grad_norm": 0.0012412810367730285, "learning_rate": 2.7783745076978166e-06, "loss": 0.0, "step": 11645 }, { "epoch": 0.7505316749371657, "grad_norm": 9.528078393517384e-05, "learning_rate": 2.7776584317937704e-06, "loss": 0.0, "step": 11646 }, { "epoch": 0.7505961203840948, "grad_norm": 0.0011691385906994708, "learning_rate": 2.7769423558897247e-06, "loss": 0.0, "step": 11647 }, { "epoch": 0.750660565831024, "grad_norm": 0.00023046299115114463, "learning_rate": 2.7762262799856786e-06, "loss": 0.0, "step": 11648 }, { "epoch": 0.7507250112779532, "grad_norm": 0.0002655424185747253, "learning_rate": 2.775510204081633e-06, "loss": 0.0, "step": 11649 }, { "epoch": 0.7507894567248824, "grad_norm": 0.19039229979656985, "learning_rate": 2.7747941281775868e-06, "loss": 0.0004, "step": 11650 }, { "epoch": 0.7508539021718116, "grad_norm": 0.0019108833699937504, "learning_rate": 2.774078052273541e-06, "loss": 0.0, "step": 11651 }, { "epoch": 0.7509183476187408, "grad_norm": 0.0007730187024007991, "learning_rate": 2.7733619763694954e-06, "loss": 0.0, "step": 11652 }, { "epoch": 0.75098279306567, "grad_norm": 0.003987731423439629, "learning_rate": 2.7726459004654493e-06, "loss": 0.0, "step": 11653 }, { "epoch": 0.7510472385125991, "grad_norm": 0.000375934201446267, "learning_rate": 2.7719298245614036e-06, "loss": 0.0, "step": 11654 }, { "epoch": 0.7511116839595282, "grad_norm": 0.31494069653808116, "learning_rate": 2.7712137486573583e-06, "loss": 0.0014, "step": 11655 }, { "epoch": 0.7511761294064574, "grad_norm": 0.005691559960223658, "learning_rate": 2.770497672753312e-06, "loss": 0.0001, "step": 11656 }, { "epoch": 0.7512405748533866, "grad_norm": 0.0004438557435561563, "learning_rate": 2.7697815968492665e-06, "loss": 0.0, "step": 11657 }, { "epoch": 0.7513050203003158, "grad_norm": 0.05629256404691263, "learning_rate": 2.7690655209452203e-06, "loss": 0.0001, "step": 11658 }, { "epoch": 0.751369465747245, "grad_norm": 0.00039722302627628154, "learning_rate": 2.7683494450411746e-06, "loss": 0.0, "step": 11659 }, { "epoch": 0.7514339111941741, "grad_norm": 0.025686982877658825, "learning_rate": 2.767633369137129e-06, "loss": 0.0002, "step": 11660 }, { "epoch": 0.7514983566411033, "grad_norm": 0.0007727245090655069, "learning_rate": 2.766917293233083e-06, "loss": 0.0, "step": 11661 }, { "epoch": 0.7515628020880325, "grad_norm": 0.000900820580803089, "learning_rate": 2.766201217329037e-06, "loss": 0.0, "step": 11662 }, { "epoch": 0.7516272475349617, "grad_norm": 0.00027621965420460257, "learning_rate": 2.765485141424991e-06, "loss": 0.0, "step": 11663 }, { "epoch": 0.7516916929818909, "grad_norm": 0.0007073996963652853, "learning_rate": 2.7647690655209453e-06, "loss": 0.0, "step": 11664 }, { "epoch": 0.75175613842882, "grad_norm": 0.0006071636553376335, "learning_rate": 2.7640529896168996e-06, "loss": 0.0, "step": 11665 }, { "epoch": 0.7518205838757491, "grad_norm": 0.012462280848083922, "learning_rate": 2.763336913712854e-06, "loss": 0.0001, "step": 11666 }, { "epoch": 0.7518850293226783, "grad_norm": 0.00018560450923334242, "learning_rate": 2.762620837808808e-06, "loss": 0.0, "step": 11667 }, { "epoch": 0.7519494747696075, "grad_norm": 0.0006644943512278006, "learning_rate": 2.7619047619047625e-06, "loss": 0.0, "step": 11668 }, { "epoch": 0.7520139202165367, "grad_norm": 0.0020807138950169435, "learning_rate": 2.7611886860007163e-06, "loss": 0.0, "step": 11669 }, { "epoch": 0.7520783656634659, "grad_norm": 5.37856004501817e-05, "learning_rate": 2.7604726100966706e-06, "loss": 0.0, "step": 11670 }, { "epoch": 0.752142811110395, "grad_norm": 0.0005517811580377726, "learning_rate": 2.7597565341926245e-06, "loss": 0.0, "step": 11671 }, { "epoch": 0.7522072565573242, "grad_norm": 0.018654850971019456, "learning_rate": 2.759040458288579e-06, "loss": 0.0002, "step": 11672 }, { "epoch": 0.7522717020042534, "grad_norm": 0.0015801575484079477, "learning_rate": 2.758324382384533e-06, "loss": 0.0, "step": 11673 }, { "epoch": 0.7523361474511826, "grad_norm": 0.004039334597610442, "learning_rate": 2.757608306480487e-06, "loss": 0.0, "step": 11674 }, { "epoch": 0.7524005928981118, "grad_norm": 0.06624770873534068, "learning_rate": 2.7568922305764413e-06, "loss": 0.0005, "step": 11675 }, { "epoch": 0.752465038345041, "grad_norm": 0.0010852293084164862, "learning_rate": 2.756176154672395e-06, "loss": 0.0, "step": 11676 }, { "epoch": 0.75252948379197, "grad_norm": 0.0002506029780493605, "learning_rate": 2.7554600787683495e-06, "loss": 0.0, "step": 11677 }, { "epoch": 0.7525939292388992, "grad_norm": 0.0009374336268016534, "learning_rate": 2.754744002864304e-06, "loss": 0.0, "step": 11678 }, { "epoch": 0.7526583746858284, "grad_norm": 8.921424638537497e-05, "learning_rate": 2.754027926960258e-06, "loss": 0.0, "step": 11679 }, { "epoch": 0.7527228201327576, "grad_norm": 0.0015448375941823727, "learning_rate": 2.7533118510562124e-06, "loss": 0.0, "step": 11680 }, { "epoch": 0.7527872655796868, "grad_norm": 0.0010091009396157359, "learning_rate": 2.7525957751521667e-06, "loss": 0.0, "step": 11681 }, { "epoch": 0.752851711026616, "grad_norm": 0.0004459042079651145, "learning_rate": 2.7518796992481205e-06, "loss": 0.0, "step": 11682 }, { "epoch": 0.7529161564735452, "grad_norm": 0.01627399213125485, "learning_rate": 2.751163623344075e-06, "loss": 0.0, "step": 11683 }, { "epoch": 0.7529806019204743, "grad_norm": 0.013965691610753837, "learning_rate": 2.7504475474400287e-06, "loss": 0.0001, "step": 11684 }, { "epoch": 0.7530450473674035, "grad_norm": 0.0001156550801431203, "learning_rate": 2.749731471535983e-06, "loss": 0.0, "step": 11685 }, { "epoch": 0.7531094928143327, "grad_norm": 0.3935840113011604, "learning_rate": 2.7490153956319373e-06, "loss": 0.0004, "step": 11686 }, { "epoch": 0.7531739382612619, "grad_norm": 0.0007292888015694302, "learning_rate": 2.748299319727891e-06, "loss": 0.0, "step": 11687 }, { "epoch": 0.753238383708191, "grad_norm": 0.00018506515606916054, "learning_rate": 2.7475832438238455e-06, "loss": 0.0, "step": 11688 }, { "epoch": 0.7533028291551201, "grad_norm": 0.0002825209862627467, "learning_rate": 2.7468671679197994e-06, "loss": 0.0, "step": 11689 }, { "epoch": 0.7533672746020493, "grad_norm": 0.0049305875242178976, "learning_rate": 2.746151092015754e-06, "loss": 0.0, "step": 11690 }, { "epoch": 0.7534317200489785, "grad_norm": 0.0003643963192400323, "learning_rate": 2.7454350161117084e-06, "loss": 0.0, "step": 11691 }, { "epoch": 0.7534961654959077, "grad_norm": 0.0035886233576998884, "learning_rate": 2.7447189402076622e-06, "loss": 0.0, "step": 11692 }, { "epoch": 0.7535606109428369, "grad_norm": 0.03080716919407748, "learning_rate": 2.7440028643036165e-06, "loss": 0.0001, "step": 11693 }, { "epoch": 0.7536250563897661, "grad_norm": 0.00470493868439932, "learning_rate": 2.743286788399571e-06, "loss": 0.0, "step": 11694 }, { "epoch": 0.7536895018366953, "grad_norm": 0.00019118115247683613, "learning_rate": 2.7425707124955247e-06, "loss": 0.0, "step": 11695 }, { "epoch": 0.7537539472836244, "grad_norm": 5.5976844224649946e-05, "learning_rate": 2.741854636591479e-06, "loss": 0.0, "step": 11696 }, { "epoch": 0.7538183927305536, "grad_norm": 0.0237676543025234, "learning_rate": 2.741138560687433e-06, "loss": 0.0, "step": 11697 }, { "epoch": 0.7538828381774828, "grad_norm": 0.38799647013666066, "learning_rate": 2.740422484783387e-06, "loss": 0.0029, "step": 11698 }, { "epoch": 0.753947283624412, "grad_norm": 0.003417863242828486, "learning_rate": 2.739706408879341e-06, "loss": 0.0, "step": 11699 }, { "epoch": 0.7540117290713411, "grad_norm": 3.090706308481155e-05, "learning_rate": 2.7389903329752954e-06, "loss": 0.0, "step": 11700 }, { "epoch": 0.7540761745182702, "grad_norm": 6.625108551354473e-05, "learning_rate": 2.73827425707125e-06, "loss": 0.0, "step": 11701 }, { "epoch": 0.7541406199651994, "grad_norm": 0.00021590261960305907, "learning_rate": 2.7375581811672044e-06, "loss": 0.0, "step": 11702 }, { "epoch": 0.7542050654121286, "grad_norm": 0.0006234648806642372, "learning_rate": 2.7368421052631583e-06, "loss": 0.0, "step": 11703 }, { "epoch": 0.7542695108590578, "grad_norm": 0.0071332380691034, "learning_rate": 2.7361260293591126e-06, "loss": 0.0, "step": 11704 }, { "epoch": 0.754333956305987, "grad_norm": 1.2562028050453755, "learning_rate": 2.7354099534550664e-06, "loss": 0.0093, "step": 11705 }, { "epoch": 0.7543984017529162, "grad_norm": 0.0029726964662291043, "learning_rate": 2.7346938775510207e-06, "loss": 0.0, "step": 11706 }, { "epoch": 0.7544628471998454, "grad_norm": 0.014132551918677235, "learning_rate": 2.7339778016469746e-06, "loss": 0.0002, "step": 11707 }, { "epoch": 0.7545272926467745, "grad_norm": 0.001203151290063077, "learning_rate": 2.733261725742929e-06, "loss": 0.0, "step": 11708 }, { "epoch": 0.7545917380937037, "grad_norm": 0.009634512422446848, "learning_rate": 2.732545649838883e-06, "loss": 0.0001, "step": 11709 }, { "epoch": 0.7546561835406329, "grad_norm": 0.16926328691142287, "learning_rate": 2.731829573934837e-06, "loss": 0.0003, "step": 11710 }, { "epoch": 0.754720628987562, "grad_norm": 0.021506676559323645, "learning_rate": 2.7311134980307914e-06, "loss": 0.0001, "step": 11711 }, { "epoch": 0.7547850744344912, "grad_norm": 0.12201709350159476, "learning_rate": 2.7303974221267453e-06, "loss": 0.0001, "step": 11712 }, { "epoch": 0.7548495198814203, "grad_norm": 0.010025205131539256, "learning_rate": 2.7296813462227e-06, "loss": 0.0001, "step": 11713 }, { "epoch": 0.7549139653283495, "grad_norm": 0.00034131962690424583, "learning_rate": 2.7289652703186543e-06, "loss": 0.0, "step": 11714 }, { "epoch": 0.7549784107752787, "grad_norm": 0.0005576713005746142, "learning_rate": 2.728249194414608e-06, "loss": 0.0, "step": 11715 }, { "epoch": 0.7550428562222079, "grad_norm": 0.00021151105237174136, "learning_rate": 2.7275331185105625e-06, "loss": 0.0, "step": 11716 }, { "epoch": 0.7551073016691371, "grad_norm": 0.0005511650236481126, "learning_rate": 2.7268170426065167e-06, "loss": 0.0, "step": 11717 }, { "epoch": 0.7551717471160663, "grad_norm": 0.0005032934039541653, "learning_rate": 2.7261009667024706e-06, "loss": 0.0, "step": 11718 }, { "epoch": 0.7552361925629955, "grad_norm": 0.004268815835783067, "learning_rate": 2.725384890798425e-06, "loss": 0.0, "step": 11719 }, { "epoch": 0.7553006380099246, "grad_norm": 0.029657345675551724, "learning_rate": 2.724668814894379e-06, "loss": 0.0001, "step": 11720 }, { "epoch": 0.7553650834568538, "grad_norm": 0.00019803287793178183, "learning_rate": 2.723952738990333e-06, "loss": 0.0, "step": 11721 }, { "epoch": 0.7554295289037829, "grad_norm": 0.017483873308816737, "learning_rate": 2.7232366630862874e-06, "loss": 0.0001, "step": 11722 }, { "epoch": 0.7554939743507121, "grad_norm": 0.0002098614959974441, "learning_rate": 2.7225205871822413e-06, "loss": 0.0, "step": 11723 }, { "epoch": 0.7555584197976413, "grad_norm": 0.0066547328197811, "learning_rate": 2.7218045112781956e-06, "loss": 0.0, "step": 11724 }, { "epoch": 0.7556228652445705, "grad_norm": 0.00320167998285512, "learning_rate": 2.7210884353741503e-06, "loss": 0.0, "step": 11725 }, { "epoch": 0.7556873106914996, "grad_norm": 0.00010177039287106444, "learning_rate": 2.720372359470104e-06, "loss": 0.0, "step": 11726 }, { "epoch": 0.7557517561384288, "grad_norm": 0.00154407389412439, "learning_rate": 2.7196562835660585e-06, "loss": 0.0, "step": 11727 }, { "epoch": 0.755816201585358, "grad_norm": 0.0033565564377196987, "learning_rate": 2.7189402076620123e-06, "loss": 0.0, "step": 11728 }, { "epoch": 0.7558806470322872, "grad_norm": 0.1146090764871986, "learning_rate": 2.7182241317579666e-06, "loss": 0.0001, "step": 11729 }, { "epoch": 0.7559450924792164, "grad_norm": 0.0142495845011723, "learning_rate": 2.717508055853921e-06, "loss": 0.0, "step": 11730 }, { "epoch": 0.7560095379261456, "grad_norm": 0.028604510943794178, "learning_rate": 2.716791979949875e-06, "loss": 0.0, "step": 11731 }, { "epoch": 0.7560739833730747, "grad_norm": 0.0015702490887233036, "learning_rate": 2.716075904045829e-06, "loss": 0.0, "step": 11732 }, { "epoch": 0.7561384288200038, "grad_norm": 0.0037016426026286998, "learning_rate": 2.715359828141783e-06, "loss": 0.0, "step": 11733 }, { "epoch": 0.756202874266933, "grad_norm": 0.05622580946870829, "learning_rate": 2.7146437522377373e-06, "loss": 0.0016, "step": 11734 }, { "epoch": 0.7562673197138622, "grad_norm": 0.016572480640073047, "learning_rate": 2.713927676333691e-06, "loss": 0.0001, "step": 11735 }, { "epoch": 0.7563317651607914, "grad_norm": 0.011137781346677419, "learning_rate": 2.7132116004296455e-06, "loss": 0.0, "step": 11736 }, { "epoch": 0.7563962106077206, "grad_norm": 0.05387597953175312, "learning_rate": 2.7124955245256e-06, "loss": 0.0017, "step": 11737 }, { "epoch": 0.7564606560546497, "grad_norm": 0.007463667476841625, "learning_rate": 2.7117794486215545e-06, "loss": 0.0, "step": 11738 }, { "epoch": 0.7565251015015789, "grad_norm": 0.006396773303127624, "learning_rate": 2.7110633727175084e-06, "loss": 0.0, "step": 11739 }, { "epoch": 0.7565895469485081, "grad_norm": 0.21658050634270629, "learning_rate": 2.7103472968134627e-06, "loss": 0.0003, "step": 11740 }, { "epoch": 0.7566539923954373, "grad_norm": 0.009636629385455547, "learning_rate": 2.7096312209094165e-06, "loss": 0.0, "step": 11741 }, { "epoch": 0.7567184378423665, "grad_norm": 0.003825152018230289, "learning_rate": 2.708915145005371e-06, "loss": 0.0, "step": 11742 }, { "epoch": 0.7567828832892957, "grad_norm": 0.003434779293330581, "learning_rate": 2.7081990691013247e-06, "loss": 0.0, "step": 11743 }, { "epoch": 0.7568473287362247, "grad_norm": 0.005043666369474453, "learning_rate": 2.707482993197279e-06, "loss": 0.0, "step": 11744 }, { "epoch": 0.7569117741831539, "grad_norm": 0.20329336378052815, "learning_rate": 2.7067669172932333e-06, "loss": 0.0007, "step": 11745 }, { "epoch": 0.7569762196300831, "grad_norm": 0.08628995642796942, "learning_rate": 2.706050841389187e-06, "loss": 0.0001, "step": 11746 }, { "epoch": 0.7570406650770123, "grad_norm": 0.0038641870067376086, "learning_rate": 2.7053347654851415e-06, "loss": 0.0, "step": 11747 }, { "epoch": 0.7571051105239415, "grad_norm": 0.00018371592263419093, "learning_rate": 2.704618689581096e-06, "loss": 0.0, "step": 11748 }, { "epoch": 0.7571695559708707, "grad_norm": 0.15424518081402985, "learning_rate": 2.70390261367705e-06, "loss": 0.002, "step": 11749 }, { "epoch": 0.7572340014177998, "grad_norm": 0.013526881003292895, "learning_rate": 2.7031865377730044e-06, "loss": 0.0, "step": 11750 }, { "epoch": 0.757298446864729, "grad_norm": 0.12452717105498468, "learning_rate": 2.7024704618689582e-06, "loss": 0.0019, "step": 11751 }, { "epoch": 0.7573628923116582, "grad_norm": 0.003727796039911149, "learning_rate": 2.7017543859649125e-06, "loss": 0.0, "step": 11752 }, { "epoch": 0.7574273377585874, "grad_norm": 0.0013507419381989824, "learning_rate": 2.701038310060867e-06, "loss": 0.0, "step": 11753 }, { "epoch": 0.7574917832055166, "grad_norm": 0.003996855090206607, "learning_rate": 2.7003222341568207e-06, "loss": 0.0, "step": 11754 }, { "epoch": 0.7575562286524457, "grad_norm": 0.1571776864987052, "learning_rate": 2.699606158252775e-06, "loss": 0.0003, "step": 11755 }, { "epoch": 0.7576206740993748, "grad_norm": 0.012616492852407405, "learning_rate": 2.698890082348729e-06, "loss": 0.0, "step": 11756 }, { "epoch": 0.757685119546304, "grad_norm": 0.0265738536170251, "learning_rate": 2.698174006444683e-06, "loss": 0.0, "step": 11757 }, { "epoch": 0.7577495649932332, "grad_norm": 0.001051202791457292, "learning_rate": 2.6974579305406375e-06, "loss": 0.0, "step": 11758 }, { "epoch": 0.7578140104401624, "grad_norm": 0.009863241518408, "learning_rate": 2.6967418546365914e-06, "loss": 0.0001, "step": 11759 }, { "epoch": 0.7578784558870916, "grad_norm": 0.00017777405807486947, "learning_rate": 2.696025778732546e-06, "loss": 0.0, "step": 11760 }, { "epoch": 0.7579429013340208, "grad_norm": 0.0215673841155166, "learning_rate": 2.6953097028285004e-06, "loss": 0.0, "step": 11761 }, { "epoch": 0.7580073467809499, "grad_norm": 0.00022594751348228815, "learning_rate": 2.6945936269244543e-06, "loss": 0.0, "step": 11762 }, { "epoch": 0.7580717922278791, "grad_norm": 0.03331891898553955, "learning_rate": 2.6938775510204086e-06, "loss": 0.0001, "step": 11763 }, { "epoch": 0.7581362376748083, "grad_norm": 0.001087133402002619, "learning_rate": 2.6931614751163624e-06, "loss": 0.0, "step": 11764 }, { "epoch": 0.7582006831217375, "grad_norm": 1.1937595826408476, "learning_rate": 2.6924453992123167e-06, "loss": 0.0228, "step": 11765 }, { "epoch": 0.7582651285686666, "grad_norm": 2.6517257757050547, "learning_rate": 2.691729323308271e-06, "loss": 0.0089, "step": 11766 }, { "epoch": 0.7583295740155958, "grad_norm": 0.0009098672745745888, "learning_rate": 2.691013247404225e-06, "loss": 0.0, "step": 11767 }, { "epoch": 0.7583940194625249, "grad_norm": 0.0005929477012532038, "learning_rate": 2.690297171500179e-06, "loss": 0.0, "step": 11768 }, { "epoch": 0.7584584649094541, "grad_norm": 0.04149749930801474, "learning_rate": 2.689581095596133e-06, "loss": 0.0, "step": 11769 }, { "epoch": 0.7585229103563833, "grad_norm": 0.0042939977993353496, "learning_rate": 2.6888650196920874e-06, "loss": 0.0, "step": 11770 }, { "epoch": 0.7585873558033125, "grad_norm": 0.000777963915366081, "learning_rate": 2.6881489437880417e-06, "loss": 0.0, "step": 11771 }, { "epoch": 0.7586518012502417, "grad_norm": 0.0033128294424841676, "learning_rate": 2.687432867883996e-06, "loss": 0.0, "step": 11772 }, { "epoch": 0.7587162466971709, "grad_norm": 0.0009968778887012326, "learning_rate": 2.6867167919799503e-06, "loss": 0.0, "step": 11773 }, { "epoch": 0.7587806921441, "grad_norm": 0.008245827496948636, "learning_rate": 2.6860007160759046e-06, "loss": 0.0001, "step": 11774 }, { "epoch": 0.7588451375910292, "grad_norm": 0.006805220256750272, "learning_rate": 2.6852846401718585e-06, "loss": 0.0001, "step": 11775 }, { "epoch": 0.7589095830379584, "grad_norm": 0.006761178031938303, "learning_rate": 2.6845685642678127e-06, "loss": 0.0, "step": 11776 }, { "epoch": 0.7589740284848876, "grad_norm": 0.24914090247152243, "learning_rate": 2.6838524883637666e-06, "loss": 0.0019, "step": 11777 }, { "epoch": 0.7590384739318167, "grad_norm": 0.0001379510648379672, "learning_rate": 2.683136412459721e-06, "loss": 0.0, "step": 11778 }, { "epoch": 0.7591029193787459, "grad_norm": 0.49096497583264964, "learning_rate": 2.6824203365556752e-06, "loss": 0.0012, "step": 11779 }, { "epoch": 0.759167364825675, "grad_norm": 0.010198572628584911, "learning_rate": 2.681704260651629e-06, "loss": 0.0, "step": 11780 }, { "epoch": 0.7592318102726042, "grad_norm": 0.02135744720818544, "learning_rate": 2.6809881847475834e-06, "loss": 0.0, "step": 11781 }, { "epoch": 0.7592962557195334, "grad_norm": 0.0610014097381115, "learning_rate": 2.6802721088435373e-06, "loss": 0.0001, "step": 11782 }, { "epoch": 0.7593607011664626, "grad_norm": 0.003880145932573114, "learning_rate": 2.679556032939492e-06, "loss": 0.0, "step": 11783 }, { "epoch": 0.7594251466133918, "grad_norm": 0.00020370923103518696, "learning_rate": 2.6788399570354463e-06, "loss": 0.0, "step": 11784 }, { "epoch": 0.759489592060321, "grad_norm": 0.0014196499306884838, "learning_rate": 2.6781238811314e-06, "loss": 0.0, "step": 11785 }, { "epoch": 0.7595540375072501, "grad_norm": 0.0004107186318848994, "learning_rate": 2.6774078052273545e-06, "loss": 0.0, "step": 11786 }, { "epoch": 0.7596184829541793, "grad_norm": 0.004399499841082977, "learning_rate": 2.6766917293233088e-06, "loss": 0.0, "step": 11787 }, { "epoch": 0.7596829284011085, "grad_norm": 0.043867588185750386, "learning_rate": 2.6759756534192626e-06, "loss": 0.0004, "step": 11788 }, { "epoch": 0.7597473738480376, "grad_norm": 0.0009829067833408381, "learning_rate": 2.675259577515217e-06, "loss": 0.0, "step": 11789 }, { "epoch": 0.7598118192949668, "grad_norm": 0.23802523069820278, "learning_rate": 2.674543501611171e-06, "loss": 0.001, "step": 11790 }, { "epoch": 0.759876264741896, "grad_norm": 0.0033577163553118712, "learning_rate": 2.673827425707125e-06, "loss": 0.0, "step": 11791 }, { "epoch": 0.7599407101888251, "grad_norm": 0.02319084706000951, "learning_rate": 2.673111349803079e-06, "loss": 0.0001, "step": 11792 }, { "epoch": 0.7600051556357543, "grad_norm": 0.056277099181222555, "learning_rate": 2.6723952738990333e-06, "loss": 0.0001, "step": 11793 }, { "epoch": 0.7600696010826835, "grad_norm": 0.00015177896495238968, "learning_rate": 2.6716791979949876e-06, "loss": 0.0, "step": 11794 }, { "epoch": 0.7601340465296127, "grad_norm": 0.0015311593093557537, "learning_rate": 2.6709631220909423e-06, "loss": 0.0, "step": 11795 }, { "epoch": 0.7601984919765419, "grad_norm": 0.021320862762892245, "learning_rate": 2.670247046186896e-06, "loss": 0.0, "step": 11796 }, { "epoch": 0.7602629374234711, "grad_norm": 0.031283424056328626, "learning_rate": 2.6695309702828505e-06, "loss": 0.0001, "step": 11797 }, { "epoch": 0.7603273828704002, "grad_norm": 0.23894887948035884, "learning_rate": 2.6688148943788044e-06, "loss": 0.0002, "step": 11798 }, { "epoch": 0.7603918283173294, "grad_norm": 0.04260379057087094, "learning_rate": 2.6680988184747587e-06, "loss": 0.0003, "step": 11799 }, { "epoch": 0.7604562737642585, "grad_norm": 9.578836850924028e-05, "learning_rate": 2.6673827425707125e-06, "loss": 0.0, "step": 11800 }, { "epoch": 0.7605207192111877, "grad_norm": 0.006114623869051579, "learning_rate": 2.666666666666667e-06, "loss": 0.0, "step": 11801 }, { "epoch": 0.7605851646581169, "grad_norm": 0.0006305918796649802, "learning_rate": 2.665950590762621e-06, "loss": 0.0, "step": 11802 }, { "epoch": 0.7606496101050461, "grad_norm": 0.0018584589838389813, "learning_rate": 2.665234514858575e-06, "loss": 0.0, "step": 11803 }, { "epoch": 0.7607140555519752, "grad_norm": 0.0934540428233906, "learning_rate": 2.6645184389545293e-06, "loss": 0.0003, "step": 11804 }, { "epoch": 0.7607785009989044, "grad_norm": 0.017028167671529763, "learning_rate": 2.663802363050483e-06, "loss": 0.0, "step": 11805 }, { "epoch": 0.7608429464458336, "grad_norm": 0.3192495760082157, "learning_rate": 2.6630862871464375e-06, "loss": 0.004, "step": 11806 }, { "epoch": 0.7609073918927628, "grad_norm": 0.000859402811223285, "learning_rate": 2.662370211242392e-06, "loss": 0.0, "step": 11807 }, { "epoch": 0.760971837339692, "grad_norm": 0.001708086676881105, "learning_rate": 2.661654135338346e-06, "loss": 0.0, "step": 11808 }, { "epoch": 0.7610362827866212, "grad_norm": 0.0002548697259070124, "learning_rate": 2.6609380594343004e-06, "loss": 0.0, "step": 11809 }, { "epoch": 0.7611007282335503, "grad_norm": 0.032245765729999896, "learning_rate": 2.6602219835302547e-06, "loss": 0.0001, "step": 11810 }, { "epoch": 0.7611651736804794, "grad_norm": 0.07445310934893193, "learning_rate": 2.6595059076262085e-06, "loss": 0.0001, "step": 11811 }, { "epoch": 0.7612296191274086, "grad_norm": 0.17762862192819528, "learning_rate": 2.658789831722163e-06, "loss": 0.0005, "step": 11812 }, { "epoch": 0.7612940645743378, "grad_norm": 0.011117645753348048, "learning_rate": 2.6580737558181167e-06, "loss": 0.0001, "step": 11813 }, { "epoch": 0.761358510021267, "grad_norm": 0.34508473103897397, "learning_rate": 2.657357679914071e-06, "loss": 0.0005, "step": 11814 }, { "epoch": 0.7614229554681962, "grad_norm": 0.1738423116659773, "learning_rate": 2.6566416040100253e-06, "loss": 0.0006, "step": 11815 }, { "epoch": 0.7614874009151253, "grad_norm": 0.005119695379919854, "learning_rate": 2.655925528105979e-06, "loss": 0.0, "step": 11816 }, { "epoch": 0.7615518463620545, "grad_norm": 1.4379237835705299, "learning_rate": 2.6552094522019335e-06, "loss": 0.0012, "step": 11817 }, { "epoch": 0.7616162918089837, "grad_norm": 0.0022575272986593542, "learning_rate": 2.6544933762978882e-06, "loss": 0.0, "step": 11818 }, { "epoch": 0.7616807372559129, "grad_norm": 0.5371539565571194, "learning_rate": 2.653777300393842e-06, "loss": 0.0042, "step": 11819 }, { "epoch": 0.7617451827028421, "grad_norm": 0.00024939560620323515, "learning_rate": 2.6530612244897964e-06, "loss": 0.0, "step": 11820 }, { "epoch": 0.7618096281497713, "grad_norm": 0.0006033900395677962, "learning_rate": 2.6523451485857503e-06, "loss": 0.0, "step": 11821 }, { "epoch": 0.7618740735967003, "grad_norm": 8.636202424418066e-05, "learning_rate": 2.6516290726817046e-06, "loss": 0.0, "step": 11822 }, { "epoch": 0.7619385190436295, "grad_norm": 0.00012699654655637103, "learning_rate": 2.650912996777659e-06, "loss": 0.0, "step": 11823 }, { "epoch": 0.7620029644905587, "grad_norm": 0.001349256260770123, "learning_rate": 2.6501969208736127e-06, "loss": 0.0, "step": 11824 }, { "epoch": 0.7620674099374879, "grad_norm": 0.00020679210237969375, "learning_rate": 2.649480844969567e-06, "loss": 0.0, "step": 11825 }, { "epoch": 0.7621318553844171, "grad_norm": 0.002953117774268297, "learning_rate": 2.648764769065521e-06, "loss": 0.0, "step": 11826 }, { "epoch": 0.7621963008313463, "grad_norm": 0.003053642823270433, "learning_rate": 2.648048693161475e-06, "loss": 0.0, "step": 11827 }, { "epoch": 0.7622607462782754, "grad_norm": 0.01158112876131613, "learning_rate": 2.6473326172574295e-06, "loss": 0.0, "step": 11828 }, { "epoch": 0.7623251917252046, "grad_norm": 0.00046034852405999084, "learning_rate": 2.6466165413533834e-06, "loss": 0.0, "step": 11829 }, { "epoch": 0.7623896371721338, "grad_norm": 0.0006405258341291647, "learning_rate": 2.645900465449338e-06, "loss": 0.0, "step": 11830 }, { "epoch": 0.762454082619063, "grad_norm": 0.0016408110038443101, "learning_rate": 2.6451843895452924e-06, "loss": 0.0, "step": 11831 }, { "epoch": 0.7625185280659922, "grad_norm": 0.001069109636870012, "learning_rate": 2.6444683136412463e-06, "loss": 0.0, "step": 11832 }, { "epoch": 0.7625829735129213, "grad_norm": 0.015143400205832954, "learning_rate": 2.6437522377372006e-06, "loss": 0.0, "step": 11833 }, { "epoch": 0.7626474189598504, "grad_norm": 0.0010125760276858443, "learning_rate": 2.6430361618331545e-06, "loss": 0.0, "step": 11834 }, { "epoch": 0.7627118644067796, "grad_norm": 0.022163290000592962, "learning_rate": 2.6423200859291087e-06, "loss": 0.0016, "step": 11835 }, { "epoch": 0.7627763098537088, "grad_norm": 0.7956127941337039, "learning_rate": 2.641604010025063e-06, "loss": 0.0027, "step": 11836 }, { "epoch": 0.762840755300638, "grad_norm": 0.16291320241804333, "learning_rate": 2.640887934121017e-06, "loss": 0.0019, "step": 11837 }, { "epoch": 0.7629052007475672, "grad_norm": 0.00014900202124249368, "learning_rate": 2.6401718582169712e-06, "loss": 0.0, "step": 11838 }, { "epoch": 0.7629696461944964, "grad_norm": 0.0017136274959623572, "learning_rate": 2.639455782312925e-06, "loss": 0.0, "step": 11839 }, { "epoch": 0.7630340916414255, "grad_norm": 0.05000114029835755, "learning_rate": 2.6387397064088794e-06, "loss": 0.0, "step": 11840 }, { "epoch": 0.7630985370883547, "grad_norm": 7.336247695839558e-05, "learning_rate": 2.6380236305048333e-06, "loss": 0.0, "step": 11841 }, { "epoch": 0.7631629825352839, "grad_norm": 0.6437919769674801, "learning_rate": 2.637307554600788e-06, "loss": 0.0017, "step": 11842 }, { "epoch": 0.7632274279822131, "grad_norm": 0.009178616250083763, "learning_rate": 2.6365914786967423e-06, "loss": 0.0, "step": 11843 }, { "epoch": 0.7632918734291422, "grad_norm": 0.0001536315240165565, "learning_rate": 2.6358754027926966e-06, "loss": 0.0, "step": 11844 }, { "epoch": 0.7633563188760714, "grad_norm": 0.04400854197836655, "learning_rate": 2.6351593268886505e-06, "loss": 0.0, "step": 11845 }, { "epoch": 0.7634207643230005, "grad_norm": 0.05531086042366422, "learning_rate": 2.6344432509846048e-06, "loss": 0.0001, "step": 11846 }, { "epoch": 0.7634852097699297, "grad_norm": 0.00016720311908218334, "learning_rate": 2.6337271750805586e-06, "loss": 0.0, "step": 11847 }, { "epoch": 0.7635496552168589, "grad_norm": 0.0006551267521307725, "learning_rate": 2.633011099176513e-06, "loss": 0.0, "step": 11848 }, { "epoch": 0.7636141006637881, "grad_norm": 6.0717022634338646e-05, "learning_rate": 2.632295023272467e-06, "loss": 0.0, "step": 11849 }, { "epoch": 0.7636785461107173, "grad_norm": 0.010353891275486336, "learning_rate": 2.631578947368421e-06, "loss": 0.0, "step": 11850 }, { "epoch": 0.7637429915576465, "grad_norm": 0.0015369640711080093, "learning_rate": 2.6308628714643754e-06, "loss": 0.0, "step": 11851 }, { "epoch": 0.7638074370045757, "grad_norm": 0.0003941849795398379, "learning_rate": 2.6301467955603293e-06, "loss": 0.0, "step": 11852 }, { "epoch": 0.7638718824515048, "grad_norm": 0.0003463927405450879, "learning_rate": 2.629430719656284e-06, "loss": 0.0, "step": 11853 }, { "epoch": 0.763936327898434, "grad_norm": 0.0008114464169782369, "learning_rate": 2.6287146437522383e-06, "loss": 0.0, "step": 11854 }, { "epoch": 0.7640007733453632, "grad_norm": 6.755891019105164e-05, "learning_rate": 2.627998567848192e-06, "loss": 0.0, "step": 11855 }, { "epoch": 0.7640652187922923, "grad_norm": 0.0011958914319152606, "learning_rate": 2.6272824919441465e-06, "loss": 0.0, "step": 11856 }, { "epoch": 0.7641296642392215, "grad_norm": 0.0038164852243763306, "learning_rate": 2.6265664160401004e-06, "loss": 0.0, "step": 11857 }, { "epoch": 0.7641941096861506, "grad_norm": 0.00030386703955184003, "learning_rate": 2.6258503401360547e-06, "loss": 0.0, "step": 11858 }, { "epoch": 0.7642585551330798, "grad_norm": 0.00043240885358736416, "learning_rate": 2.625134264232009e-06, "loss": 0.0, "step": 11859 }, { "epoch": 0.764323000580009, "grad_norm": 0.002957734448481022, "learning_rate": 2.624418188327963e-06, "loss": 0.0, "step": 11860 }, { "epoch": 0.7643874460269382, "grad_norm": 0.0002885876430917981, "learning_rate": 2.623702112423917e-06, "loss": 0.0, "step": 11861 }, { "epoch": 0.7644518914738674, "grad_norm": 0.005890731307086754, "learning_rate": 2.622986036519871e-06, "loss": 0.0, "step": 11862 }, { "epoch": 0.7645163369207966, "grad_norm": 6.651252772496995e-05, "learning_rate": 2.6222699606158253e-06, "loss": 0.0, "step": 11863 }, { "epoch": 0.7645807823677258, "grad_norm": 0.09255987482417974, "learning_rate": 2.6215538847117796e-06, "loss": 0.0017, "step": 11864 }, { "epoch": 0.7646452278146549, "grad_norm": 0.0030729239305532265, "learning_rate": 2.620837808807734e-06, "loss": 0.0, "step": 11865 }, { "epoch": 0.7647096732615841, "grad_norm": 0.0004729470478401344, "learning_rate": 2.620121732903688e-06, "loss": 0.0, "step": 11866 }, { "epoch": 0.7647741187085132, "grad_norm": 0.00031197574419735606, "learning_rate": 2.6194056569996425e-06, "loss": 0.0, "step": 11867 }, { "epoch": 0.7648385641554424, "grad_norm": 0.554805689295591, "learning_rate": 2.6186895810955964e-06, "loss": 0.0024, "step": 11868 }, { "epoch": 0.7649030096023716, "grad_norm": 0.0005885395825914211, "learning_rate": 2.6179735051915507e-06, "loss": 0.0, "step": 11869 }, { "epoch": 0.7649674550493007, "grad_norm": 0.0012661613214064538, "learning_rate": 2.6172574292875045e-06, "loss": 0.0, "step": 11870 }, { "epoch": 0.7650319004962299, "grad_norm": 8.82429252709678e-05, "learning_rate": 2.616541353383459e-06, "loss": 0.0, "step": 11871 }, { "epoch": 0.7650963459431591, "grad_norm": 0.1790998115578762, "learning_rate": 2.615825277479413e-06, "loss": 0.0002, "step": 11872 }, { "epoch": 0.7651607913900883, "grad_norm": 0.08301635025098202, "learning_rate": 2.615109201575367e-06, "loss": 0.0006, "step": 11873 }, { "epoch": 0.7652252368370175, "grad_norm": 0.0008072109844447568, "learning_rate": 2.6143931256713213e-06, "loss": 0.0, "step": 11874 }, { "epoch": 0.7652896822839467, "grad_norm": 5.068903277494801e-05, "learning_rate": 2.613677049767275e-06, "loss": 0.0, "step": 11875 }, { "epoch": 0.7653541277308759, "grad_norm": 0.003124247831575227, "learning_rate": 2.6129609738632295e-06, "loss": 0.0, "step": 11876 }, { "epoch": 0.765418573177805, "grad_norm": 0.00019207895262184278, "learning_rate": 2.6122448979591842e-06, "loss": 0.0, "step": 11877 }, { "epoch": 0.7654830186247341, "grad_norm": 0.2378545285856134, "learning_rate": 2.611528822055138e-06, "loss": 0.0004, "step": 11878 }, { "epoch": 0.7655474640716633, "grad_norm": 0.008514530768873717, "learning_rate": 2.6108127461510924e-06, "loss": 0.0, "step": 11879 }, { "epoch": 0.7656119095185925, "grad_norm": 0.00046250907666416433, "learning_rate": 2.6100966702470467e-06, "loss": 0.0, "step": 11880 }, { "epoch": 0.7656763549655217, "grad_norm": 0.13777567500132695, "learning_rate": 2.6093805943430006e-06, "loss": 0.0023, "step": 11881 }, { "epoch": 0.7657408004124509, "grad_norm": 0.0003629261576732833, "learning_rate": 2.608664518438955e-06, "loss": 0.0, "step": 11882 }, { "epoch": 0.76580524585938, "grad_norm": 0.0009412332498742197, "learning_rate": 2.6079484425349087e-06, "loss": 0.0, "step": 11883 }, { "epoch": 0.7658696913063092, "grad_norm": 0.00012120714432166618, "learning_rate": 2.607232366630863e-06, "loss": 0.0, "step": 11884 }, { "epoch": 0.7659341367532384, "grad_norm": 0.0012524471309115777, "learning_rate": 2.606516290726817e-06, "loss": 0.0, "step": 11885 }, { "epoch": 0.7659985822001676, "grad_norm": 0.0004439158991068107, "learning_rate": 2.605800214822771e-06, "loss": 0.0, "step": 11886 }, { "epoch": 0.7660630276470968, "grad_norm": 0.014125048239822585, "learning_rate": 2.6050841389187255e-06, "loss": 0.0, "step": 11887 }, { "epoch": 0.766127473094026, "grad_norm": 0.01923080340759084, "learning_rate": 2.6043680630146802e-06, "loss": 0.0, "step": 11888 }, { "epoch": 0.766191918540955, "grad_norm": 0.00011607705395213511, "learning_rate": 2.603651987110634e-06, "loss": 0.0, "step": 11889 }, { "epoch": 0.7662563639878842, "grad_norm": 0.0006579140278466091, "learning_rate": 2.6029359112065884e-06, "loss": 0.0, "step": 11890 }, { "epoch": 0.7663208094348134, "grad_norm": 0.0002124806354313648, "learning_rate": 2.6022198353025423e-06, "loss": 0.0, "step": 11891 }, { "epoch": 0.7663852548817426, "grad_norm": 0.0013674333354162045, "learning_rate": 2.6015037593984966e-06, "loss": 0.0, "step": 11892 }, { "epoch": 0.7664497003286718, "grad_norm": 0.014675526109410085, "learning_rate": 2.6007876834944505e-06, "loss": 0.0001, "step": 11893 }, { "epoch": 0.766514145775601, "grad_norm": 0.03573676645358599, "learning_rate": 2.6000716075904047e-06, "loss": 0.0001, "step": 11894 }, { "epoch": 0.7665785912225301, "grad_norm": 0.0008220368009597546, "learning_rate": 2.599355531686359e-06, "loss": 0.0, "step": 11895 }, { "epoch": 0.7666430366694593, "grad_norm": 0.021377558030884054, "learning_rate": 2.598639455782313e-06, "loss": 0.0, "step": 11896 }, { "epoch": 0.7667074821163885, "grad_norm": 0.01966843926829134, "learning_rate": 2.5979233798782672e-06, "loss": 0.0001, "step": 11897 }, { "epoch": 0.7667719275633177, "grad_norm": 0.00019038251652734773, "learning_rate": 2.597207303974221e-06, "loss": 0.0, "step": 11898 }, { "epoch": 0.7668363730102469, "grad_norm": 0.002648309532818556, "learning_rate": 2.5964912280701754e-06, "loss": 0.0, "step": 11899 }, { "epoch": 0.766900818457176, "grad_norm": 0.0001730522603491945, "learning_rate": 2.59577515216613e-06, "loss": 0.0, "step": 11900 }, { "epoch": 0.7669652639041051, "grad_norm": 1.6432124179162014e-05, "learning_rate": 2.595059076262084e-06, "loss": 0.0, "step": 11901 }, { "epoch": 0.7670297093510343, "grad_norm": 1.7618660333607035e-05, "learning_rate": 2.5943430003580383e-06, "loss": 0.0, "step": 11902 }, { "epoch": 0.7670941547979635, "grad_norm": 0.03666116270808565, "learning_rate": 2.5936269244539926e-06, "loss": 0.0, "step": 11903 }, { "epoch": 0.7671586002448927, "grad_norm": 0.11028554089182246, "learning_rate": 2.5929108485499465e-06, "loss": 0.0002, "step": 11904 }, { "epoch": 0.7672230456918219, "grad_norm": 0.22051480791536138, "learning_rate": 2.5921947726459008e-06, "loss": 0.0013, "step": 11905 }, { "epoch": 0.767287491138751, "grad_norm": 0.006515029704666616, "learning_rate": 2.5914786967418546e-06, "loss": 0.0, "step": 11906 }, { "epoch": 0.7673519365856802, "grad_norm": 0.006650815831030154, "learning_rate": 2.590762620837809e-06, "loss": 0.0, "step": 11907 }, { "epoch": 0.7674163820326094, "grad_norm": 0.0051438193964023025, "learning_rate": 2.5900465449337632e-06, "loss": 0.0, "step": 11908 }, { "epoch": 0.7674808274795386, "grad_norm": 0.4898605593241484, "learning_rate": 2.589330469029717e-06, "loss": 0.0023, "step": 11909 }, { "epoch": 0.7675452729264678, "grad_norm": 0.0006288678826158701, "learning_rate": 2.5886143931256714e-06, "loss": 0.0, "step": 11910 }, { "epoch": 0.7676097183733969, "grad_norm": 0.003386373418255314, "learning_rate": 2.5878983172216253e-06, "loss": 0.0, "step": 11911 }, { "epoch": 0.767674163820326, "grad_norm": 0.002668168384645223, "learning_rate": 2.58718224131758e-06, "loss": 0.0, "step": 11912 }, { "epoch": 0.7677386092672552, "grad_norm": 0.001082914875932513, "learning_rate": 2.5864661654135343e-06, "loss": 0.0, "step": 11913 }, { "epoch": 0.7678030547141844, "grad_norm": 0.06223376012771815, "learning_rate": 2.585750089509488e-06, "loss": 0.0003, "step": 11914 }, { "epoch": 0.7678675001611136, "grad_norm": 0.31937400054402, "learning_rate": 2.5850340136054425e-06, "loss": 0.0019, "step": 11915 }, { "epoch": 0.7679319456080428, "grad_norm": 0.018148016411417802, "learning_rate": 2.5843179377013968e-06, "loss": 0.0001, "step": 11916 }, { "epoch": 0.767996391054972, "grad_norm": 0.0038383521751228934, "learning_rate": 2.5836018617973507e-06, "loss": 0.0, "step": 11917 }, { "epoch": 0.7680608365019012, "grad_norm": 0.1600973730490817, "learning_rate": 2.582885785893305e-06, "loss": 0.0012, "step": 11918 }, { "epoch": 0.7681252819488303, "grad_norm": 0.0460683108904585, "learning_rate": 2.582169709989259e-06, "loss": 0.0001, "step": 11919 }, { "epoch": 0.7681897273957595, "grad_norm": 0.0016058862460527552, "learning_rate": 2.581453634085213e-06, "loss": 0.0, "step": 11920 }, { "epoch": 0.7682541728426887, "grad_norm": 0.002115022812249171, "learning_rate": 2.5807375581811674e-06, "loss": 0.0, "step": 11921 }, { "epoch": 0.7683186182896179, "grad_norm": 0.004045692377456239, "learning_rate": 2.5800214822771213e-06, "loss": 0.0, "step": 11922 }, { "epoch": 0.768383063736547, "grad_norm": 0.0005268503339750655, "learning_rate": 2.579305406373076e-06, "loss": 0.0, "step": 11923 }, { "epoch": 0.7684475091834762, "grad_norm": 0.0009562755514328496, "learning_rate": 2.5785893304690303e-06, "loss": 0.0, "step": 11924 }, { "epoch": 0.7685119546304053, "grad_norm": 0.21633146350250407, "learning_rate": 2.577873254564984e-06, "loss": 0.0003, "step": 11925 }, { "epoch": 0.7685764000773345, "grad_norm": 0.0018381836193263551, "learning_rate": 2.5771571786609385e-06, "loss": 0.0, "step": 11926 }, { "epoch": 0.7686408455242637, "grad_norm": 0.007529660661730795, "learning_rate": 2.5764411027568924e-06, "loss": 0.0001, "step": 11927 }, { "epoch": 0.7687052909711929, "grad_norm": 0.0025654502952239984, "learning_rate": 2.5757250268528467e-06, "loss": 0.0, "step": 11928 }, { "epoch": 0.7687697364181221, "grad_norm": 0.0316317751952979, "learning_rate": 2.575008950948801e-06, "loss": 0.0002, "step": 11929 }, { "epoch": 0.7688341818650513, "grad_norm": 0.1274536097738183, "learning_rate": 2.574292875044755e-06, "loss": 0.0004, "step": 11930 }, { "epoch": 0.7688986273119804, "grad_norm": 0.0013855358441066388, "learning_rate": 2.573576799140709e-06, "loss": 0.0, "step": 11931 }, { "epoch": 0.7689630727589096, "grad_norm": 0.03422381413667952, "learning_rate": 2.572860723236663e-06, "loss": 0.0001, "step": 11932 }, { "epoch": 0.7690275182058388, "grad_norm": 0.00036604118401955133, "learning_rate": 2.5721446473326173e-06, "loss": 0.0, "step": 11933 }, { "epoch": 0.7690919636527679, "grad_norm": 0.007119691489192297, "learning_rate": 2.571428571428571e-06, "loss": 0.0, "step": 11934 }, { "epoch": 0.7691564090996971, "grad_norm": 0.0007796408203194449, "learning_rate": 2.570712495524526e-06, "loss": 0.0, "step": 11935 }, { "epoch": 0.7692208545466263, "grad_norm": 0.0046414742114352535, "learning_rate": 2.5699964196204802e-06, "loss": 0.0, "step": 11936 }, { "epoch": 0.7692852999935554, "grad_norm": 0.0026985613912551645, "learning_rate": 2.5692803437164345e-06, "loss": 0.0, "step": 11937 }, { "epoch": 0.7693497454404846, "grad_norm": 0.015896688905246856, "learning_rate": 2.5685642678123884e-06, "loss": 0.0001, "step": 11938 }, { "epoch": 0.7694141908874138, "grad_norm": 0.002225209735481927, "learning_rate": 2.5678481919083427e-06, "loss": 0.0, "step": 11939 }, { "epoch": 0.769478636334343, "grad_norm": 0.000544120900602758, "learning_rate": 2.5671321160042966e-06, "loss": 0.0, "step": 11940 }, { "epoch": 0.7695430817812722, "grad_norm": 0.000987951875979495, "learning_rate": 2.566416040100251e-06, "loss": 0.0, "step": 11941 }, { "epoch": 0.7696075272282014, "grad_norm": 0.2139565355227848, "learning_rate": 2.5656999641962047e-06, "loss": 0.0011, "step": 11942 }, { "epoch": 0.7696719726751305, "grad_norm": 0.0014181229277454124, "learning_rate": 2.564983888292159e-06, "loss": 0.0, "step": 11943 }, { "epoch": 0.7697364181220597, "grad_norm": 0.0011511183313510068, "learning_rate": 2.5642678123881133e-06, "loss": 0.0, "step": 11944 }, { "epoch": 0.7698008635689888, "grad_norm": 0.021332500647752366, "learning_rate": 2.563551736484067e-06, "loss": 0.0, "step": 11945 }, { "epoch": 0.769865309015918, "grad_norm": 0.0006443705067154068, "learning_rate": 2.5628356605800215e-06, "loss": 0.0, "step": 11946 }, { "epoch": 0.7699297544628472, "grad_norm": 0.018157607163298645, "learning_rate": 2.5621195846759762e-06, "loss": 0.0, "step": 11947 }, { "epoch": 0.7699941999097764, "grad_norm": 0.0006373038105464806, "learning_rate": 2.56140350877193e-06, "loss": 0.0, "step": 11948 }, { "epoch": 0.7700586453567055, "grad_norm": 0.1702311649693865, "learning_rate": 2.5606874328678844e-06, "loss": 0.0007, "step": 11949 }, { "epoch": 0.7701230908036347, "grad_norm": 0.006839913357284363, "learning_rate": 2.5599713569638383e-06, "loss": 0.0, "step": 11950 }, { "epoch": 0.7701875362505639, "grad_norm": 0.020449848798393098, "learning_rate": 2.5592552810597926e-06, "loss": 0.0, "step": 11951 }, { "epoch": 0.7702519816974931, "grad_norm": 0.0186318491828164, "learning_rate": 2.558539205155747e-06, "loss": 0.0, "step": 11952 }, { "epoch": 0.7703164271444223, "grad_norm": 0.012205018831311388, "learning_rate": 2.5578231292517007e-06, "loss": 0.0, "step": 11953 }, { "epoch": 0.7703808725913515, "grad_norm": 0.014573322368295935, "learning_rate": 2.557107053347655e-06, "loss": 0.0001, "step": 11954 }, { "epoch": 0.7704453180382806, "grad_norm": 0.3025764545657545, "learning_rate": 2.556390977443609e-06, "loss": 0.0009, "step": 11955 }, { "epoch": 0.7705097634852097, "grad_norm": 0.003638116650934389, "learning_rate": 2.5556749015395632e-06, "loss": 0.0, "step": 11956 }, { "epoch": 0.7705742089321389, "grad_norm": 0.005688319311506136, "learning_rate": 2.5549588256355175e-06, "loss": 0.0, "step": 11957 }, { "epoch": 0.7706386543790681, "grad_norm": 0.00278424510278735, "learning_rate": 2.554242749731472e-06, "loss": 0.0, "step": 11958 }, { "epoch": 0.7707030998259973, "grad_norm": 0.01585624886430006, "learning_rate": 2.553526673827426e-06, "loss": 0.0, "step": 11959 }, { "epoch": 0.7707675452729265, "grad_norm": 0.0003486352458013042, "learning_rate": 2.5528105979233804e-06, "loss": 0.0, "step": 11960 }, { "epoch": 0.7708319907198556, "grad_norm": 0.013720875341040538, "learning_rate": 2.5520945220193343e-06, "loss": 0.0, "step": 11961 }, { "epoch": 0.7708964361667848, "grad_norm": 0.0011977463656281952, "learning_rate": 2.5513784461152886e-06, "loss": 0.0, "step": 11962 }, { "epoch": 0.770960881613714, "grad_norm": 0.0006557547530678418, "learning_rate": 2.5506623702112425e-06, "loss": 0.0, "step": 11963 }, { "epoch": 0.7710253270606432, "grad_norm": 7.640697653079052e-05, "learning_rate": 2.5499462943071968e-06, "loss": 0.0, "step": 11964 }, { "epoch": 0.7710897725075724, "grad_norm": 0.0005535518755850447, "learning_rate": 2.549230218403151e-06, "loss": 0.0, "step": 11965 }, { "epoch": 0.7711542179545016, "grad_norm": 0.014170017898537866, "learning_rate": 2.548514142499105e-06, "loss": 0.0, "step": 11966 }, { "epoch": 0.7712186634014306, "grad_norm": 0.025082221368806706, "learning_rate": 2.5477980665950592e-06, "loss": 0.0, "step": 11967 }, { "epoch": 0.7712831088483598, "grad_norm": 0.06604501701956178, "learning_rate": 2.547081990691013e-06, "loss": 0.0001, "step": 11968 }, { "epoch": 0.771347554295289, "grad_norm": 0.5134007423695695, "learning_rate": 2.5463659147869674e-06, "loss": 0.0026, "step": 11969 }, { "epoch": 0.7714119997422182, "grad_norm": 9.906826757261537e-05, "learning_rate": 2.545649838882922e-06, "loss": 0.0, "step": 11970 }, { "epoch": 0.7714764451891474, "grad_norm": 0.0019939027732718065, "learning_rate": 2.544933762978876e-06, "loss": 0.0, "step": 11971 }, { "epoch": 0.7715408906360766, "grad_norm": 0.0009953462564640401, "learning_rate": 2.5442176870748303e-06, "loss": 0.0, "step": 11972 }, { "epoch": 0.7716053360830057, "grad_norm": 0.00022765940038362478, "learning_rate": 2.5435016111707846e-06, "loss": 0.0, "step": 11973 }, { "epoch": 0.7716697815299349, "grad_norm": 0.0034598503765954076, "learning_rate": 2.5427855352667385e-06, "loss": 0.0, "step": 11974 }, { "epoch": 0.7717342269768641, "grad_norm": 0.0015874933110641958, "learning_rate": 2.5420694593626928e-06, "loss": 0.0, "step": 11975 }, { "epoch": 0.7717986724237933, "grad_norm": 0.19490331702864275, "learning_rate": 2.5413533834586467e-06, "loss": 0.0002, "step": 11976 }, { "epoch": 0.7718631178707225, "grad_norm": 0.046350962259317584, "learning_rate": 2.540637307554601e-06, "loss": 0.0002, "step": 11977 }, { "epoch": 0.7719275633176516, "grad_norm": 0.0029893966150884525, "learning_rate": 2.5399212316505553e-06, "loss": 0.0, "step": 11978 }, { "epoch": 0.7719920087645807, "grad_norm": 0.006386238741863867, "learning_rate": 2.539205155746509e-06, "loss": 0.0, "step": 11979 }, { "epoch": 0.7720564542115099, "grad_norm": 0.08005569888313953, "learning_rate": 2.5384890798424634e-06, "loss": 0.0001, "step": 11980 }, { "epoch": 0.7721208996584391, "grad_norm": 0.00017401220544152146, "learning_rate": 2.5377730039384173e-06, "loss": 0.0, "step": 11981 }, { "epoch": 0.7721853451053683, "grad_norm": 0.00920950918241963, "learning_rate": 2.537056928034372e-06, "loss": 0.0001, "step": 11982 }, { "epoch": 0.7722497905522975, "grad_norm": 4.005209131921557e-05, "learning_rate": 2.5363408521303263e-06, "loss": 0.0, "step": 11983 }, { "epoch": 0.7723142359992267, "grad_norm": 0.0041205538081291486, "learning_rate": 2.53562477622628e-06, "loss": 0.0, "step": 11984 }, { "epoch": 0.7723786814461558, "grad_norm": 0.21781045231342352, "learning_rate": 2.5349087003222345e-06, "loss": 0.0006, "step": 11985 }, { "epoch": 0.772443126893085, "grad_norm": 0.4514950507189504, "learning_rate": 2.534192624418189e-06, "loss": 0.0019, "step": 11986 }, { "epoch": 0.7725075723400142, "grad_norm": 0.0022551838848633467, "learning_rate": 2.5334765485141427e-06, "loss": 0.0, "step": 11987 }, { "epoch": 0.7725720177869434, "grad_norm": 0.03850665996937269, "learning_rate": 2.532760472610097e-06, "loss": 0.0, "step": 11988 }, { "epoch": 0.7726364632338725, "grad_norm": 0.003876283490998469, "learning_rate": 2.532044396706051e-06, "loss": 0.0, "step": 11989 }, { "epoch": 0.7727009086808017, "grad_norm": 0.001072320260391634, "learning_rate": 2.531328320802005e-06, "loss": 0.0, "step": 11990 }, { "epoch": 0.7727653541277308, "grad_norm": 0.0027809967217183487, "learning_rate": 2.530612244897959e-06, "loss": 0.0, "step": 11991 }, { "epoch": 0.77282979957466, "grad_norm": 8.871064129087865e-05, "learning_rate": 2.5298961689939133e-06, "loss": 0.0, "step": 11992 }, { "epoch": 0.7728942450215892, "grad_norm": 0.004071476358003262, "learning_rate": 2.529180093089868e-06, "loss": 0.0, "step": 11993 }, { "epoch": 0.7729586904685184, "grad_norm": 0.0002506879102149424, "learning_rate": 2.5284640171858223e-06, "loss": 0.0, "step": 11994 }, { "epoch": 0.7730231359154476, "grad_norm": 0.0063177136721583385, "learning_rate": 2.5277479412817762e-06, "loss": 0.0, "step": 11995 }, { "epoch": 0.7730875813623768, "grad_norm": 0.00446458534063843, "learning_rate": 2.5270318653777305e-06, "loss": 0.0, "step": 11996 }, { "epoch": 0.773152026809306, "grad_norm": 0.0008947799086842924, "learning_rate": 2.5263157894736844e-06, "loss": 0.0, "step": 11997 }, { "epoch": 0.7732164722562351, "grad_norm": 0.0004906066965026694, "learning_rate": 2.5255997135696387e-06, "loss": 0.0, "step": 11998 }, { "epoch": 0.7732809177031643, "grad_norm": 0.0006771847353210726, "learning_rate": 2.5248836376655926e-06, "loss": 0.0, "step": 11999 }, { "epoch": 0.7733453631500935, "grad_norm": 0.03204525715449161, "learning_rate": 2.524167561761547e-06, "loss": 0.0, "step": 12000 }, { "epoch": 0.7734098085970226, "grad_norm": 0.17269605440999874, "learning_rate": 2.523451485857501e-06, "loss": 0.0004, "step": 12001 }, { "epoch": 0.7734742540439518, "grad_norm": 0.0002331402822554568, "learning_rate": 2.522735409953455e-06, "loss": 0.0, "step": 12002 }, { "epoch": 0.7735386994908809, "grad_norm": 0.0018706894203856107, "learning_rate": 2.5220193340494093e-06, "loss": 0.0, "step": 12003 }, { "epoch": 0.7736031449378101, "grad_norm": 0.23577475587510444, "learning_rate": 2.521303258145363e-06, "loss": 0.003, "step": 12004 }, { "epoch": 0.7736675903847393, "grad_norm": 0.09998982366549632, "learning_rate": 2.520587182241318e-06, "loss": 0.0002, "step": 12005 }, { "epoch": 0.7737320358316685, "grad_norm": 0.00016423824356981455, "learning_rate": 2.5198711063372722e-06, "loss": 0.0, "step": 12006 }, { "epoch": 0.7737964812785977, "grad_norm": 0.03779216557272751, "learning_rate": 2.519155030433226e-06, "loss": 0.0, "step": 12007 }, { "epoch": 0.7738609267255269, "grad_norm": 0.0012791941326287728, "learning_rate": 2.5184389545291804e-06, "loss": 0.0, "step": 12008 }, { "epoch": 0.773925372172456, "grad_norm": 0.012356869073728675, "learning_rate": 2.5177228786251347e-06, "loss": 0.0, "step": 12009 }, { "epoch": 0.7739898176193852, "grad_norm": 0.4994760987231003, "learning_rate": 2.5170068027210886e-06, "loss": 0.0029, "step": 12010 }, { "epoch": 0.7740542630663144, "grad_norm": 0.06908226621424454, "learning_rate": 2.516290726817043e-06, "loss": 0.0016, "step": 12011 }, { "epoch": 0.7741187085132435, "grad_norm": 0.28788372637119103, "learning_rate": 2.5155746509129967e-06, "loss": 0.0004, "step": 12012 }, { "epoch": 0.7741831539601727, "grad_norm": 0.0007493520973933247, "learning_rate": 2.514858575008951e-06, "loss": 0.0, "step": 12013 }, { "epoch": 0.7742475994071019, "grad_norm": 0.2166681766839472, "learning_rate": 2.5141424991049053e-06, "loss": 0.0003, "step": 12014 }, { "epoch": 0.774312044854031, "grad_norm": 0.01656720079967917, "learning_rate": 2.5134264232008592e-06, "loss": 0.0, "step": 12015 }, { "epoch": 0.7743764903009602, "grad_norm": 0.028964880535246422, "learning_rate": 2.5127103472968135e-06, "loss": 0.0, "step": 12016 }, { "epoch": 0.7744409357478894, "grad_norm": 0.01668855371166636, "learning_rate": 2.5119942713927682e-06, "loss": 0.0, "step": 12017 }, { "epoch": 0.7745053811948186, "grad_norm": 0.00826921975490535, "learning_rate": 2.511278195488722e-06, "loss": 0.0, "step": 12018 }, { "epoch": 0.7745698266417478, "grad_norm": 0.0019967600267275804, "learning_rate": 2.5105621195846764e-06, "loss": 0.0, "step": 12019 }, { "epoch": 0.774634272088677, "grad_norm": 0.1981025072169954, "learning_rate": 2.5098460436806303e-06, "loss": 0.001, "step": 12020 }, { "epoch": 0.7746987175356062, "grad_norm": 0.44295811245102745, "learning_rate": 2.5091299677765846e-06, "loss": 0.0034, "step": 12021 }, { "epoch": 0.7747631629825353, "grad_norm": 0.0007132173739187416, "learning_rate": 2.508413891872539e-06, "loss": 0.0, "step": 12022 }, { "epoch": 0.7748276084294644, "grad_norm": 0.0025241999101835977, "learning_rate": 2.5076978159684928e-06, "loss": 0.0, "step": 12023 }, { "epoch": 0.7748920538763936, "grad_norm": 0.00020836271478278545, "learning_rate": 2.506981740064447e-06, "loss": 0.0, "step": 12024 }, { "epoch": 0.7749564993233228, "grad_norm": 0.004587700629129224, "learning_rate": 2.506265664160401e-06, "loss": 0.0, "step": 12025 }, { "epoch": 0.775020944770252, "grad_norm": 0.017800457394732166, "learning_rate": 2.5055495882563552e-06, "loss": 0.0, "step": 12026 }, { "epoch": 0.7750853902171811, "grad_norm": 0.005903831600595229, "learning_rate": 2.504833512352309e-06, "loss": 0.0, "step": 12027 }, { "epoch": 0.7751498356641103, "grad_norm": 0.0019456858051113458, "learning_rate": 2.504117436448264e-06, "loss": 0.0, "step": 12028 }, { "epoch": 0.7752142811110395, "grad_norm": 0.008377258458590922, "learning_rate": 2.503401360544218e-06, "loss": 0.0, "step": 12029 }, { "epoch": 0.7752787265579687, "grad_norm": 0.01561660979057207, "learning_rate": 2.5026852846401724e-06, "loss": 0.0, "step": 12030 }, { "epoch": 0.7753431720048979, "grad_norm": 0.02577598930821863, "learning_rate": 2.5019692087361263e-06, "loss": 0.0, "step": 12031 }, { "epoch": 0.7754076174518271, "grad_norm": 0.0010712492468354091, "learning_rate": 2.5012531328320806e-06, "loss": 0.0, "step": 12032 }, { "epoch": 0.7754720628987563, "grad_norm": 0.0944316984142256, "learning_rate": 2.5005370569280345e-06, "loss": 0.0001, "step": 12033 }, { "epoch": 0.7755365083456853, "grad_norm": 0.0232625944343488, "learning_rate": 2.4998209810239888e-06, "loss": 0.0003, "step": 12034 }, { "epoch": 0.7756009537926145, "grad_norm": 1.4485049070539173, "learning_rate": 2.4991049051199427e-06, "loss": 0.0025, "step": 12035 }, { "epoch": 0.7756653992395437, "grad_norm": 0.0016307701840922474, "learning_rate": 2.498388829215897e-06, "loss": 0.0, "step": 12036 }, { "epoch": 0.7757298446864729, "grad_norm": 0.02336175794591136, "learning_rate": 2.4976727533118513e-06, "loss": 0.0001, "step": 12037 }, { "epoch": 0.7757942901334021, "grad_norm": 0.16227938458248187, "learning_rate": 2.4969566774078056e-06, "loss": 0.0022, "step": 12038 }, { "epoch": 0.7758587355803312, "grad_norm": 0.07302846880666206, "learning_rate": 2.4962406015037594e-06, "loss": 0.0004, "step": 12039 }, { "epoch": 0.7759231810272604, "grad_norm": 0.00041019317192437306, "learning_rate": 2.4955245255997137e-06, "loss": 0.0, "step": 12040 }, { "epoch": 0.7759876264741896, "grad_norm": 0.002584801277987377, "learning_rate": 2.494808449695668e-06, "loss": 0.0, "step": 12041 }, { "epoch": 0.7760520719211188, "grad_norm": 0.002223323087132319, "learning_rate": 2.494092373791622e-06, "loss": 0.0, "step": 12042 }, { "epoch": 0.776116517368048, "grad_norm": 0.01440653692624194, "learning_rate": 2.493376297887576e-06, "loss": 0.0, "step": 12043 }, { "epoch": 0.7761809628149772, "grad_norm": 0.053300064123326024, "learning_rate": 2.4926602219835305e-06, "loss": 0.0001, "step": 12044 }, { "epoch": 0.7762454082619062, "grad_norm": 0.20566420595865056, "learning_rate": 2.491944146079485e-06, "loss": 0.0023, "step": 12045 }, { "epoch": 0.7763098537088354, "grad_norm": 0.0007623805293543374, "learning_rate": 2.4912280701754387e-06, "loss": 0.0, "step": 12046 }, { "epoch": 0.7763742991557646, "grad_norm": 0.0014681434347622946, "learning_rate": 2.490511994271393e-06, "loss": 0.0, "step": 12047 }, { "epoch": 0.7764387446026938, "grad_norm": 0.00043488636991413735, "learning_rate": 2.489795918367347e-06, "loss": 0.0, "step": 12048 }, { "epoch": 0.776503190049623, "grad_norm": 0.0017831550431723615, "learning_rate": 2.4890798424633016e-06, "loss": 0.0, "step": 12049 }, { "epoch": 0.7765676354965522, "grad_norm": 0.0018114956405963806, "learning_rate": 2.4883637665592554e-06, "loss": 0.0, "step": 12050 }, { "epoch": 0.7766320809434814, "grad_norm": 0.18950622836851014, "learning_rate": 2.4876476906552097e-06, "loss": 0.0002, "step": 12051 }, { "epoch": 0.7766965263904105, "grad_norm": 0.00418238718089347, "learning_rate": 2.4869316147511636e-06, "loss": 0.0, "step": 12052 }, { "epoch": 0.7767609718373397, "grad_norm": 0.0004478533245452791, "learning_rate": 2.486215538847118e-06, "loss": 0.0, "step": 12053 }, { "epoch": 0.7768254172842689, "grad_norm": 0.00024396889565855113, "learning_rate": 2.4854994629430722e-06, "loss": 0.0, "step": 12054 }, { "epoch": 0.7768898627311981, "grad_norm": 0.2903369953181515, "learning_rate": 2.4847833870390265e-06, "loss": 0.0008, "step": 12055 }, { "epoch": 0.7769543081781272, "grad_norm": 0.00020092059146728687, "learning_rate": 2.4840673111349804e-06, "loss": 0.0, "step": 12056 }, { "epoch": 0.7770187536250563, "grad_norm": 0.0011666064461578786, "learning_rate": 2.4833512352309347e-06, "loss": 0.0, "step": 12057 }, { "epoch": 0.7770831990719855, "grad_norm": 0.001190503348124305, "learning_rate": 2.482635159326889e-06, "loss": 0.0, "step": 12058 }, { "epoch": 0.7771476445189147, "grad_norm": 0.3657997162209834, "learning_rate": 2.481919083422843e-06, "loss": 0.0032, "step": 12059 }, { "epoch": 0.7772120899658439, "grad_norm": 0.001634408520878064, "learning_rate": 2.481203007518797e-06, "loss": 0.0, "step": 12060 }, { "epoch": 0.7772765354127731, "grad_norm": 0.1469337056910139, "learning_rate": 2.4804869316147515e-06, "loss": 0.0017, "step": 12061 }, { "epoch": 0.7773409808597023, "grad_norm": 0.06710843879393069, "learning_rate": 2.4797708557107058e-06, "loss": 0.0002, "step": 12062 }, { "epoch": 0.7774054263066315, "grad_norm": 0.0017892333181064397, "learning_rate": 2.4790547798066596e-06, "loss": 0.0, "step": 12063 }, { "epoch": 0.7774698717535606, "grad_norm": 0.004690414808615758, "learning_rate": 2.478338703902614e-06, "loss": 0.0, "step": 12064 }, { "epoch": 0.7775343172004898, "grad_norm": 0.004454153225457359, "learning_rate": 2.477622627998568e-06, "loss": 0.0, "step": 12065 }, { "epoch": 0.777598762647419, "grad_norm": 0.31551044717576315, "learning_rate": 2.4769065520945225e-06, "loss": 0.0011, "step": 12066 }, { "epoch": 0.7776632080943481, "grad_norm": 0.0014824010634681267, "learning_rate": 2.4761904761904764e-06, "loss": 0.0, "step": 12067 }, { "epoch": 0.7777276535412773, "grad_norm": 0.005493271680943317, "learning_rate": 2.4754744002864307e-06, "loss": 0.0, "step": 12068 }, { "epoch": 0.7777920989882064, "grad_norm": 0.002344001587170874, "learning_rate": 2.4747583243823846e-06, "loss": 0.0, "step": 12069 }, { "epoch": 0.7778565444351356, "grad_norm": 0.00015707887304980444, "learning_rate": 2.474042248478339e-06, "loss": 0.0, "step": 12070 }, { "epoch": 0.7779209898820648, "grad_norm": 0.0006823565350090584, "learning_rate": 2.473326172574293e-06, "loss": 0.0, "step": 12071 }, { "epoch": 0.777985435328994, "grad_norm": 0.0017209300771120863, "learning_rate": 2.4726100966702475e-06, "loss": 0.0, "step": 12072 }, { "epoch": 0.7780498807759232, "grad_norm": 0.2138218417932704, "learning_rate": 2.4718940207662013e-06, "loss": 0.0006, "step": 12073 }, { "epoch": 0.7781143262228524, "grad_norm": 0.0065011778796621855, "learning_rate": 2.4711779448621556e-06, "loss": 0.0, "step": 12074 }, { "epoch": 0.7781787716697816, "grad_norm": 0.0003251508710584543, "learning_rate": 2.47046186895811e-06, "loss": 0.0, "step": 12075 }, { "epoch": 0.7782432171167107, "grad_norm": 0.0010564877939732219, "learning_rate": 2.469745793054064e-06, "loss": 0.0, "step": 12076 }, { "epoch": 0.7783076625636399, "grad_norm": 0.0001559384697446135, "learning_rate": 2.469029717150018e-06, "loss": 0.0, "step": 12077 }, { "epoch": 0.7783721080105691, "grad_norm": 0.0007897786642414037, "learning_rate": 2.4683136412459724e-06, "loss": 0.0, "step": 12078 }, { "epoch": 0.7784365534574982, "grad_norm": 0.008367158669417396, "learning_rate": 2.4675975653419267e-06, "loss": 0.0, "step": 12079 }, { "epoch": 0.7785009989044274, "grad_norm": 0.0024458826459670666, "learning_rate": 2.4668814894378806e-06, "loss": 0.0, "step": 12080 }, { "epoch": 0.7785654443513566, "grad_norm": 0.02535862235484841, "learning_rate": 2.466165413533835e-06, "loss": 0.0001, "step": 12081 }, { "epoch": 0.7786298897982857, "grad_norm": 0.0008567730125366892, "learning_rate": 2.4654493376297888e-06, "loss": 0.0, "step": 12082 }, { "epoch": 0.7786943352452149, "grad_norm": 0.0010730528697962104, "learning_rate": 2.464733261725743e-06, "loss": 0.0, "step": 12083 }, { "epoch": 0.7787587806921441, "grad_norm": 0.05461918233566015, "learning_rate": 2.4640171858216974e-06, "loss": 0.0001, "step": 12084 }, { "epoch": 0.7788232261390733, "grad_norm": 0.15517757863659667, "learning_rate": 2.4633011099176517e-06, "loss": 0.0015, "step": 12085 }, { "epoch": 0.7788876715860025, "grad_norm": 0.1818583424467823, "learning_rate": 2.4625850340136055e-06, "loss": 0.0013, "step": 12086 }, { "epoch": 0.7789521170329317, "grad_norm": 0.001713274648651585, "learning_rate": 2.46186895810956e-06, "loss": 0.0, "step": 12087 }, { "epoch": 0.7790165624798608, "grad_norm": 0.14944862140584098, "learning_rate": 2.4611528822055137e-06, "loss": 0.0011, "step": 12088 }, { "epoch": 0.77908100792679, "grad_norm": 0.12680037185162404, "learning_rate": 2.460436806301468e-06, "loss": 0.0012, "step": 12089 }, { "epoch": 0.7791454533737191, "grad_norm": 2.549845910452418e-05, "learning_rate": 2.4597207303974223e-06, "loss": 0.0, "step": 12090 }, { "epoch": 0.7792098988206483, "grad_norm": 0.000564051977485508, "learning_rate": 2.4590046544933766e-06, "loss": 0.0, "step": 12091 }, { "epoch": 0.7792743442675775, "grad_norm": 0.0010766900145023287, "learning_rate": 2.4582885785893305e-06, "loss": 0.0, "step": 12092 }, { "epoch": 0.7793387897145067, "grad_norm": 0.7329335370994735, "learning_rate": 2.4575725026852848e-06, "loss": 0.001, "step": 12093 }, { "epoch": 0.7794032351614358, "grad_norm": 0.001353292375859486, "learning_rate": 2.456856426781239e-06, "loss": 0.0, "step": 12094 }, { "epoch": 0.779467680608365, "grad_norm": 0.053991346554312696, "learning_rate": 2.456140350877193e-06, "loss": 0.0, "step": 12095 }, { "epoch": 0.7795321260552942, "grad_norm": 0.005993064838952124, "learning_rate": 2.4554242749731473e-06, "loss": 0.0, "step": 12096 }, { "epoch": 0.7795965715022234, "grad_norm": 0.0021416859490622545, "learning_rate": 2.4547081990691016e-06, "loss": 0.0, "step": 12097 }, { "epoch": 0.7796610169491526, "grad_norm": 0.014471636175070532, "learning_rate": 2.453992123165056e-06, "loss": 0.0, "step": 12098 }, { "epoch": 0.7797254623960818, "grad_norm": 0.006637386067261587, "learning_rate": 2.4532760472610097e-06, "loss": 0.0, "step": 12099 }, { "epoch": 0.7797899078430109, "grad_norm": 0.01238649530879906, "learning_rate": 2.452559971356964e-06, "loss": 0.0001, "step": 12100 }, { "epoch": 0.77985435328994, "grad_norm": 0.04190091815805419, "learning_rate": 2.4518438954529183e-06, "loss": 0.0016, "step": 12101 }, { "epoch": 0.7799187987368692, "grad_norm": 0.00034396283472747845, "learning_rate": 2.4511278195488726e-06, "loss": 0.0, "step": 12102 }, { "epoch": 0.7799832441837984, "grad_norm": 0.00787000151745771, "learning_rate": 2.4504117436448265e-06, "loss": 0.0, "step": 12103 }, { "epoch": 0.7800476896307276, "grad_norm": 0.0040082247399448896, "learning_rate": 2.449695667740781e-06, "loss": 0.0, "step": 12104 }, { "epoch": 0.7801121350776568, "grad_norm": 0.4142050407785845, "learning_rate": 2.4489795918367347e-06, "loss": 0.0009, "step": 12105 }, { "epoch": 0.7801765805245859, "grad_norm": 0.0002606664246261931, "learning_rate": 2.448263515932689e-06, "loss": 0.0, "step": 12106 }, { "epoch": 0.7802410259715151, "grad_norm": 0.003579972965774035, "learning_rate": 2.4475474400286433e-06, "loss": 0.0, "step": 12107 }, { "epoch": 0.7803054714184443, "grad_norm": 0.021305318846473693, "learning_rate": 2.4468313641245976e-06, "loss": 0.0002, "step": 12108 }, { "epoch": 0.7803699168653735, "grad_norm": 0.012691156115001318, "learning_rate": 2.4461152882205514e-06, "loss": 0.0, "step": 12109 }, { "epoch": 0.7804343623123027, "grad_norm": 0.0017728354092780912, "learning_rate": 2.4453992123165057e-06, "loss": 0.0, "step": 12110 }, { "epoch": 0.7804988077592319, "grad_norm": 0.0016533355333587835, "learning_rate": 2.44468313641246e-06, "loss": 0.0, "step": 12111 }, { "epoch": 0.7805632532061609, "grad_norm": 0.0022065467282514017, "learning_rate": 2.443967060508414e-06, "loss": 0.0, "step": 12112 }, { "epoch": 0.7806276986530901, "grad_norm": 0.004123723032585986, "learning_rate": 2.4432509846043682e-06, "loss": 0.0, "step": 12113 }, { "epoch": 0.7806921441000193, "grad_norm": 0.0005452104333529033, "learning_rate": 2.4425349087003225e-06, "loss": 0.0, "step": 12114 }, { "epoch": 0.7807565895469485, "grad_norm": 0.0005542779206685047, "learning_rate": 2.441818832796277e-06, "loss": 0.0, "step": 12115 }, { "epoch": 0.7808210349938777, "grad_norm": 0.10948264410785608, "learning_rate": 2.4411027568922307e-06, "loss": 0.0004, "step": 12116 }, { "epoch": 0.7808854804408069, "grad_norm": 0.0001848680504991575, "learning_rate": 2.440386680988185e-06, "loss": 0.0, "step": 12117 }, { "epoch": 0.780949925887736, "grad_norm": 0.0007432499498959208, "learning_rate": 2.439670605084139e-06, "loss": 0.0, "step": 12118 }, { "epoch": 0.7810143713346652, "grad_norm": 0.004389440175323089, "learning_rate": 2.4389545291800936e-06, "loss": 0.0, "step": 12119 }, { "epoch": 0.7810788167815944, "grad_norm": 0.002810186785865301, "learning_rate": 2.4382384532760475e-06, "loss": 0.0, "step": 12120 }, { "epoch": 0.7811432622285236, "grad_norm": 0.007075445688301599, "learning_rate": 2.4375223773720018e-06, "loss": 0.0, "step": 12121 }, { "epoch": 0.7812077076754528, "grad_norm": 0.00019089657477344002, "learning_rate": 2.4368063014679556e-06, "loss": 0.0, "step": 12122 }, { "epoch": 0.7812721531223819, "grad_norm": 0.002233569077508966, "learning_rate": 2.43609022556391e-06, "loss": 0.0, "step": 12123 }, { "epoch": 0.781336598569311, "grad_norm": 2.4103195654440213, "learning_rate": 2.4353741496598642e-06, "loss": 0.0292, "step": 12124 }, { "epoch": 0.7814010440162402, "grad_norm": 8.232393805314394e-05, "learning_rate": 2.4346580737558185e-06, "loss": 0.0, "step": 12125 }, { "epoch": 0.7814654894631694, "grad_norm": 0.16571786769047236, "learning_rate": 2.4339419978517724e-06, "loss": 0.0003, "step": 12126 }, { "epoch": 0.7815299349100986, "grad_norm": 0.006719157453833545, "learning_rate": 2.4332259219477267e-06, "loss": 0.0, "step": 12127 }, { "epoch": 0.7815943803570278, "grad_norm": 0.004906313933490102, "learning_rate": 2.432509846043681e-06, "loss": 0.0, "step": 12128 }, { "epoch": 0.781658825803957, "grad_norm": 0.006681028163229526, "learning_rate": 2.431793770139635e-06, "loss": 0.0, "step": 12129 }, { "epoch": 0.7817232712508861, "grad_norm": 0.2901713907485199, "learning_rate": 2.431077694235589e-06, "loss": 0.0, "step": 12130 }, { "epoch": 0.7817877166978153, "grad_norm": 0.0014225891402345502, "learning_rate": 2.4303616183315435e-06, "loss": 0.0, "step": 12131 }, { "epoch": 0.7818521621447445, "grad_norm": 0.08323650832116088, "learning_rate": 2.4296455424274978e-06, "loss": 0.0001, "step": 12132 }, { "epoch": 0.7819166075916737, "grad_norm": 0.003190025050149661, "learning_rate": 2.4289294665234516e-06, "loss": 0.0, "step": 12133 }, { "epoch": 0.7819810530386028, "grad_norm": 0.00029784653909833324, "learning_rate": 2.428213390619406e-06, "loss": 0.0, "step": 12134 }, { "epoch": 0.782045498485532, "grad_norm": 0.0007026094397731929, "learning_rate": 2.42749731471536e-06, "loss": 0.0, "step": 12135 }, { "epoch": 0.7821099439324611, "grad_norm": 0.0012636307050004592, "learning_rate": 2.4267812388113145e-06, "loss": 0.0, "step": 12136 }, { "epoch": 0.7821743893793903, "grad_norm": 0.0029543542002693716, "learning_rate": 2.4260651629072684e-06, "loss": 0.0, "step": 12137 }, { "epoch": 0.7822388348263195, "grad_norm": 0.00022484532724232343, "learning_rate": 2.4253490870032227e-06, "loss": 0.0, "step": 12138 }, { "epoch": 0.7823032802732487, "grad_norm": 0.09689424289044542, "learning_rate": 2.4246330110991766e-06, "loss": 0.0002, "step": 12139 }, { "epoch": 0.7823677257201779, "grad_norm": 0.0004091185705752037, "learning_rate": 2.423916935195131e-06, "loss": 0.0, "step": 12140 }, { "epoch": 0.7824321711671071, "grad_norm": 0.03076440841027304, "learning_rate": 2.4232008592910848e-06, "loss": 0.0, "step": 12141 }, { "epoch": 0.7824966166140362, "grad_norm": 0.00017963182636153072, "learning_rate": 2.4224847833870395e-06, "loss": 0.0, "step": 12142 }, { "epoch": 0.7825610620609654, "grad_norm": 5.2125403499151375e-05, "learning_rate": 2.4217687074829934e-06, "loss": 0.0, "step": 12143 }, { "epoch": 0.7826255075078946, "grad_norm": 0.000837962959501766, "learning_rate": 2.4210526315789477e-06, "loss": 0.0, "step": 12144 }, { "epoch": 0.7826899529548237, "grad_norm": 0.0031376415468503094, "learning_rate": 2.4203365556749015e-06, "loss": 0.0, "step": 12145 }, { "epoch": 0.7827543984017529, "grad_norm": 5.146406773654627e-05, "learning_rate": 2.419620479770856e-06, "loss": 0.0, "step": 12146 }, { "epoch": 0.782818843848682, "grad_norm": 0.00029061667402159076, "learning_rate": 2.41890440386681e-06, "loss": 0.0, "step": 12147 }, { "epoch": 0.7828832892956112, "grad_norm": 0.00689310914039741, "learning_rate": 2.4181883279627644e-06, "loss": 0.0, "step": 12148 }, { "epoch": 0.7829477347425404, "grad_norm": 0.00255918569442203, "learning_rate": 2.4174722520587183e-06, "loss": 0.0, "step": 12149 }, { "epoch": 0.7830121801894696, "grad_norm": 0.14416244853354604, "learning_rate": 2.4167561761546726e-06, "loss": 0.0012, "step": 12150 }, { "epoch": 0.7830766256363988, "grad_norm": 0.3272660684480119, "learning_rate": 2.416040100250627e-06, "loss": 0.0005, "step": 12151 }, { "epoch": 0.783141071083328, "grad_norm": 0.30817576965074384, "learning_rate": 2.4153240243465808e-06, "loss": 0.0019, "step": 12152 }, { "epoch": 0.7832055165302572, "grad_norm": 0.07889921893529749, "learning_rate": 2.414607948442535e-06, "loss": 0.0001, "step": 12153 }, { "epoch": 0.7832699619771863, "grad_norm": 0.00043163331430451227, "learning_rate": 2.4138918725384894e-06, "loss": 0.0, "step": 12154 }, { "epoch": 0.7833344074241155, "grad_norm": 0.0001289400500351095, "learning_rate": 2.4131757966344437e-06, "loss": 0.0, "step": 12155 }, { "epoch": 0.7833988528710447, "grad_norm": 0.0007038620007471168, "learning_rate": 2.4124597207303976e-06, "loss": 0.0, "step": 12156 }, { "epoch": 0.7834632983179738, "grad_norm": 0.009012677822004099, "learning_rate": 2.411743644826352e-06, "loss": 0.0, "step": 12157 }, { "epoch": 0.783527743764903, "grad_norm": 0.015514184778514704, "learning_rate": 2.4110275689223057e-06, "loss": 0.0, "step": 12158 }, { "epoch": 0.7835921892118322, "grad_norm": 0.0007476084735745873, "learning_rate": 2.41031149301826e-06, "loss": 0.0, "step": 12159 }, { "epoch": 0.7836566346587613, "grad_norm": 0.004590340510394398, "learning_rate": 2.4095954171142143e-06, "loss": 0.0, "step": 12160 }, { "epoch": 0.7837210801056905, "grad_norm": 0.010306499838861465, "learning_rate": 2.4088793412101686e-06, "loss": 0.0, "step": 12161 }, { "epoch": 0.7837855255526197, "grad_norm": 0.0005240210405741492, "learning_rate": 2.4081632653061225e-06, "loss": 0.0, "step": 12162 }, { "epoch": 0.7838499709995489, "grad_norm": 0.23179992839626576, "learning_rate": 2.407447189402077e-06, "loss": 0.0022, "step": 12163 }, { "epoch": 0.7839144164464781, "grad_norm": 0.00023400309095611104, "learning_rate": 2.406731113498031e-06, "loss": 0.0, "step": 12164 }, { "epoch": 0.7839788618934073, "grad_norm": 0.03840447149473234, "learning_rate": 2.406015037593985e-06, "loss": 0.0001, "step": 12165 }, { "epoch": 0.7840433073403364, "grad_norm": 0.33425590007271, "learning_rate": 2.4052989616899393e-06, "loss": 0.0024, "step": 12166 }, { "epoch": 0.7841077527872656, "grad_norm": 0.013922142574820724, "learning_rate": 2.4045828857858936e-06, "loss": 0.0001, "step": 12167 }, { "epoch": 0.7841721982341947, "grad_norm": 0.0012904758371460228, "learning_rate": 2.403866809881848e-06, "loss": 0.0, "step": 12168 }, { "epoch": 0.7842366436811239, "grad_norm": 0.0009271433348567991, "learning_rate": 2.4031507339778017e-06, "loss": 0.0, "step": 12169 }, { "epoch": 0.7843010891280531, "grad_norm": 0.029267167884655023, "learning_rate": 2.402434658073756e-06, "loss": 0.0001, "step": 12170 }, { "epoch": 0.7843655345749823, "grad_norm": 0.006470476356729586, "learning_rate": 2.4017185821697103e-06, "loss": 0.0001, "step": 12171 }, { "epoch": 0.7844299800219114, "grad_norm": 0.006364758656668254, "learning_rate": 2.4010025062656646e-06, "loss": 0.0, "step": 12172 }, { "epoch": 0.7844944254688406, "grad_norm": 0.34521296607212343, "learning_rate": 2.4002864303616185e-06, "loss": 0.0032, "step": 12173 }, { "epoch": 0.7845588709157698, "grad_norm": 0.003681485751642218, "learning_rate": 2.399570354457573e-06, "loss": 0.0, "step": 12174 }, { "epoch": 0.784623316362699, "grad_norm": 0.00445226304354565, "learning_rate": 2.3988542785535267e-06, "loss": 0.0, "step": 12175 }, { "epoch": 0.7846877618096282, "grad_norm": 0.0015989638304230221, "learning_rate": 2.398138202649481e-06, "loss": 0.0, "step": 12176 }, { "epoch": 0.7847522072565574, "grad_norm": 0.0043978620529324984, "learning_rate": 2.3974221267454353e-06, "loss": 0.0, "step": 12177 }, { "epoch": 0.7848166527034866, "grad_norm": 0.017444955525010995, "learning_rate": 2.3967060508413896e-06, "loss": 0.0, "step": 12178 }, { "epoch": 0.7848810981504156, "grad_norm": 0.01364086457456686, "learning_rate": 2.3959899749373435e-06, "loss": 0.0015, "step": 12179 }, { "epoch": 0.7849455435973448, "grad_norm": 0.005634009224861755, "learning_rate": 2.3952738990332978e-06, "loss": 0.0, "step": 12180 }, { "epoch": 0.785009989044274, "grad_norm": 0.0002739373318359726, "learning_rate": 2.3945578231292516e-06, "loss": 0.0, "step": 12181 }, { "epoch": 0.7850744344912032, "grad_norm": 0.013928744806136031, "learning_rate": 2.393841747225206e-06, "loss": 0.0, "step": 12182 }, { "epoch": 0.7851388799381324, "grad_norm": 0.00152916755038011, "learning_rate": 2.3931256713211602e-06, "loss": 0.0, "step": 12183 }, { "epoch": 0.7852033253850615, "grad_norm": 0.005809892577009027, "learning_rate": 2.3924095954171145e-06, "loss": 0.0, "step": 12184 }, { "epoch": 0.7852677708319907, "grad_norm": 0.013241687900794809, "learning_rate": 2.3916935195130684e-06, "loss": 0.0001, "step": 12185 }, { "epoch": 0.7853322162789199, "grad_norm": 0.00023618599780431406, "learning_rate": 2.3909774436090227e-06, "loss": 0.0, "step": 12186 }, { "epoch": 0.7853966617258491, "grad_norm": 0.030064050017800315, "learning_rate": 2.390261367704977e-06, "loss": 0.0003, "step": 12187 }, { "epoch": 0.7854611071727783, "grad_norm": 0.01977551719248264, "learning_rate": 2.389545291800931e-06, "loss": 0.0015, "step": 12188 }, { "epoch": 0.7855255526197075, "grad_norm": 0.0002665702840403745, "learning_rate": 2.388829215896885e-06, "loss": 0.0, "step": 12189 }, { "epoch": 0.7855899980666365, "grad_norm": 0.016736773736830732, "learning_rate": 2.3881131399928395e-06, "loss": 0.0, "step": 12190 }, { "epoch": 0.7856544435135657, "grad_norm": 0.011101890454886009, "learning_rate": 2.3873970640887938e-06, "loss": 0.0, "step": 12191 }, { "epoch": 0.7857188889604949, "grad_norm": 0.23320839139528582, "learning_rate": 2.3866809881847476e-06, "loss": 0.0002, "step": 12192 }, { "epoch": 0.7857833344074241, "grad_norm": 0.001548839832227801, "learning_rate": 2.385964912280702e-06, "loss": 0.0, "step": 12193 }, { "epoch": 0.7858477798543533, "grad_norm": 0.008140804390900756, "learning_rate": 2.385248836376656e-06, "loss": 0.0, "step": 12194 }, { "epoch": 0.7859122253012825, "grad_norm": 0.002026366280862881, "learning_rate": 2.3845327604726105e-06, "loss": 0.0, "step": 12195 }, { "epoch": 0.7859766707482116, "grad_norm": 0.0012847861580297382, "learning_rate": 2.3838166845685644e-06, "loss": 0.0, "step": 12196 }, { "epoch": 0.7860411161951408, "grad_norm": 0.0006644772422334051, "learning_rate": 2.3831006086645187e-06, "loss": 0.0, "step": 12197 }, { "epoch": 0.78610556164207, "grad_norm": 0.03664619062678337, "learning_rate": 2.3823845327604726e-06, "loss": 0.0, "step": 12198 }, { "epoch": 0.7861700070889992, "grad_norm": 0.004651097115143568, "learning_rate": 2.381668456856427e-06, "loss": 0.0, "step": 12199 }, { "epoch": 0.7862344525359284, "grad_norm": 0.015155141237009116, "learning_rate": 2.380952380952381e-06, "loss": 0.0, "step": 12200 }, { "epoch": 0.7862988979828575, "grad_norm": 0.14372060879564724, "learning_rate": 2.3802363050483355e-06, "loss": 0.0021, "step": 12201 }, { "epoch": 0.7863633434297866, "grad_norm": 0.012198772020524575, "learning_rate": 2.3795202291442894e-06, "loss": 0.0001, "step": 12202 }, { "epoch": 0.7864277888767158, "grad_norm": 0.003081126606843541, "learning_rate": 2.3788041532402437e-06, "loss": 0.0, "step": 12203 }, { "epoch": 0.786492234323645, "grad_norm": 0.0045107827367743885, "learning_rate": 2.378088077336198e-06, "loss": 0.0, "step": 12204 }, { "epoch": 0.7865566797705742, "grad_norm": 0.14999968012139864, "learning_rate": 2.377372001432152e-06, "loss": 0.0013, "step": 12205 }, { "epoch": 0.7866211252175034, "grad_norm": 0.09314629267320713, "learning_rate": 2.376655925528106e-06, "loss": 0.0001, "step": 12206 }, { "epoch": 0.7866855706644326, "grad_norm": 0.0022869868699410103, "learning_rate": 2.3759398496240604e-06, "loss": 0.0, "step": 12207 }, { "epoch": 0.7867500161113617, "grad_norm": 0.00021443939616199012, "learning_rate": 2.3752237737200147e-06, "loss": 0.0, "step": 12208 }, { "epoch": 0.7868144615582909, "grad_norm": 0.007957057997663745, "learning_rate": 2.3745076978159686e-06, "loss": 0.0, "step": 12209 }, { "epoch": 0.7868789070052201, "grad_norm": 0.047592136178799455, "learning_rate": 2.373791621911923e-06, "loss": 0.0, "step": 12210 }, { "epoch": 0.7869433524521493, "grad_norm": 0.004593912436612267, "learning_rate": 2.3730755460078768e-06, "loss": 0.0, "step": 12211 }, { "epoch": 0.7870077978990784, "grad_norm": 0.012097084100934885, "learning_rate": 2.3723594701038315e-06, "loss": 0.0001, "step": 12212 }, { "epoch": 0.7870722433460076, "grad_norm": 0.02758894133465858, "learning_rate": 2.3716433941997854e-06, "loss": 0.0, "step": 12213 }, { "epoch": 0.7871366887929367, "grad_norm": 0.000602953256711079, "learning_rate": 2.3709273182957397e-06, "loss": 0.0, "step": 12214 }, { "epoch": 0.7872011342398659, "grad_norm": 0.004375077774056914, "learning_rate": 2.3702112423916936e-06, "loss": 0.0, "step": 12215 }, { "epoch": 0.7872655796867951, "grad_norm": 0.0025716119276872535, "learning_rate": 2.369495166487648e-06, "loss": 0.0, "step": 12216 }, { "epoch": 0.7873300251337243, "grad_norm": 0.01916180191521439, "learning_rate": 2.368779090583602e-06, "loss": 0.0001, "step": 12217 }, { "epoch": 0.7873944705806535, "grad_norm": 0.15724028651836391, "learning_rate": 2.3680630146795564e-06, "loss": 0.0004, "step": 12218 }, { "epoch": 0.7874589160275827, "grad_norm": 0.14482280604470232, "learning_rate": 2.3673469387755103e-06, "loss": 0.0017, "step": 12219 }, { "epoch": 0.7875233614745119, "grad_norm": 0.17595384919038112, "learning_rate": 2.3666308628714646e-06, "loss": 0.0005, "step": 12220 }, { "epoch": 0.787587806921441, "grad_norm": 0.11437295345132624, "learning_rate": 2.365914786967419e-06, "loss": 0.0003, "step": 12221 }, { "epoch": 0.7876522523683702, "grad_norm": 0.027399682842273777, "learning_rate": 2.365198711063373e-06, "loss": 0.0, "step": 12222 }, { "epoch": 0.7877166978152993, "grad_norm": 0.00710695761001995, "learning_rate": 2.364482635159327e-06, "loss": 0.0, "step": 12223 }, { "epoch": 0.7877811432622285, "grad_norm": 0.0010690633257261582, "learning_rate": 2.3637665592552814e-06, "loss": 0.0, "step": 12224 }, { "epoch": 0.7878455887091577, "grad_norm": 0.3357881946120721, "learning_rate": 2.3630504833512357e-06, "loss": 0.0006, "step": 12225 }, { "epoch": 0.7879100341560868, "grad_norm": 0.011652891799082953, "learning_rate": 2.3623344074471896e-06, "loss": 0.0, "step": 12226 }, { "epoch": 0.787974479603016, "grad_norm": 0.002222747962640288, "learning_rate": 2.361618331543144e-06, "loss": 0.0, "step": 12227 }, { "epoch": 0.7880389250499452, "grad_norm": 0.010843309689273661, "learning_rate": 2.3609022556390977e-06, "loss": 0.0001, "step": 12228 }, { "epoch": 0.7881033704968744, "grad_norm": 0.46336933926030965, "learning_rate": 2.360186179735052e-06, "loss": 0.0068, "step": 12229 }, { "epoch": 0.7881678159438036, "grad_norm": 0.0005158168464518133, "learning_rate": 2.3594701038310063e-06, "loss": 0.0015, "step": 12230 }, { "epoch": 0.7882322613907328, "grad_norm": 0.00042800521227334665, "learning_rate": 2.3587540279269606e-06, "loss": 0.0, "step": 12231 }, { "epoch": 0.788296706837662, "grad_norm": 0.0375830123299696, "learning_rate": 2.3580379520229145e-06, "loss": 0.0003, "step": 12232 }, { "epoch": 0.7883611522845911, "grad_norm": 0.3342013072829814, "learning_rate": 2.357321876118869e-06, "loss": 0.0003, "step": 12233 }, { "epoch": 0.7884255977315203, "grad_norm": 0.0008485553032904481, "learning_rate": 2.3566058002148227e-06, "loss": 0.0, "step": 12234 }, { "epoch": 0.7884900431784494, "grad_norm": 0.4544385418544847, "learning_rate": 2.355889724310777e-06, "loss": 0.0008, "step": 12235 }, { "epoch": 0.7885544886253786, "grad_norm": 0.0010409078297272856, "learning_rate": 2.3551736484067313e-06, "loss": 0.0, "step": 12236 }, { "epoch": 0.7886189340723078, "grad_norm": 0.002169620912121323, "learning_rate": 2.3544575725026856e-06, "loss": 0.0, "step": 12237 }, { "epoch": 0.788683379519237, "grad_norm": 0.0017981680341119573, "learning_rate": 2.3537414965986395e-06, "loss": 0.0, "step": 12238 }, { "epoch": 0.7887478249661661, "grad_norm": 0.04692421753212694, "learning_rate": 2.3530254206945938e-06, "loss": 0.0004, "step": 12239 }, { "epoch": 0.7888122704130953, "grad_norm": 0.040648232692013384, "learning_rate": 2.352309344790548e-06, "loss": 0.0001, "step": 12240 }, { "epoch": 0.7888767158600245, "grad_norm": 0.21630014614031065, "learning_rate": 2.351593268886502e-06, "loss": 0.0003, "step": 12241 }, { "epoch": 0.7889411613069537, "grad_norm": 0.0016455505937524788, "learning_rate": 2.3508771929824562e-06, "loss": 0.0, "step": 12242 }, { "epoch": 0.7890056067538829, "grad_norm": 1.7173800802001882e-05, "learning_rate": 2.3501611170784105e-06, "loss": 0.0, "step": 12243 }, { "epoch": 0.789070052200812, "grad_norm": 0.3804484963166623, "learning_rate": 2.349445041174365e-06, "loss": 0.0029, "step": 12244 }, { "epoch": 0.7891344976477412, "grad_norm": 0.0006245605744261292, "learning_rate": 2.3487289652703187e-06, "loss": 0.0, "step": 12245 }, { "epoch": 0.7891989430946703, "grad_norm": 0.0016185882460237843, "learning_rate": 2.348012889366273e-06, "loss": 0.0, "step": 12246 }, { "epoch": 0.7892633885415995, "grad_norm": 6.474478917892943e-05, "learning_rate": 2.3472968134622273e-06, "loss": 0.0, "step": 12247 }, { "epoch": 0.7893278339885287, "grad_norm": 0.0010739064109665784, "learning_rate": 2.3465807375581816e-06, "loss": 0.0, "step": 12248 }, { "epoch": 0.7893922794354579, "grad_norm": 0.005548181129234342, "learning_rate": 2.3458646616541355e-06, "loss": 0.0001, "step": 12249 }, { "epoch": 0.789456724882387, "grad_norm": 0.008573517040880186, "learning_rate": 2.3451485857500898e-06, "loss": 0.0, "step": 12250 }, { "epoch": 0.7895211703293162, "grad_norm": 0.0006694098065241521, "learning_rate": 2.3444325098460436e-06, "loss": 0.0, "step": 12251 }, { "epoch": 0.7895856157762454, "grad_norm": 0.027516989197968755, "learning_rate": 2.343716433941998e-06, "loss": 0.0, "step": 12252 }, { "epoch": 0.7896500612231746, "grad_norm": 0.0049549690740599, "learning_rate": 2.3430003580379522e-06, "loss": 0.0, "step": 12253 }, { "epoch": 0.7897145066701038, "grad_norm": 0.0029326547432867644, "learning_rate": 2.3422842821339065e-06, "loss": 0.0, "step": 12254 }, { "epoch": 0.789778952117033, "grad_norm": 0.025886055570603583, "learning_rate": 2.3415682062298604e-06, "loss": 0.0, "step": 12255 }, { "epoch": 0.7898433975639622, "grad_norm": 0.012209315086344184, "learning_rate": 2.3408521303258147e-06, "loss": 0.0, "step": 12256 }, { "epoch": 0.7899078430108912, "grad_norm": 6.541967178543215e-05, "learning_rate": 2.340136054421769e-06, "loss": 0.0, "step": 12257 }, { "epoch": 0.7899722884578204, "grad_norm": 0.002371812470589662, "learning_rate": 2.339419978517723e-06, "loss": 0.0, "step": 12258 }, { "epoch": 0.7900367339047496, "grad_norm": 0.11401620408703504, "learning_rate": 2.338703902613677e-06, "loss": 0.0019, "step": 12259 }, { "epoch": 0.7901011793516788, "grad_norm": 0.012610422179252519, "learning_rate": 2.3379878267096315e-06, "loss": 0.0, "step": 12260 }, { "epoch": 0.790165624798608, "grad_norm": 0.00046666391484797036, "learning_rate": 2.3372717508055858e-06, "loss": 0.0, "step": 12261 }, { "epoch": 0.7902300702455372, "grad_norm": 0.00217984363643231, "learning_rate": 2.3365556749015397e-06, "loss": 0.0, "step": 12262 }, { "epoch": 0.7902945156924663, "grad_norm": 0.008862938690758203, "learning_rate": 2.335839598997494e-06, "loss": 0.0, "step": 12263 }, { "epoch": 0.7903589611393955, "grad_norm": 0.05935538910762845, "learning_rate": 2.335123523093448e-06, "loss": 0.0001, "step": 12264 }, { "epoch": 0.7904234065863247, "grad_norm": 0.0005315012449867554, "learning_rate": 2.3344074471894026e-06, "loss": 0.0, "step": 12265 }, { "epoch": 0.7904878520332539, "grad_norm": 0.0012946143773809717, "learning_rate": 2.3336913712853564e-06, "loss": 0.0, "step": 12266 }, { "epoch": 0.7905522974801831, "grad_norm": 0.022759214076861673, "learning_rate": 2.3329752953813107e-06, "loss": 0.0, "step": 12267 }, { "epoch": 0.7906167429271121, "grad_norm": 0.0003268068807733046, "learning_rate": 2.3322592194772646e-06, "loss": 0.0, "step": 12268 }, { "epoch": 0.7906811883740413, "grad_norm": 0.3537525626450311, "learning_rate": 2.331543143573219e-06, "loss": 0.0012, "step": 12269 }, { "epoch": 0.7907456338209705, "grad_norm": 0.01784420948702125, "learning_rate": 2.330827067669173e-06, "loss": 0.0001, "step": 12270 }, { "epoch": 0.7908100792678997, "grad_norm": 0.001048223111041715, "learning_rate": 2.3301109917651275e-06, "loss": 0.0, "step": 12271 }, { "epoch": 0.7908745247148289, "grad_norm": 0.0011720714576711842, "learning_rate": 2.3293949158610814e-06, "loss": 0.0, "step": 12272 }, { "epoch": 0.7909389701617581, "grad_norm": 0.09711526275150101, "learning_rate": 2.3286788399570357e-06, "loss": 0.0001, "step": 12273 }, { "epoch": 0.7910034156086873, "grad_norm": 0.0004784918328230193, "learning_rate": 2.32796276405299e-06, "loss": 0.0, "step": 12274 }, { "epoch": 0.7910678610556164, "grad_norm": 0.0019445352077134038, "learning_rate": 2.327246688148944e-06, "loss": 0.0, "step": 12275 }, { "epoch": 0.7911323065025456, "grad_norm": 0.0010747047967742114, "learning_rate": 2.326530612244898e-06, "loss": 0.0, "step": 12276 }, { "epoch": 0.7911967519494748, "grad_norm": 0.0008913434106858376, "learning_rate": 2.3258145363408524e-06, "loss": 0.0, "step": 12277 }, { "epoch": 0.791261197396404, "grad_norm": 0.0008809684577119583, "learning_rate": 2.3250984604368067e-06, "loss": 0.0, "step": 12278 }, { "epoch": 0.7913256428433331, "grad_norm": 0.0033671814651020987, "learning_rate": 2.3243823845327606e-06, "loss": 0.0, "step": 12279 }, { "epoch": 0.7913900882902623, "grad_norm": 0.0011540542898446035, "learning_rate": 2.323666308628715e-06, "loss": 0.0, "step": 12280 }, { "epoch": 0.7914545337371914, "grad_norm": 0.003371865755117032, "learning_rate": 2.322950232724669e-06, "loss": 0.0, "step": 12281 }, { "epoch": 0.7915189791841206, "grad_norm": 0.00016563388657528873, "learning_rate": 2.3222341568206235e-06, "loss": 0.0, "step": 12282 }, { "epoch": 0.7915834246310498, "grad_norm": 0.09632593516717095, "learning_rate": 2.3215180809165774e-06, "loss": 0.0013, "step": 12283 }, { "epoch": 0.791647870077979, "grad_norm": 0.0017298544745243945, "learning_rate": 2.3208020050125317e-06, "loss": 0.0, "step": 12284 }, { "epoch": 0.7917123155249082, "grad_norm": 0.010223582251694568, "learning_rate": 2.3200859291084856e-06, "loss": 0.0001, "step": 12285 }, { "epoch": 0.7917767609718374, "grad_norm": 0.003706090422825231, "learning_rate": 2.31936985320444e-06, "loss": 0.0, "step": 12286 }, { "epoch": 0.7918412064187665, "grad_norm": 0.5455833713255104, "learning_rate": 2.3186537773003937e-06, "loss": 0.0033, "step": 12287 }, { "epoch": 0.7919056518656957, "grad_norm": 0.005634153863743244, "learning_rate": 2.3179377013963485e-06, "loss": 0.0, "step": 12288 }, { "epoch": 0.7919700973126249, "grad_norm": 0.00811497967985401, "learning_rate": 2.3172216254923023e-06, "loss": 0.0, "step": 12289 }, { "epoch": 0.792034542759554, "grad_norm": 0.023416422764268287, "learning_rate": 2.3165055495882566e-06, "loss": 0.0, "step": 12290 }, { "epoch": 0.7920989882064832, "grad_norm": 0.000397575294584166, "learning_rate": 2.3157894736842105e-06, "loss": 0.0, "step": 12291 }, { "epoch": 0.7921634336534124, "grad_norm": 0.09261146259030603, "learning_rate": 2.315073397780165e-06, "loss": 0.0007, "step": 12292 }, { "epoch": 0.7922278791003415, "grad_norm": 6.736243921635722e-05, "learning_rate": 2.314357321876119e-06, "loss": 0.0, "step": 12293 }, { "epoch": 0.7922923245472707, "grad_norm": 8.188848771461251e-05, "learning_rate": 2.3136412459720734e-06, "loss": 0.0, "step": 12294 }, { "epoch": 0.7923567699941999, "grad_norm": 0.5248949627024653, "learning_rate": 2.3129251700680273e-06, "loss": 0.0005, "step": 12295 }, { "epoch": 0.7924212154411291, "grad_norm": 0.0022065020062548503, "learning_rate": 2.3122090941639816e-06, "loss": 0.0, "step": 12296 }, { "epoch": 0.7924856608880583, "grad_norm": 0.0012940578166477242, "learning_rate": 2.311493018259936e-06, "loss": 0.0, "step": 12297 }, { "epoch": 0.7925501063349875, "grad_norm": 0.0008661635742292246, "learning_rate": 2.3107769423558898e-06, "loss": 0.0, "step": 12298 }, { "epoch": 0.7926145517819166, "grad_norm": 0.012853991928710864, "learning_rate": 2.310060866451844e-06, "loss": 0.0, "step": 12299 }, { "epoch": 0.7926789972288458, "grad_norm": 0.0013576044205097536, "learning_rate": 2.3093447905477984e-06, "loss": 0.0, "step": 12300 }, { "epoch": 0.792743442675775, "grad_norm": 6.910762728236609e-05, "learning_rate": 2.3086287146437527e-06, "loss": 0.0, "step": 12301 }, { "epoch": 0.7928078881227041, "grad_norm": 0.0067343948025941945, "learning_rate": 2.3079126387397065e-06, "loss": 0.0, "step": 12302 }, { "epoch": 0.7928723335696333, "grad_norm": 0.13638891214778198, "learning_rate": 2.307196562835661e-06, "loss": 0.0004, "step": 12303 }, { "epoch": 0.7929367790165625, "grad_norm": 0.32272106701892816, "learning_rate": 2.3064804869316147e-06, "loss": 0.0012, "step": 12304 }, { "epoch": 0.7930012244634916, "grad_norm": 0.0008684087055918425, "learning_rate": 2.305764411027569e-06, "loss": 0.0, "step": 12305 }, { "epoch": 0.7930656699104208, "grad_norm": 0.0001796671162931569, "learning_rate": 2.3050483351235233e-06, "loss": 0.0, "step": 12306 }, { "epoch": 0.79313011535735, "grad_norm": 0.22511224430501187, "learning_rate": 2.3043322592194776e-06, "loss": 0.0007, "step": 12307 }, { "epoch": 0.7931945608042792, "grad_norm": 0.0004992730688903502, "learning_rate": 2.3036161833154315e-06, "loss": 0.0, "step": 12308 }, { "epoch": 0.7932590062512084, "grad_norm": 0.006909952409031217, "learning_rate": 2.3029001074113858e-06, "loss": 0.0, "step": 12309 }, { "epoch": 0.7933234516981376, "grad_norm": 0.0040115938022819, "learning_rate": 2.30218403150734e-06, "loss": 0.0, "step": 12310 }, { "epoch": 0.7933878971450667, "grad_norm": 0.0014633534190212837, "learning_rate": 2.301467955603294e-06, "loss": 0.0, "step": 12311 }, { "epoch": 0.7934523425919959, "grad_norm": 0.012486874126142355, "learning_rate": 2.3007518796992482e-06, "loss": 0.0, "step": 12312 }, { "epoch": 0.793516788038925, "grad_norm": 0.0010191498823039192, "learning_rate": 2.3000358037952025e-06, "loss": 0.0, "step": 12313 }, { "epoch": 0.7935812334858542, "grad_norm": 0.004574874004510897, "learning_rate": 2.299319727891157e-06, "loss": 0.0, "step": 12314 }, { "epoch": 0.7936456789327834, "grad_norm": 0.00018292927670207949, "learning_rate": 2.2986036519871107e-06, "loss": 0.0, "step": 12315 }, { "epoch": 0.7937101243797126, "grad_norm": 0.2433858495795913, "learning_rate": 2.297887576083065e-06, "loss": 0.0025, "step": 12316 }, { "epoch": 0.7937745698266417, "grad_norm": 0.000507205422647784, "learning_rate": 2.2971715001790193e-06, "loss": 0.0, "step": 12317 }, { "epoch": 0.7938390152735709, "grad_norm": 0.1609688126555202, "learning_rate": 2.2964554242749736e-06, "loss": 0.0019, "step": 12318 }, { "epoch": 0.7939034607205001, "grad_norm": 0.0012293050600295469, "learning_rate": 2.2957393483709275e-06, "loss": 0.0, "step": 12319 }, { "epoch": 0.7939679061674293, "grad_norm": 0.0009011055666482163, "learning_rate": 2.2950232724668818e-06, "loss": 0.0, "step": 12320 }, { "epoch": 0.7940323516143585, "grad_norm": 0.002458300198121493, "learning_rate": 2.2943071965628357e-06, "loss": 0.0, "step": 12321 }, { "epoch": 0.7940967970612877, "grad_norm": 0.010545672807899122, "learning_rate": 2.29359112065879e-06, "loss": 0.0001, "step": 12322 }, { "epoch": 0.7941612425082168, "grad_norm": 0.0006009889248218322, "learning_rate": 2.2928750447547443e-06, "loss": 0.0, "step": 12323 }, { "epoch": 0.7942256879551459, "grad_norm": 0.0002882234106889472, "learning_rate": 2.2921589688506986e-06, "loss": 0.0, "step": 12324 }, { "epoch": 0.7942901334020751, "grad_norm": 7.939152417382084e-05, "learning_rate": 2.2914428929466524e-06, "loss": 0.0, "step": 12325 }, { "epoch": 0.7943545788490043, "grad_norm": 0.0004449329234502113, "learning_rate": 2.2907268170426067e-06, "loss": 0.0, "step": 12326 }, { "epoch": 0.7944190242959335, "grad_norm": 0.00014424612203982502, "learning_rate": 2.2900107411385606e-06, "loss": 0.0, "step": 12327 }, { "epoch": 0.7944834697428627, "grad_norm": 0.0031572437902694242, "learning_rate": 2.289294665234515e-06, "loss": 0.0, "step": 12328 }, { "epoch": 0.7945479151897918, "grad_norm": 0.02013882993644046, "learning_rate": 2.288578589330469e-06, "loss": 0.0, "step": 12329 }, { "epoch": 0.794612360636721, "grad_norm": 0.0006641821282922399, "learning_rate": 2.2878625134264235e-06, "loss": 0.0, "step": 12330 }, { "epoch": 0.7946768060836502, "grad_norm": 0.0964850576276222, "learning_rate": 2.2871464375223774e-06, "loss": 0.0003, "step": 12331 }, { "epoch": 0.7947412515305794, "grad_norm": 0.0007458173316273714, "learning_rate": 2.2864303616183317e-06, "loss": 0.0, "step": 12332 }, { "epoch": 0.7948056969775086, "grad_norm": 0.044678187758849984, "learning_rate": 2.285714285714286e-06, "loss": 0.0001, "step": 12333 }, { "epoch": 0.7948701424244378, "grad_norm": 0.012587534549491937, "learning_rate": 2.28499820981024e-06, "loss": 0.0001, "step": 12334 }, { "epoch": 0.7949345878713668, "grad_norm": 0.11678656098745568, "learning_rate": 2.284282133906194e-06, "loss": 0.0002, "step": 12335 }, { "epoch": 0.794999033318296, "grad_norm": 0.0010133240072671896, "learning_rate": 2.2835660580021484e-06, "loss": 0.0, "step": 12336 }, { "epoch": 0.7950634787652252, "grad_norm": 0.01128362238810577, "learning_rate": 2.2828499820981027e-06, "loss": 0.0, "step": 12337 }, { "epoch": 0.7951279242121544, "grad_norm": 0.325144468806227, "learning_rate": 2.2821339061940566e-06, "loss": 0.0024, "step": 12338 }, { "epoch": 0.7951923696590836, "grad_norm": 0.0003143201195948108, "learning_rate": 2.281417830290011e-06, "loss": 0.0, "step": 12339 }, { "epoch": 0.7952568151060128, "grad_norm": 0.00018327716875614307, "learning_rate": 2.280701754385965e-06, "loss": 0.0, "step": 12340 }, { "epoch": 0.7953212605529419, "grad_norm": 0.00038591333705380033, "learning_rate": 2.2799856784819195e-06, "loss": 0.0, "step": 12341 }, { "epoch": 0.7953857059998711, "grad_norm": 0.0012024442904717586, "learning_rate": 2.2792696025778734e-06, "loss": 0.0, "step": 12342 }, { "epoch": 0.7954501514468003, "grad_norm": 0.00036475198609929637, "learning_rate": 2.2785535266738277e-06, "loss": 0.0, "step": 12343 }, { "epoch": 0.7955145968937295, "grad_norm": 0.009867178122339677, "learning_rate": 2.2778374507697816e-06, "loss": 0.0, "step": 12344 }, { "epoch": 0.7955790423406587, "grad_norm": 0.00080797752438696, "learning_rate": 2.277121374865736e-06, "loss": 0.0, "step": 12345 }, { "epoch": 0.7956434877875878, "grad_norm": 0.000350938664247268, "learning_rate": 2.27640529896169e-06, "loss": 0.0, "step": 12346 }, { "epoch": 0.7957079332345169, "grad_norm": 0.0004998791925092817, "learning_rate": 2.2756892230576445e-06, "loss": 0.0, "step": 12347 }, { "epoch": 0.7957723786814461, "grad_norm": 0.023357191681495117, "learning_rate": 2.2749731471535983e-06, "loss": 0.0001, "step": 12348 }, { "epoch": 0.7958368241283753, "grad_norm": 0.3889522787023688, "learning_rate": 2.2742570712495526e-06, "loss": 0.0028, "step": 12349 }, { "epoch": 0.7959012695753045, "grad_norm": 0.026527525468430544, "learning_rate": 2.273540995345507e-06, "loss": 0.0, "step": 12350 }, { "epoch": 0.7959657150222337, "grad_norm": 0.0003701205214895632, "learning_rate": 2.272824919441461e-06, "loss": 0.0, "step": 12351 }, { "epoch": 0.7960301604691629, "grad_norm": 0.048662335749158375, "learning_rate": 2.272108843537415e-06, "loss": 0.0005, "step": 12352 }, { "epoch": 0.796094605916092, "grad_norm": 0.0028033741150151723, "learning_rate": 2.2713927676333694e-06, "loss": 0.0, "step": 12353 }, { "epoch": 0.7961590513630212, "grad_norm": 0.005704388075964442, "learning_rate": 2.2706766917293237e-06, "loss": 0.0, "step": 12354 }, { "epoch": 0.7962234968099504, "grad_norm": 0.002117519304700571, "learning_rate": 2.2699606158252776e-06, "loss": 0.0, "step": 12355 }, { "epoch": 0.7962879422568796, "grad_norm": 0.02104627910229218, "learning_rate": 2.269244539921232e-06, "loss": 0.0001, "step": 12356 }, { "epoch": 0.7963523877038087, "grad_norm": 0.0002981008190228777, "learning_rate": 2.2685284640171858e-06, "loss": 0.0, "step": 12357 }, { "epoch": 0.7964168331507379, "grad_norm": 0.15222814648679983, "learning_rate": 2.2678123881131405e-06, "loss": 0.0034, "step": 12358 }, { "epoch": 0.796481278597667, "grad_norm": 0.0007378246127458366, "learning_rate": 2.2670963122090944e-06, "loss": 0.0, "step": 12359 }, { "epoch": 0.7965457240445962, "grad_norm": 0.0003285574545467159, "learning_rate": 2.2663802363050487e-06, "loss": 0.0, "step": 12360 }, { "epoch": 0.7966101694915254, "grad_norm": 0.011981375678915126, "learning_rate": 2.2656641604010025e-06, "loss": 0.0, "step": 12361 }, { "epoch": 0.7966746149384546, "grad_norm": 0.0715815572735966, "learning_rate": 2.264948084496957e-06, "loss": 0.0, "step": 12362 }, { "epoch": 0.7967390603853838, "grad_norm": 0.0077975512422411635, "learning_rate": 2.264232008592911e-06, "loss": 0.0, "step": 12363 }, { "epoch": 0.796803505832313, "grad_norm": 0.00018230401913338704, "learning_rate": 2.2635159326888654e-06, "loss": 0.0, "step": 12364 }, { "epoch": 0.7968679512792421, "grad_norm": 0.00044439336476779, "learning_rate": 2.2627998567848193e-06, "loss": 0.0, "step": 12365 }, { "epoch": 0.7969323967261713, "grad_norm": 0.0558334291996121, "learning_rate": 2.2620837808807736e-06, "loss": 0.0, "step": 12366 }, { "epoch": 0.7969968421731005, "grad_norm": 0.0017058114736429463, "learning_rate": 2.261367704976728e-06, "loss": 0.0, "step": 12367 }, { "epoch": 0.7970612876200296, "grad_norm": 0.0031099253657042403, "learning_rate": 2.2606516290726818e-06, "loss": 0.0, "step": 12368 }, { "epoch": 0.7971257330669588, "grad_norm": 0.07728687070940958, "learning_rate": 2.259935553168636e-06, "loss": 0.0008, "step": 12369 }, { "epoch": 0.797190178513888, "grad_norm": 0.1450721760764216, "learning_rate": 2.2592194772645904e-06, "loss": 0.0005, "step": 12370 }, { "epoch": 0.7972546239608171, "grad_norm": 0.2237326802977375, "learning_rate": 2.2585034013605447e-06, "loss": 0.0004, "step": 12371 }, { "epoch": 0.7973190694077463, "grad_norm": 0.010944975406888244, "learning_rate": 2.2577873254564985e-06, "loss": 0.0, "step": 12372 }, { "epoch": 0.7973835148546755, "grad_norm": 0.0016884389008774191, "learning_rate": 2.257071249552453e-06, "loss": 0.0, "step": 12373 }, { "epoch": 0.7974479603016047, "grad_norm": 0.00034746512207015046, "learning_rate": 2.2563551736484067e-06, "loss": 0.0, "step": 12374 }, { "epoch": 0.7975124057485339, "grad_norm": 0.005475084699684978, "learning_rate": 2.255639097744361e-06, "loss": 0.0, "step": 12375 }, { "epoch": 0.7975768511954631, "grad_norm": 0.2716014115799345, "learning_rate": 2.2549230218403153e-06, "loss": 0.001, "step": 12376 }, { "epoch": 0.7976412966423923, "grad_norm": 0.001062484250198185, "learning_rate": 2.2542069459362696e-06, "loss": 0.0, "step": 12377 }, { "epoch": 0.7977057420893214, "grad_norm": 0.007033136720229323, "learning_rate": 2.2534908700322235e-06, "loss": 0.0001, "step": 12378 }, { "epoch": 0.7977701875362506, "grad_norm": 0.022493932942061604, "learning_rate": 2.2527747941281778e-06, "loss": 0.0, "step": 12379 }, { "epoch": 0.7978346329831797, "grad_norm": 0.0005868051093204366, "learning_rate": 2.2520587182241317e-06, "loss": 0.0, "step": 12380 }, { "epoch": 0.7978990784301089, "grad_norm": 0.01206821548237533, "learning_rate": 2.251342642320086e-06, "loss": 0.0, "step": 12381 }, { "epoch": 0.7979635238770381, "grad_norm": 0.003972101955416267, "learning_rate": 2.2506265664160403e-06, "loss": 0.0, "step": 12382 }, { "epoch": 0.7980279693239672, "grad_norm": 0.019091583821505498, "learning_rate": 2.2499104905119946e-06, "loss": 0.0002, "step": 12383 }, { "epoch": 0.7980924147708964, "grad_norm": 0.0028018271371107813, "learning_rate": 2.2491944146079484e-06, "loss": 0.0, "step": 12384 }, { "epoch": 0.7981568602178256, "grad_norm": 0.018689290061926607, "learning_rate": 2.2484783387039027e-06, "loss": 0.0, "step": 12385 }, { "epoch": 0.7982213056647548, "grad_norm": 0.0010994308213438746, "learning_rate": 2.247762262799857e-06, "loss": 0.0, "step": 12386 }, { "epoch": 0.798285751111684, "grad_norm": 0.0011902206978131014, "learning_rate": 2.2470461868958113e-06, "loss": 0.0, "step": 12387 }, { "epoch": 0.7983501965586132, "grad_norm": 0.00018473605557963436, "learning_rate": 2.246330110991765e-06, "loss": 0.0, "step": 12388 }, { "epoch": 0.7984146420055424, "grad_norm": 0.0027828798294523296, "learning_rate": 2.2456140350877195e-06, "loss": 0.0, "step": 12389 }, { "epoch": 0.7984790874524715, "grad_norm": 0.0026909045095040935, "learning_rate": 2.244897959183674e-06, "loss": 0.0, "step": 12390 }, { "epoch": 0.7985435328994006, "grad_norm": 0.04215782510421399, "learning_rate": 2.2441818832796277e-06, "loss": 0.0, "step": 12391 }, { "epoch": 0.7986079783463298, "grad_norm": 0.004400490777957583, "learning_rate": 2.243465807375582e-06, "loss": 0.0, "step": 12392 }, { "epoch": 0.798672423793259, "grad_norm": 0.0009927995263724551, "learning_rate": 2.2427497314715363e-06, "loss": 0.0, "step": 12393 }, { "epoch": 0.7987368692401882, "grad_norm": 0.0008656766188128951, "learning_rate": 2.2420336555674906e-06, "loss": 0.0, "step": 12394 }, { "epoch": 0.7988013146871173, "grad_norm": 0.11693899508875823, "learning_rate": 2.2413175796634444e-06, "loss": 0.0012, "step": 12395 }, { "epoch": 0.7988657601340465, "grad_norm": 0.015184840303952818, "learning_rate": 2.2406015037593987e-06, "loss": 0.0, "step": 12396 }, { "epoch": 0.7989302055809757, "grad_norm": 0.0008131608440570534, "learning_rate": 2.2398854278553526e-06, "loss": 0.0, "step": 12397 }, { "epoch": 0.7989946510279049, "grad_norm": 0.008973112267477039, "learning_rate": 2.239169351951307e-06, "loss": 0.0, "step": 12398 }, { "epoch": 0.7990590964748341, "grad_norm": 0.4256889173153369, "learning_rate": 2.2384532760472612e-06, "loss": 0.0007, "step": 12399 }, { "epoch": 0.7991235419217633, "grad_norm": 0.0014311981142707912, "learning_rate": 2.2377372001432155e-06, "loss": 0.0, "step": 12400 }, { "epoch": 0.7991879873686925, "grad_norm": 0.0038494384225200677, "learning_rate": 2.2370211242391694e-06, "loss": 0.0, "step": 12401 }, { "epoch": 0.7992524328156215, "grad_norm": 0.00021152556432885007, "learning_rate": 2.2363050483351237e-06, "loss": 0.0, "step": 12402 }, { "epoch": 0.7993168782625507, "grad_norm": 0.0004490586247319541, "learning_rate": 2.235588972431078e-06, "loss": 0.0, "step": 12403 }, { "epoch": 0.7993813237094799, "grad_norm": 0.0006710799683837646, "learning_rate": 2.234872896527032e-06, "loss": 0.0, "step": 12404 }, { "epoch": 0.7994457691564091, "grad_norm": 0.010726106254010393, "learning_rate": 2.234156820622986e-06, "loss": 0.0, "step": 12405 }, { "epoch": 0.7995102146033383, "grad_norm": 0.0014574245169015954, "learning_rate": 2.2334407447189405e-06, "loss": 0.0, "step": 12406 }, { "epoch": 0.7995746600502674, "grad_norm": 0.0005909390737096638, "learning_rate": 2.2327246688148948e-06, "loss": 0.0, "step": 12407 }, { "epoch": 0.7996391054971966, "grad_norm": 0.00010923331761016114, "learning_rate": 2.2320085929108486e-06, "loss": 0.0, "step": 12408 }, { "epoch": 0.7997035509441258, "grad_norm": 0.03501950265571546, "learning_rate": 2.231292517006803e-06, "loss": 0.0, "step": 12409 }, { "epoch": 0.799767996391055, "grad_norm": 0.0004077594829991419, "learning_rate": 2.230576441102757e-06, "loss": 0.0, "step": 12410 }, { "epoch": 0.7998324418379842, "grad_norm": 2.0668252348953236e-05, "learning_rate": 2.2298603651987115e-06, "loss": 0.0, "step": 12411 }, { "epoch": 0.7998968872849134, "grad_norm": 0.0005704418239510764, "learning_rate": 2.2291442892946654e-06, "loss": 0.0, "step": 12412 }, { "epoch": 0.7999613327318424, "grad_norm": 0.01770676458062447, "learning_rate": 2.2284282133906197e-06, "loss": 0.0, "step": 12413 }, { "epoch": 0.8000257781787716, "grad_norm": 0.00979534521749774, "learning_rate": 2.2277121374865736e-06, "loss": 0.0001, "step": 12414 }, { "epoch": 0.8000902236257008, "grad_norm": 0.006045575204337346, "learning_rate": 2.226996061582528e-06, "loss": 0.0, "step": 12415 }, { "epoch": 0.80015466907263, "grad_norm": 0.006182677580691062, "learning_rate": 2.226279985678482e-06, "loss": 0.0, "step": 12416 }, { "epoch": 0.8002191145195592, "grad_norm": 0.01825002921528511, "learning_rate": 2.2255639097744365e-06, "loss": 0.0, "step": 12417 }, { "epoch": 0.8002835599664884, "grad_norm": 0.11108596716148732, "learning_rate": 2.2248478338703904e-06, "loss": 0.0002, "step": 12418 }, { "epoch": 0.8003480054134176, "grad_norm": 0.004849966220652724, "learning_rate": 2.2241317579663447e-06, "loss": 0.0, "step": 12419 }, { "epoch": 0.8004124508603467, "grad_norm": 0.01844083369199926, "learning_rate": 2.223415682062299e-06, "loss": 0.0001, "step": 12420 }, { "epoch": 0.8004768963072759, "grad_norm": 0.011662986322466401, "learning_rate": 2.222699606158253e-06, "loss": 0.0015, "step": 12421 }, { "epoch": 0.8005413417542051, "grad_norm": 0.00012785641594670615, "learning_rate": 2.221983530254207e-06, "loss": 0.0, "step": 12422 }, { "epoch": 0.8006057872011343, "grad_norm": 0.00016684387752677132, "learning_rate": 2.2212674543501614e-06, "loss": 0.0, "step": 12423 }, { "epoch": 0.8006702326480634, "grad_norm": 0.00021000222066320482, "learning_rate": 2.2205513784461157e-06, "loss": 0.0, "step": 12424 }, { "epoch": 0.8007346780949925, "grad_norm": 0.00046518345635586757, "learning_rate": 2.2198353025420696e-06, "loss": 0.0, "step": 12425 }, { "epoch": 0.8007991235419217, "grad_norm": 0.18923775731011344, "learning_rate": 2.219119226638024e-06, "loss": 0.0003, "step": 12426 }, { "epoch": 0.8008635689888509, "grad_norm": 0.007316504578402545, "learning_rate": 2.2184031507339778e-06, "loss": 0.0001, "step": 12427 }, { "epoch": 0.8009280144357801, "grad_norm": 0.004194083563563775, "learning_rate": 2.2176870748299325e-06, "loss": 0.0, "step": 12428 }, { "epoch": 0.8009924598827093, "grad_norm": 0.0009184694452434844, "learning_rate": 2.2169709989258864e-06, "loss": 0.0, "step": 12429 }, { "epoch": 0.8010569053296385, "grad_norm": 0.02332515788220043, "learning_rate": 2.2162549230218407e-06, "loss": 0.0, "step": 12430 }, { "epoch": 0.8011213507765677, "grad_norm": 0.031507976952465794, "learning_rate": 2.2155388471177945e-06, "loss": 0.0001, "step": 12431 }, { "epoch": 0.8011857962234968, "grad_norm": 2.666919954509735, "learning_rate": 2.214822771213749e-06, "loss": 0.0356, "step": 12432 }, { "epoch": 0.801250241670426, "grad_norm": 0.001449000874599054, "learning_rate": 2.2141066953097027e-06, "loss": 0.0, "step": 12433 }, { "epoch": 0.8013146871173552, "grad_norm": 0.004870778020393193, "learning_rate": 2.2133906194056574e-06, "loss": 0.0, "step": 12434 }, { "epoch": 0.8013791325642843, "grad_norm": 7.898662380784237e-05, "learning_rate": 2.2126745435016113e-06, "loss": 0.0, "step": 12435 }, { "epoch": 0.8014435780112135, "grad_norm": 0.0002028904110712959, "learning_rate": 2.2119584675975656e-06, "loss": 0.0, "step": 12436 }, { "epoch": 0.8015080234581426, "grad_norm": 0.005889722100197126, "learning_rate": 2.2112423916935195e-06, "loss": 0.0, "step": 12437 }, { "epoch": 0.8015724689050718, "grad_norm": 0.0014307871393988698, "learning_rate": 2.2105263157894738e-06, "loss": 0.0, "step": 12438 }, { "epoch": 0.801636914352001, "grad_norm": 0.0011634336621486382, "learning_rate": 2.209810239885428e-06, "loss": 0.0, "step": 12439 }, { "epoch": 0.8017013597989302, "grad_norm": 0.020376505300973437, "learning_rate": 2.2090941639813824e-06, "loss": 0.0, "step": 12440 }, { "epoch": 0.8017658052458594, "grad_norm": 0.003821548746330608, "learning_rate": 2.2083780880773363e-06, "loss": 0.0, "step": 12441 }, { "epoch": 0.8018302506927886, "grad_norm": 0.0014602166162969207, "learning_rate": 2.2076620121732906e-06, "loss": 0.0, "step": 12442 }, { "epoch": 0.8018946961397178, "grad_norm": 0.14062339066538232, "learning_rate": 2.206945936269245e-06, "loss": 0.0018, "step": 12443 }, { "epoch": 0.8019591415866469, "grad_norm": 0.020081862810718113, "learning_rate": 2.2062298603651987e-06, "loss": 0.0, "step": 12444 }, { "epoch": 0.8020235870335761, "grad_norm": 0.13397164659681082, "learning_rate": 2.205513784461153e-06, "loss": 0.0003, "step": 12445 }, { "epoch": 0.8020880324805052, "grad_norm": 0.0022000253668111184, "learning_rate": 2.2047977085571073e-06, "loss": 0.0, "step": 12446 }, { "epoch": 0.8021524779274344, "grad_norm": 0.006739471661244481, "learning_rate": 2.2040816326530616e-06, "loss": 0.0, "step": 12447 }, { "epoch": 0.8022169233743636, "grad_norm": 0.007373562634504472, "learning_rate": 2.2033655567490155e-06, "loss": 0.0, "step": 12448 }, { "epoch": 0.8022813688212928, "grad_norm": 0.056199122722132976, "learning_rate": 2.20264948084497e-06, "loss": 0.0001, "step": 12449 }, { "epoch": 0.8023458142682219, "grad_norm": 0.16691897834307712, "learning_rate": 2.2019334049409237e-06, "loss": 0.002, "step": 12450 }, { "epoch": 0.8024102597151511, "grad_norm": 0.00024640163647747016, "learning_rate": 2.201217329036878e-06, "loss": 0.0, "step": 12451 }, { "epoch": 0.8024747051620803, "grad_norm": 0.013440612014857863, "learning_rate": 2.2005012531328323e-06, "loss": 0.0, "step": 12452 }, { "epoch": 0.8025391506090095, "grad_norm": 0.006342392524851095, "learning_rate": 2.1997851772287866e-06, "loss": 0.0, "step": 12453 }, { "epoch": 0.8026035960559387, "grad_norm": 0.00017604408165197298, "learning_rate": 2.1990691013247404e-06, "loss": 0.0, "step": 12454 }, { "epoch": 0.8026680415028679, "grad_norm": 0.0022245882873562933, "learning_rate": 2.1983530254206947e-06, "loss": 0.0, "step": 12455 }, { "epoch": 0.802732486949797, "grad_norm": 5.712391774772362e-05, "learning_rate": 2.197636949516649e-06, "loss": 0.0, "step": 12456 }, { "epoch": 0.8027969323967262, "grad_norm": 0.0006953470399971264, "learning_rate": 2.196920873612603e-06, "loss": 0.0, "step": 12457 }, { "epoch": 0.8028613778436553, "grad_norm": 0.0016720927108341782, "learning_rate": 2.1962047977085572e-06, "loss": 0.0, "step": 12458 }, { "epoch": 0.8029258232905845, "grad_norm": 0.00015475809128655122, "learning_rate": 2.1954887218045115e-06, "loss": 0.0, "step": 12459 }, { "epoch": 0.8029902687375137, "grad_norm": 0.0013262927475274097, "learning_rate": 2.194772645900466e-06, "loss": 0.0, "step": 12460 }, { "epoch": 0.8030547141844429, "grad_norm": 0.0011045115282342043, "learning_rate": 2.1940565699964197e-06, "loss": 0.0, "step": 12461 }, { "epoch": 0.803119159631372, "grad_norm": 0.0021931957013456202, "learning_rate": 2.193340494092374e-06, "loss": 0.0, "step": 12462 }, { "epoch": 0.8031836050783012, "grad_norm": 5.5322073605114676e-05, "learning_rate": 2.1926244181883283e-06, "loss": 0.0, "step": 12463 }, { "epoch": 0.8032480505252304, "grad_norm": 0.18646149330729211, "learning_rate": 2.1919083422842826e-06, "loss": 0.0006, "step": 12464 }, { "epoch": 0.8033124959721596, "grad_norm": 0.00282455793739817, "learning_rate": 2.1911922663802365e-06, "loss": 0.0, "step": 12465 }, { "epoch": 0.8033769414190888, "grad_norm": 0.004386932538795974, "learning_rate": 2.1904761904761908e-06, "loss": 0.0, "step": 12466 }, { "epoch": 0.803441386866018, "grad_norm": 1.3727636621027356e-05, "learning_rate": 2.1897601145721446e-06, "loss": 0.0, "step": 12467 }, { "epoch": 0.8035058323129471, "grad_norm": 0.00011849457349212985, "learning_rate": 2.189044038668099e-06, "loss": 0.0, "step": 12468 }, { "epoch": 0.8035702777598762, "grad_norm": 0.0011204254283753746, "learning_rate": 2.1883279627640532e-06, "loss": 0.0, "step": 12469 }, { "epoch": 0.8036347232068054, "grad_norm": 0.005514206749045671, "learning_rate": 2.1876118868600075e-06, "loss": 0.0, "step": 12470 }, { "epoch": 0.8036991686537346, "grad_norm": 0.002547410752228369, "learning_rate": 2.1868958109559614e-06, "loss": 0.0, "step": 12471 }, { "epoch": 0.8037636141006638, "grad_norm": 0.004749226856846408, "learning_rate": 2.1861797350519157e-06, "loss": 0.0, "step": 12472 }, { "epoch": 0.803828059547593, "grad_norm": 0.008761383470593858, "learning_rate": 2.1854636591478696e-06, "loss": 0.0, "step": 12473 }, { "epoch": 0.8038925049945221, "grad_norm": 0.0002561698488996513, "learning_rate": 2.184747583243824e-06, "loss": 0.0, "step": 12474 }, { "epoch": 0.8039569504414513, "grad_norm": 0.004750373251371964, "learning_rate": 2.184031507339778e-06, "loss": 0.0, "step": 12475 }, { "epoch": 0.8040213958883805, "grad_norm": 0.00010574556100120908, "learning_rate": 2.1833154314357325e-06, "loss": 0.0, "step": 12476 }, { "epoch": 0.8040858413353097, "grad_norm": 0.312174436736941, "learning_rate": 2.1825993555316864e-06, "loss": 0.0056, "step": 12477 }, { "epoch": 0.8041502867822389, "grad_norm": 0.023472666434982383, "learning_rate": 2.1818832796276407e-06, "loss": 0.0002, "step": 12478 }, { "epoch": 0.8042147322291681, "grad_norm": 0.0014214595665206025, "learning_rate": 2.181167203723595e-06, "loss": 0.0, "step": 12479 }, { "epoch": 0.8042791776760971, "grad_norm": 0.004601320748266051, "learning_rate": 2.180451127819549e-06, "loss": 0.0, "step": 12480 }, { "epoch": 0.8043436231230263, "grad_norm": 0.0004118759793522063, "learning_rate": 2.179735051915503e-06, "loss": 0.0, "step": 12481 }, { "epoch": 0.8044080685699555, "grad_norm": 0.007549649471261577, "learning_rate": 2.1790189760114574e-06, "loss": 0.0001, "step": 12482 }, { "epoch": 0.8044725140168847, "grad_norm": 0.0036913314331920907, "learning_rate": 2.1783029001074117e-06, "loss": 0.0, "step": 12483 }, { "epoch": 0.8045369594638139, "grad_norm": 0.2587022743807844, "learning_rate": 2.1775868242033656e-06, "loss": 0.0009, "step": 12484 }, { "epoch": 0.8046014049107431, "grad_norm": 0.00025450952995651784, "learning_rate": 2.17687074829932e-06, "loss": 0.0, "step": 12485 }, { "epoch": 0.8046658503576722, "grad_norm": 0.7104865939347345, "learning_rate": 2.1761546723952738e-06, "loss": 0.002, "step": 12486 }, { "epoch": 0.8047302958046014, "grad_norm": 0.004899011875170007, "learning_rate": 2.1754385964912285e-06, "loss": 0.0, "step": 12487 }, { "epoch": 0.8047947412515306, "grad_norm": 0.0022642938689466183, "learning_rate": 2.1747225205871824e-06, "loss": 0.0, "step": 12488 }, { "epoch": 0.8048591866984598, "grad_norm": 0.0004189518177420199, "learning_rate": 2.1740064446831367e-06, "loss": 0.0, "step": 12489 }, { "epoch": 0.804923632145389, "grad_norm": 2.2536943495337073e-05, "learning_rate": 2.1732903687790905e-06, "loss": 0.0, "step": 12490 }, { "epoch": 0.804988077592318, "grad_norm": 0.0035356629953806427, "learning_rate": 2.172574292875045e-06, "loss": 0.0, "step": 12491 }, { "epoch": 0.8050525230392472, "grad_norm": 0.4910174343224499, "learning_rate": 2.171858216970999e-06, "loss": 0.001, "step": 12492 }, { "epoch": 0.8051169684861764, "grad_norm": 0.0033785242190478037, "learning_rate": 2.1711421410669534e-06, "loss": 0.0, "step": 12493 }, { "epoch": 0.8051814139331056, "grad_norm": 0.00046202312693729595, "learning_rate": 2.1704260651629073e-06, "loss": 0.0, "step": 12494 }, { "epoch": 0.8052458593800348, "grad_norm": 0.0003226347943040989, "learning_rate": 2.1697099892588616e-06, "loss": 0.0, "step": 12495 }, { "epoch": 0.805310304826964, "grad_norm": 0.00031251158052698306, "learning_rate": 2.168993913354816e-06, "loss": 0.0, "step": 12496 }, { "epoch": 0.8053747502738932, "grad_norm": 0.023313700776633278, "learning_rate": 2.1682778374507698e-06, "loss": 0.0, "step": 12497 }, { "epoch": 0.8054391957208223, "grad_norm": 0.01404507452701196, "learning_rate": 2.167561761546724e-06, "loss": 0.0, "step": 12498 }, { "epoch": 0.8055036411677515, "grad_norm": 0.00015273987081015428, "learning_rate": 2.1668456856426784e-06, "loss": 0.0, "step": 12499 }, { "epoch": 0.8055680866146807, "grad_norm": 0.002063542098063826, "learning_rate": 2.1661296097386327e-06, "loss": 0.0, "step": 12500 }, { "epoch": 0.8056325320616099, "grad_norm": 0.0011278421061533598, "learning_rate": 2.1654135338345866e-06, "loss": 0.0, "step": 12501 }, { "epoch": 0.805696977508539, "grad_norm": 0.006941577005496535, "learning_rate": 2.164697457930541e-06, "loss": 0.0, "step": 12502 }, { "epoch": 0.8057614229554682, "grad_norm": 0.0012370293237017856, "learning_rate": 2.1639813820264947e-06, "loss": 0.0, "step": 12503 }, { "epoch": 0.8058258684023973, "grad_norm": 0.017707468873228566, "learning_rate": 2.1632653061224495e-06, "loss": 0.0002, "step": 12504 }, { "epoch": 0.8058903138493265, "grad_norm": 0.00018853056554883304, "learning_rate": 2.1625492302184033e-06, "loss": 0.0, "step": 12505 }, { "epoch": 0.8059547592962557, "grad_norm": 0.004347717010727028, "learning_rate": 2.1618331543143576e-06, "loss": 0.0, "step": 12506 }, { "epoch": 0.8060192047431849, "grad_norm": 0.0013038376412325182, "learning_rate": 2.1611170784103115e-06, "loss": 0.0, "step": 12507 }, { "epoch": 0.8060836501901141, "grad_norm": 0.09628063265594201, "learning_rate": 2.160401002506266e-06, "loss": 0.0002, "step": 12508 }, { "epoch": 0.8061480956370433, "grad_norm": 0.0007463655062723487, "learning_rate": 2.15968492660222e-06, "loss": 0.0, "step": 12509 }, { "epoch": 0.8062125410839724, "grad_norm": 0.00048655696128334684, "learning_rate": 2.1589688506981744e-06, "loss": 0.0, "step": 12510 }, { "epoch": 0.8062769865309016, "grad_norm": 0.005872971745491133, "learning_rate": 2.1582527747941283e-06, "loss": 0.0, "step": 12511 }, { "epoch": 0.8063414319778308, "grad_norm": 0.002717922444481002, "learning_rate": 2.1575366988900826e-06, "loss": 0.0, "step": 12512 }, { "epoch": 0.8064058774247599, "grad_norm": 0.014760318307408935, "learning_rate": 2.156820622986037e-06, "loss": 0.0, "step": 12513 }, { "epoch": 0.8064703228716891, "grad_norm": 7.437945918253065e-05, "learning_rate": 2.1561045470819907e-06, "loss": 0.0, "step": 12514 }, { "epoch": 0.8065347683186183, "grad_norm": 0.0002872171139769259, "learning_rate": 2.155388471177945e-06, "loss": 0.0, "step": 12515 }, { "epoch": 0.8065992137655474, "grad_norm": 0.4398679174420515, "learning_rate": 2.1546723952738993e-06, "loss": 0.0032, "step": 12516 }, { "epoch": 0.8066636592124766, "grad_norm": 0.0012609922360681577, "learning_rate": 2.1539563193698536e-06, "loss": 0.0, "step": 12517 }, { "epoch": 0.8067281046594058, "grad_norm": 0.00022379636048257173, "learning_rate": 2.1532402434658075e-06, "loss": 0.0, "step": 12518 }, { "epoch": 0.806792550106335, "grad_norm": 0.0017465105291618174, "learning_rate": 2.152524167561762e-06, "loss": 0.0, "step": 12519 }, { "epoch": 0.8068569955532642, "grad_norm": 0.00038133249943826756, "learning_rate": 2.1518080916577157e-06, "loss": 0.0, "step": 12520 }, { "epoch": 0.8069214410001934, "grad_norm": 0.00132960592924005, "learning_rate": 2.15109201575367e-06, "loss": 0.0, "step": 12521 }, { "epoch": 0.8069858864471225, "grad_norm": 0.017596323143278837, "learning_rate": 2.1503759398496243e-06, "loss": 0.0, "step": 12522 }, { "epoch": 0.8070503318940517, "grad_norm": 0.002651803633894323, "learning_rate": 2.1496598639455786e-06, "loss": 0.0, "step": 12523 }, { "epoch": 0.8071147773409808, "grad_norm": 0.0018630916251092704, "learning_rate": 2.1489437880415325e-06, "loss": 0.0, "step": 12524 }, { "epoch": 0.80717922278791, "grad_norm": 0.00027101958664413014, "learning_rate": 2.1482277121374868e-06, "loss": 0.0, "step": 12525 }, { "epoch": 0.8072436682348392, "grad_norm": 0.014118955231454783, "learning_rate": 2.1475116362334406e-06, "loss": 0.0, "step": 12526 }, { "epoch": 0.8073081136817684, "grad_norm": 0.0004878170706532088, "learning_rate": 2.146795560329395e-06, "loss": 0.0, "step": 12527 }, { "epoch": 0.8073725591286975, "grad_norm": 0.7035151141142062, "learning_rate": 2.1460794844253492e-06, "loss": 0.0207, "step": 12528 }, { "epoch": 0.8074370045756267, "grad_norm": 0.00014633861646761186, "learning_rate": 2.1453634085213035e-06, "loss": 0.0, "step": 12529 }, { "epoch": 0.8075014500225559, "grad_norm": 0.0007063263377159374, "learning_rate": 2.1446473326172574e-06, "loss": 0.0, "step": 12530 }, { "epoch": 0.8075658954694851, "grad_norm": 0.031044198535053354, "learning_rate": 2.1439312567132117e-06, "loss": 0.0002, "step": 12531 }, { "epoch": 0.8076303409164143, "grad_norm": 0.1859323512895166, "learning_rate": 2.143215180809166e-06, "loss": 0.0007, "step": 12532 }, { "epoch": 0.8076947863633435, "grad_norm": 0.003519137570280147, "learning_rate": 2.1424991049051203e-06, "loss": 0.0, "step": 12533 }, { "epoch": 0.8077592318102726, "grad_norm": 0.007856042322162549, "learning_rate": 2.141783029001074e-06, "loss": 0.0, "step": 12534 }, { "epoch": 0.8078236772572018, "grad_norm": 0.00011564863298519748, "learning_rate": 2.1410669530970285e-06, "loss": 0.0, "step": 12535 }, { "epoch": 0.8078881227041309, "grad_norm": 0.006344745271119972, "learning_rate": 2.1403508771929828e-06, "loss": 0.0, "step": 12536 }, { "epoch": 0.8079525681510601, "grad_norm": 0.033851168725958655, "learning_rate": 2.1396348012889367e-06, "loss": 0.0001, "step": 12537 }, { "epoch": 0.8080170135979893, "grad_norm": 0.00015541963414884185, "learning_rate": 2.138918725384891e-06, "loss": 0.0, "step": 12538 }, { "epoch": 0.8080814590449185, "grad_norm": 0.007355932033656503, "learning_rate": 2.1382026494808452e-06, "loss": 0.0, "step": 12539 }, { "epoch": 0.8081459044918476, "grad_norm": 0.0014587984007397436, "learning_rate": 2.1374865735767995e-06, "loss": 0.0, "step": 12540 }, { "epoch": 0.8082103499387768, "grad_norm": 0.3055763484401704, "learning_rate": 2.1367704976727534e-06, "loss": 0.0009, "step": 12541 }, { "epoch": 0.808274795385706, "grad_norm": 0.015262446442021883, "learning_rate": 2.1360544217687077e-06, "loss": 0.0, "step": 12542 }, { "epoch": 0.8083392408326352, "grad_norm": 0.007324747722120583, "learning_rate": 2.1353383458646616e-06, "loss": 0.0001, "step": 12543 }, { "epoch": 0.8084036862795644, "grad_norm": 0.03268571168614651, "learning_rate": 2.134622269960616e-06, "loss": 0.0003, "step": 12544 }, { "epoch": 0.8084681317264936, "grad_norm": 0.002548795640975255, "learning_rate": 2.13390619405657e-06, "loss": 0.0, "step": 12545 }, { "epoch": 0.8085325771734228, "grad_norm": 0.0005647212933699163, "learning_rate": 2.1331901181525245e-06, "loss": 0.0, "step": 12546 }, { "epoch": 0.8085970226203518, "grad_norm": 0.011028547755694413, "learning_rate": 2.1324740422484784e-06, "loss": 0.0001, "step": 12547 }, { "epoch": 0.808661468067281, "grad_norm": 0.0016098602253972305, "learning_rate": 2.1317579663444327e-06, "loss": 0.0, "step": 12548 }, { "epoch": 0.8087259135142102, "grad_norm": 0.002141363180888285, "learning_rate": 2.131041890440387e-06, "loss": 0.0, "step": 12549 }, { "epoch": 0.8087903589611394, "grad_norm": 0.009605867876297334, "learning_rate": 2.130325814536341e-06, "loss": 0.0, "step": 12550 }, { "epoch": 0.8088548044080686, "grad_norm": 0.012447253129473378, "learning_rate": 2.129609738632295e-06, "loss": 0.0, "step": 12551 }, { "epoch": 0.8089192498549977, "grad_norm": 0.0005868374143533707, "learning_rate": 2.1288936627282494e-06, "loss": 0.0, "step": 12552 }, { "epoch": 0.8089836953019269, "grad_norm": 0.005273947446889605, "learning_rate": 2.1281775868242037e-06, "loss": 0.0, "step": 12553 }, { "epoch": 0.8090481407488561, "grad_norm": 0.0002057675618183742, "learning_rate": 2.1274615109201576e-06, "loss": 0.0, "step": 12554 }, { "epoch": 0.8091125861957853, "grad_norm": 0.00044356842756500183, "learning_rate": 2.126745435016112e-06, "loss": 0.0, "step": 12555 }, { "epoch": 0.8091770316427145, "grad_norm": 0.0005960369586189239, "learning_rate": 2.1260293591120658e-06, "loss": 0.0, "step": 12556 }, { "epoch": 0.8092414770896437, "grad_norm": 0.0005960369586189239, "learning_rate": 2.1260293591120658e-06, "loss": 0.0065, "step": 12557 }, { "epoch": 0.8093059225365727, "grad_norm": 0.0009959009747822732, "learning_rate": 2.1253132832080205e-06, "loss": 0.0, "step": 12558 }, { "epoch": 0.8093703679835019, "grad_norm": 0.001079138957866863, "learning_rate": 2.1245972073039744e-06, "loss": 0.0, "step": 12559 }, { "epoch": 0.8094348134304311, "grad_norm": 0.0001700052524290455, "learning_rate": 2.1238811313999287e-06, "loss": 0.0, "step": 12560 }, { "epoch": 0.8094992588773603, "grad_norm": 0.3267133866120286, "learning_rate": 2.1231650554958826e-06, "loss": 0.0021, "step": 12561 }, { "epoch": 0.8095637043242895, "grad_norm": 0.026077396622425923, "learning_rate": 2.122448979591837e-06, "loss": 0.0015, "step": 12562 }, { "epoch": 0.8096281497712187, "grad_norm": 0.004266359877687984, "learning_rate": 2.121732903687791e-06, "loss": 0.0, "step": 12563 }, { "epoch": 0.8096925952181478, "grad_norm": 0.0020029363056970465, "learning_rate": 2.1210168277837455e-06, "loss": 0.0, "step": 12564 }, { "epoch": 0.809757040665077, "grad_norm": 0.0022158301051287744, "learning_rate": 2.1203007518796993e-06, "loss": 0.0, "step": 12565 }, { "epoch": 0.8098214861120062, "grad_norm": 0.02784966132114145, "learning_rate": 2.1195846759756536e-06, "loss": 0.0, "step": 12566 }, { "epoch": 0.8098859315589354, "grad_norm": 0.003321398332227599, "learning_rate": 2.118868600071608e-06, "loss": 0.0, "step": 12567 }, { "epoch": 0.8099503770058646, "grad_norm": 0.0006111349772194386, "learning_rate": 2.118152524167562e-06, "loss": 0.0, "step": 12568 }, { "epoch": 0.8100148224527937, "grad_norm": 8.239420264536209e-05, "learning_rate": 2.117436448263516e-06, "loss": 0.0, "step": 12569 }, { "epoch": 0.8100792678997228, "grad_norm": 0.011744492043335403, "learning_rate": 2.1167203723594704e-06, "loss": 0.0001, "step": 12570 }, { "epoch": 0.810143713346652, "grad_norm": 0.00613206029830229, "learning_rate": 2.1160042964554247e-06, "loss": 0.0, "step": 12571 }, { "epoch": 0.8102081587935812, "grad_norm": 0.0308008172561735, "learning_rate": 2.1152882205513786e-06, "loss": 0.0003, "step": 12572 }, { "epoch": 0.8102726042405104, "grad_norm": 0.010741848493233576, "learning_rate": 2.114572144647333e-06, "loss": 0.0, "step": 12573 }, { "epoch": 0.8103370496874396, "grad_norm": 0.0015504692910859963, "learning_rate": 2.1138560687432867e-06, "loss": 0.0, "step": 12574 }, { "epoch": 0.8104014951343688, "grad_norm": 0.006183430411555323, "learning_rate": 2.1131399928392415e-06, "loss": 0.0, "step": 12575 }, { "epoch": 0.810465940581298, "grad_norm": 0.00024011503847110936, "learning_rate": 2.1124239169351953e-06, "loss": 0.0, "step": 12576 }, { "epoch": 0.8105303860282271, "grad_norm": 0.0013300068832935133, "learning_rate": 2.1117078410311496e-06, "loss": 0.0, "step": 12577 }, { "epoch": 0.8105948314751563, "grad_norm": 0.0001257257657714678, "learning_rate": 2.1109917651271035e-06, "loss": 0.0, "step": 12578 }, { "epoch": 0.8106592769220855, "grad_norm": 0.0012701698727820548, "learning_rate": 2.110275689223058e-06, "loss": 0.0, "step": 12579 }, { "epoch": 0.8107237223690146, "grad_norm": 0.002781898222436216, "learning_rate": 2.1095596133190117e-06, "loss": 0.0, "step": 12580 }, { "epoch": 0.8107881678159438, "grad_norm": 0.008623381992397053, "learning_rate": 2.1088435374149664e-06, "loss": 0.0, "step": 12581 }, { "epoch": 0.810852613262873, "grad_norm": 0.0035804817932016133, "learning_rate": 2.1081274615109203e-06, "loss": 0.0, "step": 12582 }, { "epoch": 0.8109170587098021, "grad_norm": 0.0004178044112188488, "learning_rate": 2.1074113856068746e-06, "loss": 0.0, "step": 12583 }, { "epoch": 0.8109815041567313, "grad_norm": 0.20006863377531295, "learning_rate": 2.1066953097028285e-06, "loss": 0.0039, "step": 12584 }, { "epoch": 0.8110459496036605, "grad_norm": 0.023746772059720624, "learning_rate": 2.1059792337987828e-06, "loss": 0.0, "step": 12585 }, { "epoch": 0.8111103950505897, "grad_norm": 0.00017902644761458916, "learning_rate": 2.105263157894737e-06, "loss": 0.0, "step": 12586 }, { "epoch": 0.8111748404975189, "grad_norm": 0.0015365147211049636, "learning_rate": 2.1045470819906914e-06, "loss": 0.0, "step": 12587 }, { "epoch": 0.811239285944448, "grad_norm": 0.03381551467867641, "learning_rate": 2.1038310060866452e-06, "loss": 0.0002, "step": 12588 }, { "epoch": 0.8113037313913772, "grad_norm": 0.01445457605861845, "learning_rate": 2.1031149301825995e-06, "loss": 0.0001, "step": 12589 }, { "epoch": 0.8113681768383064, "grad_norm": 0.0031470428802809754, "learning_rate": 2.102398854278554e-06, "loss": 0.0, "step": 12590 }, { "epoch": 0.8114326222852355, "grad_norm": 0.47407611668946104, "learning_rate": 2.1016827783745077e-06, "loss": 0.0027, "step": 12591 }, { "epoch": 0.8114970677321647, "grad_norm": 0.001997918103711385, "learning_rate": 2.100966702470462e-06, "loss": 0.0, "step": 12592 }, { "epoch": 0.8115615131790939, "grad_norm": 0.00022226182021700908, "learning_rate": 2.1002506265664163e-06, "loss": 0.0, "step": 12593 }, { "epoch": 0.811625958626023, "grad_norm": 0.005495734573647927, "learning_rate": 2.0995345506623706e-06, "loss": 0.0, "step": 12594 }, { "epoch": 0.8116904040729522, "grad_norm": 0.36579584629506817, "learning_rate": 2.0988184747583245e-06, "loss": 0.001, "step": 12595 }, { "epoch": 0.8117548495198814, "grad_norm": 0.0017201286993632876, "learning_rate": 2.0981023988542788e-06, "loss": 0.0, "step": 12596 }, { "epoch": 0.8118192949668106, "grad_norm": 0.0012654749260697195, "learning_rate": 2.0973863229502327e-06, "loss": 0.0, "step": 12597 }, { "epoch": 0.8118837404137398, "grad_norm": 6.77314804816197e-05, "learning_rate": 2.096670247046187e-06, "loss": 0.0, "step": 12598 }, { "epoch": 0.811948185860669, "grad_norm": 0.12170673011688415, "learning_rate": 2.0959541711421412e-06, "loss": 0.0003, "step": 12599 }, { "epoch": 0.8120126313075982, "grad_norm": 0.00039734094272957175, "learning_rate": 2.0952380952380955e-06, "loss": 0.0, "step": 12600 }, { "epoch": 0.8120770767545273, "grad_norm": 0.0040080291412414185, "learning_rate": 2.0945220193340494e-06, "loss": 0.0, "step": 12601 }, { "epoch": 0.8121415222014564, "grad_norm": 0.011901667177716067, "learning_rate": 2.0938059434300037e-06, "loss": 0.0, "step": 12602 }, { "epoch": 0.8122059676483856, "grad_norm": 0.27331548420750906, "learning_rate": 2.093089867525958e-06, "loss": 0.0013, "step": 12603 }, { "epoch": 0.8122704130953148, "grad_norm": 0.004315035369221101, "learning_rate": 2.0923737916219123e-06, "loss": 0.0, "step": 12604 }, { "epoch": 0.812334858542244, "grad_norm": 1.478788977076481e-05, "learning_rate": 2.091657715717866e-06, "loss": 0.0, "step": 12605 }, { "epoch": 0.8123993039891731, "grad_norm": 0.018809907453625072, "learning_rate": 2.0909416398138205e-06, "loss": 0.0001, "step": 12606 }, { "epoch": 0.8124637494361023, "grad_norm": 0.0015582649472403046, "learning_rate": 2.090225563909775e-06, "loss": 0.0, "step": 12607 }, { "epoch": 0.8125281948830315, "grad_norm": 0.000712515429465975, "learning_rate": 2.0895094880057287e-06, "loss": 0.0, "step": 12608 }, { "epoch": 0.8125926403299607, "grad_norm": 0.00017508893570977122, "learning_rate": 2.088793412101683e-06, "loss": 0.0, "step": 12609 }, { "epoch": 0.8126570857768899, "grad_norm": 0.0010479845875821226, "learning_rate": 2.0880773361976373e-06, "loss": 0.0, "step": 12610 }, { "epoch": 0.8127215312238191, "grad_norm": 0.001556306928741489, "learning_rate": 2.0873612602935916e-06, "loss": 0.0, "step": 12611 }, { "epoch": 0.8127859766707483, "grad_norm": 0.006236001901142749, "learning_rate": 2.0866451843895454e-06, "loss": 0.0001, "step": 12612 }, { "epoch": 0.8128504221176774, "grad_norm": 0.0030114578149282394, "learning_rate": 2.0859291084854997e-06, "loss": 0.0, "step": 12613 }, { "epoch": 0.8129148675646065, "grad_norm": 0.011364972960464367, "learning_rate": 2.0852130325814536e-06, "loss": 0.0, "step": 12614 }, { "epoch": 0.8129793130115357, "grad_norm": 0.005280251572317013, "learning_rate": 2.084496956677408e-06, "loss": 0.0, "step": 12615 }, { "epoch": 0.8130437584584649, "grad_norm": 8.507760533251935e-05, "learning_rate": 2.083780880773362e-06, "loss": 0.0, "step": 12616 }, { "epoch": 0.8131082039053941, "grad_norm": 0.00038017094694071246, "learning_rate": 2.0830648048693165e-06, "loss": 0.0, "step": 12617 }, { "epoch": 0.8131726493523233, "grad_norm": 0.2922925010749905, "learning_rate": 2.0823487289652704e-06, "loss": 0.0007, "step": 12618 }, { "epoch": 0.8132370947992524, "grad_norm": 0.23094773223415585, "learning_rate": 2.0816326530612247e-06, "loss": 0.0004, "step": 12619 }, { "epoch": 0.8133015402461816, "grad_norm": 0.5233092933901883, "learning_rate": 2.0809165771571786e-06, "loss": 0.001, "step": 12620 }, { "epoch": 0.8133659856931108, "grad_norm": 0.00033577295685188345, "learning_rate": 2.080200501253133e-06, "loss": 0.0, "step": 12621 }, { "epoch": 0.81343043114004, "grad_norm": 0.03787885973590073, "learning_rate": 2.079484425349087e-06, "loss": 0.0001, "step": 12622 }, { "epoch": 0.8134948765869692, "grad_norm": 0.00023672040014827814, "learning_rate": 2.0787683494450415e-06, "loss": 0.0, "step": 12623 }, { "epoch": 0.8135593220338984, "grad_norm": 0.0004785405222872389, "learning_rate": 2.0780522735409953e-06, "loss": 0.0, "step": 12624 }, { "epoch": 0.8136237674808274, "grad_norm": 0.002556479343831401, "learning_rate": 2.0773361976369496e-06, "loss": 0.0, "step": 12625 }, { "epoch": 0.8136882129277566, "grad_norm": 0.071685791015625, "learning_rate": 2.076620121732904e-06, "loss": 0.0001, "step": 12626 }, { "epoch": 0.8137526583746858, "grad_norm": 0.00015047715241365775, "learning_rate": 2.075904045828858e-06, "loss": 0.0, "step": 12627 }, { "epoch": 0.813817103821615, "grad_norm": 0.039498544859528056, "learning_rate": 2.075187969924812e-06, "loss": 0.0001, "step": 12628 }, { "epoch": 0.8138815492685442, "grad_norm": 0.007269135108434769, "learning_rate": 2.0744718940207664e-06, "loss": 0.0, "step": 12629 }, { "epoch": 0.8139459947154734, "grad_norm": 0.00021724833356658388, "learning_rate": 2.0737558181167207e-06, "loss": 0.0, "step": 12630 }, { "epoch": 0.8140104401624025, "grad_norm": 0.15544795779068946, "learning_rate": 2.0730397422126746e-06, "loss": 0.0005, "step": 12631 }, { "epoch": 0.8140748856093317, "grad_norm": 0.007632494535068704, "learning_rate": 2.072323666308629e-06, "loss": 0.0, "step": 12632 }, { "epoch": 0.8141393310562609, "grad_norm": 0.026181218628717726, "learning_rate": 2.0716075904045827e-06, "loss": 0.0, "step": 12633 }, { "epoch": 0.8142037765031901, "grad_norm": 0.169198243764644, "learning_rate": 2.0708915145005375e-06, "loss": 0.0004, "step": 12634 }, { "epoch": 0.8142682219501193, "grad_norm": 0.02404185888492924, "learning_rate": 2.0701754385964913e-06, "loss": 0.0001, "step": 12635 }, { "epoch": 0.8143326673970483, "grad_norm": 0.003228024572993855, "learning_rate": 2.0694593626924456e-06, "loss": 0.0, "step": 12636 }, { "epoch": 0.8143971128439775, "grad_norm": 0.0023332422248495926, "learning_rate": 2.0687432867883995e-06, "loss": 0.0, "step": 12637 }, { "epoch": 0.8144615582909067, "grad_norm": 5.473414157304134e-05, "learning_rate": 2.068027210884354e-06, "loss": 0.0, "step": 12638 }, { "epoch": 0.8145260037378359, "grad_norm": 0.021708693061578042, "learning_rate": 2.067311134980308e-06, "loss": 0.0, "step": 12639 }, { "epoch": 0.8145904491847651, "grad_norm": 0.45574811805194243, "learning_rate": 2.0665950590762624e-06, "loss": 0.0004, "step": 12640 }, { "epoch": 0.8146548946316943, "grad_norm": 0.0013192928213251146, "learning_rate": 2.0658789831722163e-06, "loss": 0.0, "step": 12641 }, { "epoch": 0.8147193400786235, "grad_norm": 0.11894261211468155, "learning_rate": 2.0651629072681706e-06, "loss": 0.0002, "step": 12642 }, { "epoch": 0.8147837855255526, "grad_norm": 0.14892849528857, "learning_rate": 2.064446831364125e-06, "loss": 0.0002, "step": 12643 }, { "epoch": 0.8148482309724818, "grad_norm": 0.5862908378806496, "learning_rate": 2.0637307554600788e-06, "loss": 0.0043, "step": 12644 }, { "epoch": 0.814912676419411, "grad_norm": 0.00046785723467117026, "learning_rate": 2.063014679556033e-06, "loss": 0.0, "step": 12645 }, { "epoch": 0.8149771218663402, "grad_norm": 0.0002734640965516467, "learning_rate": 2.0622986036519874e-06, "loss": 0.0, "step": 12646 }, { "epoch": 0.8150415673132693, "grad_norm": 0.009720572095862221, "learning_rate": 2.0615825277479417e-06, "loss": 0.0, "step": 12647 }, { "epoch": 0.8151060127601985, "grad_norm": 0.0276897489043289, "learning_rate": 2.0608664518438955e-06, "loss": 0.0, "step": 12648 }, { "epoch": 0.8151704582071276, "grad_norm": 0.12226153845249521, "learning_rate": 2.06015037593985e-06, "loss": 0.0002, "step": 12649 }, { "epoch": 0.8152349036540568, "grad_norm": 0.045159530174020975, "learning_rate": 2.0594343000358037e-06, "loss": 0.0, "step": 12650 }, { "epoch": 0.815299349100986, "grad_norm": 0.0007022768080407791, "learning_rate": 2.0587182241317584e-06, "loss": 0.0, "step": 12651 }, { "epoch": 0.8153637945479152, "grad_norm": 0.023347433204983205, "learning_rate": 2.0580021482277123e-06, "loss": 0.0001, "step": 12652 }, { "epoch": 0.8154282399948444, "grad_norm": 0.00021818342706713008, "learning_rate": 2.0572860723236666e-06, "loss": 0.0, "step": 12653 }, { "epoch": 0.8154926854417736, "grad_norm": 0.19678674301530588, "learning_rate": 2.0565699964196205e-06, "loss": 0.0008, "step": 12654 }, { "epoch": 0.8155571308887027, "grad_norm": 2.9480723597161855, "learning_rate": 2.0558539205155748e-06, "loss": 0.0264, "step": 12655 }, { "epoch": 0.8156215763356319, "grad_norm": 0.000604122829339633, "learning_rate": 2.055137844611529e-06, "loss": 0.0, "step": 12656 }, { "epoch": 0.8156860217825611, "grad_norm": 0.004410635766435355, "learning_rate": 2.0544217687074834e-06, "loss": 0.0, "step": 12657 }, { "epoch": 0.8157504672294902, "grad_norm": 0.007988108148366502, "learning_rate": 2.0537056928034372e-06, "loss": 0.0, "step": 12658 }, { "epoch": 0.8158149126764194, "grad_norm": 0.00025976478303401106, "learning_rate": 2.0529896168993915e-06, "loss": 0.0, "step": 12659 }, { "epoch": 0.8158793581233486, "grad_norm": 0.024995079524881312, "learning_rate": 2.052273540995346e-06, "loss": 0.0, "step": 12660 }, { "epoch": 0.8159438035702777, "grad_norm": 0.041993960354219664, "learning_rate": 2.0515574650912997e-06, "loss": 0.0001, "step": 12661 }, { "epoch": 0.8160082490172069, "grad_norm": 0.0006448330306277122, "learning_rate": 2.050841389187254e-06, "loss": 0.0, "step": 12662 }, { "epoch": 0.8160726944641361, "grad_norm": 0.0007699161659463133, "learning_rate": 2.0501253132832083e-06, "loss": 0.0, "step": 12663 }, { "epoch": 0.8161371399110653, "grad_norm": 0.0001718285197238581, "learning_rate": 2.0494092373791626e-06, "loss": 0.0, "step": 12664 }, { "epoch": 0.8162015853579945, "grad_norm": 0.00014869667986260737, "learning_rate": 2.0486931614751165e-06, "loss": 0.0, "step": 12665 }, { "epoch": 0.8162660308049237, "grad_norm": 0.0001513088943245394, "learning_rate": 2.047977085571071e-06, "loss": 0.0, "step": 12666 }, { "epoch": 0.8163304762518528, "grad_norm": 0.005079574876596566, "learning_rate": 2.0472610096670247e-06, "loss": 0.0, "step": 12667 }, { "epoch": 0.816394921698782, "grad_norm": 0.0012937261291333171, "learning_rate": 2.046544933762979e-06, "loss": 0.0, "step": 12668 }, { "epoch": 0.8164593671457111, "grad_norm": 0.013307215880097614, "learning_rate": 2.0458288578589333e-06, "loss": 0.0, "step": 12669 }, { "epoch": 0.8165238125926403, "grad_norm": 0.0060048266720873255, "learning_rate": 2.0451127819548876e-06, "loss": 0.0, "step": 12670 }, { "epoch": 0.8165882580395695, "grad_norm": 0.00019912951092864882, "learning_rate": 2.0443967060508414e-06, "loss": 0.0, "step": 12671 }, { "epoch": 0.8166527034864987, "grad_norm": 0.00549363525012209, "learning_rate": 2.0436806301467957e-06, "loss": 0.0, "step": 12672 }, { "epoch": 0.8167171489334278, "grad_norm": 0.00042558856689712486, "learning_rate": 2.0429645542427496e-06, "loss": 0.0, "step": 12673 }, { "epoch": 0.816781594380357, "grad_norm": 0.00016062680426746665, "learning_rate": 2.042248478338704e-06, "loss": 0.0, "step": 12674 }, { "epoch": 0.8168460398272862, "grad_norm": 0.08333596551979962, "learning_rate": 2.041532402434658e-06, "loss": 0.0004, "step": 12675 }, { "epoch": 0.8169104852742154, "grad_norm": 0.2981101617579293, "learning_rate": 2.0408163265306125e-06, "loss": 0.0017, "step": 12676 }, { "epoch": 0.8169749307211446, "grad_norm": 0.0010572690640603976, "learning_rate": 2.0401002506265664e-06, "loss": 0.0, "step": 12677 }, { "epoch": 0.8170393761680738, "grad_norm": 0.0012552339913406846, "learning_rate": 2.0393841747225207e-06, "loss": 0.0, "step": 12678 }, { "epoch": 0.817103821615003, "grad_norm": 0.016703592774660905, "learning_rate": 2.038668098818475e-06, "loss": 0.0, "step": 12679 }, { "epoch": 0.8171682670619321, "grad_norm": 0.004141902966402822, "learning_rate": 2.0379520229144293e-06, "loss": 0.0, "step": 12680 }, { "epoch": 0.8172327125088612, "grad_norm": 0.0007111984253445308, "learning_rate": 2.037235947010383e-06, "loss": 0.0, "step": 12681 }, { "epoch": 0.8172971579557904, "grad_norm": 0.0021643333428704877, "learning_rate": 2.0365198711063375e-06, "loss": 0.0, "step": 12682 }, { "epoch": 0.8173616034027196, "grad_norm": 0.009506596409528539, "learning_rate": 2.0358037952022918e-06, "loss": 0.0, "step": 12683 }, { "epoch": 0.8174260488496488, "grad_norm": 0.00015312242409299139, "learning_rate": 2.0350877192982456e-06, "loss": 0.0, "step": 12684 }, { "epoch": 0.8174904942965779, "grad_norm": 0.0015363426845521342, "learning_rate": 2.0343716433942e-06, "loss": 0.0, "step": 12685 }, { "epoch": 0.8175549397435071, "grad_norm": 0.0038774986870014085, "learning_rate": 2.0336555674901542e-06, "loss": 0.0, "step": 12686 }, { "epoch": 0.8176193851904363, "grad_norm": 0.0002841460507729212, "learning_rate": 2.0329394915861085e-06, "loss": 0.0, "step": 12687 }, { "epoch": 0.8176838306373655, "grad_norm": 0.0004165062032192322, "learning_rate": 2.0322234156820624e-06, "loss": 0.0, "step": 12688 }, { "epoch": 0.8177482760842947, "grad_norm": 0.0006068016544899525, "learning_rate": 2.0315073397780167e-06, "loss": 0.0, "step": 12689 }, { "epoch": 0.8178127215312239, "grad_norm": 0.056295926580902295, "learning_rate": 2.0307912638739706e-06, "loss": 0.0002, "step": 12690 }, { "epoch": 0.817877166978153, "grad_norm": 0.0007701938668082081, "learning_rate": 2.030075187969925e-06, "loss": 0.0, "step": 12691 }, { "epoch": 0.8179416124250821, "grad_norm": 0.0004344841076649519, "learning_rate": 2.029359112065879e-06, "loss": 0.0, "step": 12692 }, { "epoch": 0.8180060578720113, "grad_norm": 0.000434818477799362, "learning_rate": 2.0286430361618335e-06, "loss": 0.0, "step": 12693 }, { "epoch": 0.8180705033189405, "grad_norm": 0.0002604699380230901, "learning_rate": 2.0279269602577873e-06, "loss": 0.0, "step": 12694 }, { "epoch": 0.8181349487658697, "grad_norm": 0.003211286809710176, "learning_rate": 2.0272108843537416e-06, "loss": 0.0, "step": 12695 }, { "epoch": 0.8181993942127989, "grad_norm": 0.018087344344049478, "learning_rate": 2.026494808449696e-06, "loss": 0.0001, "step": 12696 }, { "epoch": 0.818263839659728, "grad_norm": 0.02370398735358649, "learning_rate": 2.02577873254565e-06, "loss": 0.0002, "step": 12697 }, { "epoch": 0.8183282851066572, "grad_norm": 0.0011283720401023612, "learning_rate": 2.025062656641604e-06, "loss": 0.0, "step": 12698 }, { "epoch": 0.8183927305535864, "grad_norm": 0.0006159527725816931, "learning_rate": 2.0243465807375584e-06, "loss": 0.0, "step": 12699 }, { "epoch": 0.8184571760005156, "grad_norm": 0.00618308122004743, "learning_rate": 2.0236305048335127e-06, "loss": 0.0, "step": 12700 }, { "epoch": 0.8185216214474448, "grad_norm": 0.0012521473666674897, "learning_rate": 2.0229144289294666e-06, "loss": 0.0, "step": 12701 }, { "epoch": 0.818586066894374, "grad_norm": 0.6123207248370139, "learning_rate": 2.022198353025421e-06, "loss": 0.0015, "step": 12702 }, { "epoch": 0.818650512341303, "grad_norm": 0.002254610635089249, "learning_rate": 2.0214822771213748e-06, "loss": 0.0, "step": 12703 }, { "epoch": 0.8187149577882322, "grad_norm": 0.1917155355543102, "learning_rate": 2.0207662012173295e-06, "loss": 0.0011, "step": 12704 }, { "epoch": 0.8187794032351614, "grad_norm": 0.0010368363220421362, "learning_rate": 2.0200501253132834e-06, "loss": 0.0, "step": 12705 }, { "epoch": 0.8188438486820906, "grad_norm": 0.0002894131131910139, "learning_rate": 2.0193340494092377e-06, "loss": 0.0, "step": 12706 }, { "epoch": 0.8189082941290198, "grad_norm": 0.00039159992150386276, "learning_rate": 2.0186179735051915e-06, "loss": 0.0, "step": 12707 }, { "epoch": 0.818972739575949, "grad_norm": 0.08468321098748963, "learning_rate": 2.017901897601146e-06, "loss": 0.0002, "step": 12708 }, { "epoch": 0.8190371850228781, "grad_norm": 3.6733720040983004, "learning_rate": 2.0171858216971e-06, "loss": 0.0662, "step": 12709 }, { "epoch": 0.8191016304698073, "grad_norm": 0.002083130458600624, "learning_rate": 2.0164697457930544e-06, "loss": 0.0, "step": 12710 }, { "epoch": 0.8191660759167365, "grad_norm": 6.762979958088849e-05, "learning_rate": 2.0157536698890083e-06, "loss": 0.0, "step": 12711 }, { "epoch": 0.8192305213636657, "grad_norm": 0.07021264761194777, "learning_rate": 2.0150375939849626e-06, "loss": 0.0001, "step": 12712 }, { "epoch": 0.8192949668105949, "grad_norm": 8.095978465353855e-05, "learning_rate": 2.014321518080917e-06, "loss": 0.0, "step": 12713 }, { "epoch": 0.819359412257524, "grad_norm": 0.00044268987224894545, "learning_rate": 2.0136054421768708e-06, "loss": 0.0, "step": 12714 }, { "epoch": 0.8194238577044531, "grad_norm": 0.7423377035637856, "learning_rate": 2.012889366272825e-06, "loss": 0.0018, "step": 12715 }, { "epoch": 0.8194883031513823, "grad_norm": 0.012233627670962024, "learning_rate": 2.0121732903687794e-06, "loss": 0.0, "step": 12716 }, { "epoch": 0.8195527485983115, "grad_norm": 0.0039064674577683235, "learning_rate": 2.0114572144647337e-06, "loss": 0.0, "step": 12717 }, { "epoch": 0.8196171940452407, "grad_norm": 6.302641262364943e-05, "learning_rate": 2.0107411385606875e-06, "loss": 0.0, "step": 12718 }, { "epoch": 0.8196816394921699, "grad_norm": 0.0005544111992086107, "learning_rate": 2.010025062656642e-06, "loss": 0.0, "step": 12719 }, { "epoch": 0.8197460849390991, "grad_norm": 0.005699905157546856, "learning_rate": 2.0093089867525957e-06, "loss": 0.0, "step": 12720 }, { "epoch": 0.8198105303860282, "grad_norm": 0.24517601948537127, "learning_rate": 2.0085929108485504e-06, "loss": 0.0037, "step": 12721 }, { "epoch": 0.8198749758329574, "grad_norm": 1.4732163879247422, "learning_rate": 2.0078768349445043e-06, "loss": 0.0111, "step": 12722 }, { "epoch": 0.8199394212798866, "grad_norm": 0.0004949487012933176, "learning_rate": 2.0071607590404586e-06, "loss": 0.0, "step": 12723 }, { "epoch": 0.8200038667268158, "grad_norm": 0.00566935245972329, "learning_rate": 2.0064446831364125e-06, "loss": 0.0, "step": 12724 }, { "epoch": 0.8200683121737449, "grad_norm": 0.000825286660599744, "learning_rate": 2.005728607232367e-06, "loss": 0.0, "step": 12725 }, { "epoch": 0.8201327576206741, "grad_norm": 0.0006255925426041789, "learning_rate": 2.0050125313283207e-06, "loss": 0.0, "step": 12726 }, { "epoch": 0.8201972030676032, "grad_norm": 0.3893231441875631, "learning_rate": 2.0042964554242754e-06, "loss": 0.0011, "step": 12727 }, { "epoch": 0.8202616485145324, "grad_norm": 0.3826723426061182, "learning_rate": 2.0035803795202293e-06, "loss": 0.0002, "step": 12728 }, { "epoch": 0.8203260939614616, "grad_norm": 0.002656368265336166, "learning_rate": 2.0028643036161836e-06, "loss": 0.0, "step": 12729 }, { "epoch": 0.8203905394083908, "grad_norm": 0.0021246091607805234, "learning_rate": 2.0021482277121374e-06, "loss": 0.0, "step": 12730 }, { "epoch": 0.82045498485532, "grad_norm": 0.09306680573160549, "learning_rate": 2.0014321518080917e-06, "loss": 0.0002, "step": 12731 }, { "epoch": 0.8205194303022492, "grad_norm": 0.0013687062223432345, "learning_rate": 2.000716075904046e-06, "loss": 0.0, "step": 12732 }, { "epoch": 0.8205838757491783, "grad_norm": 0.002352388538741838, "learning_rate": 2.0000000000000003e-06, "loss": 0.0, "step": 12733 }, { "epoch": 0.8206483211961075, "grad_norm": 0.000925862567441048, "learning_rate": 1.999283924095954e-06, "loss": 0.0, "step": 12734 }, { "epoch": 0.8207127666430367, "grad_norm": 0.0026647197191653005, "learning_rate": 1.9985678481919085e-06, "loss": 0.0, "step": 12735 }, { "epoch": 0.8207772120899658, "grad_norm": 0.0005046259473754929, "learning_rate": 1.997851772287863e-06, "loss": 0.0, "step": 12736 }, { "epoch": 0.820841657536895, "grad_norm": 0.0010993903374720605, "learning_rate": 1.9971356963838167e-06, "loss": 0.0, "step": 12737 }, { "epoch": 0.8209061029838242, "grad_norm": 0.005264727428518476, "learning_rate": 1.996419620479771e-06, "loss": 0.0, "step": 12738 }, { "epoch": 0.8209705484307533, "grad_norm": 0.002761264554517652, "learning_rate": 1.9957035445757253e-06, "loss": 0.0, "step": 12739 }, { "epoch": 0.8210349938776825, "grad_norm": 0.027237537922393484, "learning_rate": 1.9949874686716796e-06, "loss": 0.0, "step": 12740 }, { "epoch": 0.8210994393246117, "grad_norm": 0.00029789924426951506, "learning_rate": 1.9942713927676335e-06, "loss": 0.0, "step": 12741 }, { "epoch": 0.8211638847715409, "grad_norm": 0.0032934977863916964, "learning_rate": 1.9935553168635878e-06, "loss": 0.0, "step": 12742 }, { "epoch": 0.8212283302184701, "grad_norm": 0.05155409225999489, "learning_rate": 1.9928392409595416e-06, "loss": 0.0006, "step": 12743 }, { "epoch": 0.8212927756653993, "grad_norm": 0.03518922239741282, "learning_rate": 1.992123165055496e-06, "loss": 0.0001, "step": 12744 }, { "epoch": 0.8213572211123285, "grad_norm": 0.0006332805434407577, "learning_rate": 1.9914070891514502e-06, "loss": 0.0, "step": 12745 }, { "epoch": 0.8214216665592576, "grad_norm": 0.001911123414774432, "learning_rate": 1.9906910132474045e-06, "loss": 0.0, "step": 12746 }, { "epoch": 0.8214861120061867, "grad_norm": 0.002936392954632133, "learning_rate": 1.9899749373433584e-06, "loss": 0.0, "step": 12747 }, { "epoch": 0.8215505574531159, "grad_norm": 0.004308939923328707, "learning_rate": 1.9892588614393127e-06, "loss": 0.0, "step": 12748 }, { "epoch": 0.8216150029000451, "grad_norm": 0.07829764482853954, "learning_rate": 1.988542785535267e-06, "loss": 0.0001, "step": 12749 }, { "epoch": 0.8216794483469743, "grad_norm": 0.002653395061435763, "learning_rate": 1.9878267096312213e-06, "loss": 0.0, "step": 12750 }, { "epoch": 0.8217438937939034, "grad_norm": 0.0009104643964468059, "learning_rate": 1.987110633727175e-06, "loss": 0.0, "step": 12751 }, { "epoch": 0.8218083392408326, "grad_norm": 0.008782726808995691, "learning_rate": 1.9863945578231295e-06, "loss": 0.0015, "step": 12752 }, { "epoch": 0.8218727846877618, "grad_norm": 0.01460205373826194, "learning_rate": 1.9856784819190838e-06, "loss": 0.0, "step": 12753 }, { "epoch": 0.821937230134691, "grad_norm": 0.4726310597162175, "learning_rate": 1.9849624060150376e-06, "loss": 0.0026, "step": 12754 }, { "epoch": 0.8220016755816202, "grad_norm": 0.04270153103544648, "learning_rate": 1.984246330110992e-06, "loss": 0.0001, "step": 12755 }, { "epoch": 0.8220661210285494, "grad_norm": 0.5393062261715389, "learning_rate": 1.9835302542069462e-06, "loss": 0.0009, "step": 12756 }, { "epoch": 0.8221305664754786, "grad_norm": 2.490178844356341, "learning_rate": 1.9828141783029005e-06, "loss": 0.0009, "step": 12757 }, { "epoch": 0.8221950119224077, "grad_norm": 0.2574040327126001, "learning_rate": 1.9820981023988544e-06, "loss": 0.0014, "step": 12758 }, { "epoch": 0.8222594573693368, "grad_norm": 0.1584653388532432, "learning_rate": 1.9813820264948087e-06, "loss": 0.0004, "step": 12759 }, { "epoch": 0.822323902816266, "grad_norm": 0.04805075777073572, "learning_rate": 1.9806659505907626e-06, "loss": 0.0003, "step": 12760 }, { "epoch": 0.8223883482631952, "grad_norm": 0.0788094696601303, "learning_rate": 1.979949874686717e-06, "loss": 0.0001, "step": 12761 }, { "epoch": 0.8224527937101244, "grad_norm": 0.004995515025960436, "learning_rate": 1.979233798782671e-06, "loss": 0.0001, "step": 12762 }, { "epoch": 0.8225172391570535, "grad_norm": 0.0001973465953139601, "learning_rate": 1.9785177228786255e-06, "loss": 0.0, "step": 12763 }, { "epoch": 0.8225816846039827, "grad_norm": 0.0012884267076477087, "learning_rate": 1.9778016469745794e-06, "loss": 0.0, "step": 12764 }, { "epoch": 0.8226461300509119, "grad_norm": 0.005440834923307024, "learning_rate": 1.9770855710705337e-06, "loss": 0.0, "step": 12765 }, { "epoch": 0.8227105754978411, "grad_norm": 0.293044830616161, "learning_rate": 1.9763694951664875e-06, "loss": 0.0007, "step": 12766 }, { "epoch": 0.8227750209447703, "grad_norm": 0.00020292315307484786, "learning_rate": 1.975653419262442e-06, "loss": 0.0, "step": 12767 }, { "epoch": 0.8228394663916995, "grad_norm": 0.014746585006457157, "learning_rate": 1.974937343358396e-06, "loss": 0.0001, "step": 12768 }, { "epoch": 0.8229039118386287, "grad_norm": 0.0029612802244079247, "learning_rate": 1.9742212674543504e-06, "loss": 0.0, "step": 12769 }, { "epoch": 0.8229683572855577, "grad_norm": 0.0009473144841386316, "learning_rate": 1.9735051915503043e-06, "loss": 0.0, "step": 12770 }, { "epoch": 0.8230328027324869, "grad_norm": 0.0028644580881699412, "learning_rate": 1.9727891156462586e-06, "loss": 0.0, "step": 12771 }, { "epoch": 0.8230972481794161, "grad_norm": 0.016254687805129026, "learning_rate": 1.972073039742213e-06, "loss": 0.0, "step": 12772 }, { "epoch": 0.8231616936263453, "grad_norm": 0.0022775558697995233, "learning_rate": 1.9713569638381668e-06, "loss": 0.0, "step": 12773 }, { "epoch": 0.8232261390732745, "grad_norm": 0.00014820028206541386, "learning_rate": 1.970640887934121e-06, "loss": 0.0, "step": 12774 }, { "epoch": 0.8232905845202036, "grad_norm": 0.11497893146273225, "learning_rate": 1.9699248120300754e-06, "loss": 0.0016, "step": 12775 }, { "epoch": 0.8233550299671328, "grad_norm": 0.02543805923047829, "learning_rate": 1.9692087361260297e-06, "loss": 0.0, "step": 12776 }, { "epoch": 0.823419475414062, "grad_norm": 0.0007624847949835808, "learning_rate": 1.9684926602219835e-06, "loss": 0.0, "step": 12777 }, { "epoch": 0.8234839208609912, "grad_norm": 0.0002072227547720038, "learning_rate": 1.967776584317938e-06, "loss": 0.0, "step": 12778 }, { "epoch": 0.8235483663079204, "grad_norm": 0.0020613742597048756, "learning_rate": 1.9670605084138917e-06, "loss": 0.0, "step": 12779 }, { "epoch": 0.8236128117548496, "grad_norm": 0.07955951346787625, "learning_rate": 1.9663444325098464e-06, "loss": 0.0002, "step": 12780 }, { "epoch": 0.8236772572017786, "grad_norm": 0.0011062340486165578, "learning_rate": 1.9656283566058003e-06, "loss": 0.0, "step": 12781 }, { "epoch": 0.8237417026487078, "grad_norm": 0.0018729547487564492, "learning_rate": 1.9649122807017546e-06, "loss": 0.0, "step": 12782 }, { "epoch": 0.823806148095637, "grad_norm": 1.9180195290861062, "learning_rate": 1.9641962047977085e-06, "loss": 0.0044, "step": 12783 }, { "epoch": 0.8238705935425662, "grad_norm": 0.0004110962381387348, "learning_rate": 1.963480128893663e-06, "loss": 0.0, "step": 12784 }, { "epoch": 0.8239350389894954, "grad_norm": 0.002992485947360091, "learning_rate": 1.962764052989617e-06, "loss": 0.0, "step": 12785 }, { "epoch": 0.8239994844364246, "grad_norm": 0.23977967782600768, "learning_rate": 1.9620479770855714e-06, "loss": 0.0028, "step": 12786 }, { "epoch": 0.8240639298833538, "grad_norm": 0.0001052902595383848, "learning_rate": 1.9613319011815253e-06, "loss": 0.0, "step": 12787 }, { "epoch": 0.8241283753302829, "grad_norm": 0.0009807068807572042, "learning_rate": 1.9606158252774796e-06, "loss": 0.0, "step": 12788 }, { "epoch": 0.8241928207772121, "grad_norm": 0.00017703910016623285, "learning_rate": 1.959899749373434e-06, "loss": 0.0, "step": 12789 }, { "epoch": 0.8242572662241413, "grad_norm": 0.0008779980011984871, "learning_rate": 1.9591836734693877e-06, "loss": 0.0, "step": 12790 }, { "epoch": 0.8243217116710705, "grad_norm": 0.0007441098091688068, "learning_rate": 1.958467597565342e-06, "loss": 0.0, "step": 12791 }, { "epoch": 0.8243861571179996, "grad_norm": 0.0012419554472922133, "learning_rate": 1.9577515216612963e-06, "loss": 0.0, "step": 12792 }, { "epoch": 0.8244506025649287, "grad_norm": 0.0016284702447365287, "learning_rate": 1.9570354457572506e-06, "loss": 0.0, "step": 12793 }, { "epoch": 0.8245150480118579, "grad_norm": 0.0033685588571939636, "learning_rate": 1.9563193698532045e-06, "loss": 0.0, "step": 12794 }, { "epoch": 0.8245794934587871, "grad_norm": 0.0286288462786047, "learning_rate": 1.955603293949159e-06, "loss": 0.0, "step": 12795 }, { "epoch": 0.8246439389057163, "grad_norm": 0.0021863046623147764, "learning_rate": 1.9548872180451127e-06, "loss": 0.0, "step": 12796 }, { "epoch": 0.8247083843526455, "grad_norm": 0.002743261369717264, "learning_rate": 1.9541711421410674e-06, "loss": 0.0, "step": 12797 }, { "epoch": 0.8247728297995747, "grad_norm": 0.030985487605830843, "learning_rate": 1.9534550662370213e-06, "loss": 0.0, "step": 12798 }, { "epoch": 0.8248372752465039, "grad_norm": 0.011039880199293242, "learning_rate": 1.9527389903329756e-06, "loss": 0.0, "step": 12799 }, { "epoch": 0.824901720693433, "grad_norm": 0.0006824560142423226, "learning_rate": 1.9520229144289295e-06, "loss": 0.0, "step": 12800 }, { "epoch": 0.8249661661403622, "grad_norm": 0.010945881791514213, "learning_rate": 1.9513068385248838e-06, "loss": 0.0001, "step": 12801 }, { "epoch": 0.8250306115872914, "grad_norm": 0.00721237801787959, "learning_rate": 1.950590762620838e-06, "loss": 0.0, "step": 12802 }, { "epoch": 0.8250950570342205, "grad_norm": 0.0004965975994171999, "learning_rate": 1.9498746867167923e-06, "loss": 0.0, "step": 12803 }, { "epoch": 0.8251595024811497, "grad_norm": 0.009016010154456617, "learning_rate": 1.9491586108127462e-06, "loss": 0.0, "step": 12804 }, { "epoch": 0.8252239479280788, "grad_norm": 0.0012284352036761618, "learning_rate": 1.9484425349087005e-06, "loss": 0.0, "step": 12805 }, { "epoch": 0.825288393375008, "grad_norm": 0.0028087657996823242, "learning_rate": 1.947726459004655e-06, "loss": 0.0, "step": 12806 }, { "epoch": 0.8253528388219372, "grad_norm": 0.0011335282820437035, "learning_rate": 1.9470103831006087e-06, "loss": 0.0, "step": 12807 }, { "epoch": 0.8254172842688664, "grad_norm": 0.024974543026714955, "learning_rate": 1.946294307196563e-06, "loss": 0.0, "step": 12808 }, { "epoch": 0.8254817297157956, "grad_norm": 0.01026302773500995, "learning_rate": 1.9455782312925173e-06, "loss": 0.0001, "step": 12809 }, { "epoch": 0.8255461751627248, "grad_norm": 0.00013102626201935377, "learning_rate": 1.9448621553884716e-06, "loss": 0.0, "step": 12810 }, { "epoch": 0.825610620609654, "grad_norm": 0.05074874360207065, "learning_rate": 1.9441460794844255e-06, "loss": 0.0002, "step": 12811 }, { "epoch": 0.8256750660565831, "grad_norm": 0.16791835295001642, "learning_rate": 1.9434300035803798e-06, "loss": 0.0002, "step": 12812 }, { "epoch": 0.8257395115035123, "grad_norm": 0.22231346647938868, "learning_rate": 1.9427139276763336e-06, "loss": 0.0009, "step": 12813 }, { "epoch": 0.8258039569504414, "grad_norm": 0.005087052383364399, "learning_rate": 1.941997851772288e-06, "loss": 0.0001, "step": 12814 }, { "epoch": 0.8258684023973706, "grad_norm": 0.29658397413916804, "learning_rate": 1.9412817758682422e-06, "loss": 0.0004, "step": 12815 }, { "epoch": 0.8259328478442998, "grad_norm": 0.00015184382402304126, "learning_rate": 1.9405656999641965e-06, "loss": 0.0, "step": 12816 }, { "epoch": 0.825997293291229, "grad_norm": 0.00011728335842496472, "learning_rate": 1.9398496240601504e-06, "loss": 0.0, "step": 12817 }, { "epoch": 0.8260617387381581, "grad_norm": 0.12153047977766473, "learning_rate": 1.9391335481561047e-06, "loss": 0.0014, "step": 12818 }, { "epoch": 0.8261261841850873, "grad_norm": 0.0024044143837508186, "learning_rate": 1.9384174722520586e-06, "loss": 0.0, "step": 12819 }, { "epoch": 0.8261906296320165, "grad_norm": 0.26979896131740444, "learning_rate": 1.9377013963480133e-06, "loss": 0.0059, "step": 12820 }, { "epoch": 0.8262550750789457, "grad_norm": 0.007687804269723589, "learning_rate": 1.936985320443967e-06, "loss": 0.0, "step": 12821 }, { "epoch": 0.8263195205258749, "grad_norm": 0.001681070516331721, "learning_rate": 1.9362692445399215e-06, "loss": 0.0, "step": 12822 }, { "epoch": 0.8263839659728041, "grad_norm": 0.010557766168112366, "learning_rate": 1.9355531686358754e-06, "loss": 0.0, "step": 12823 }, { "epoch": 0.8264484114197332, "grad_norm": 0.0008512911900243197, "learning_rate": 1.9348370927318297e-06, "loss": 0.0, "step": 12824 }, { "epoch": 0.8265128568666623, "grad_norm": 0.0013926741667203172, "learning_rate": 1.934121016827784e-06, "loss": 0.0, "step": 12825 }, { "epoch": 0.8265773023135915, "grad_norm": 0.0006454232476995218, "learning_rate": 1.9334049409237383e-06, "loss": 0.0, "step": 12826 }, { "epoch": 0.8266417477605207, "grad_norm": 0.007012337352928124, "learning_rate": 1.932688865019692e-06, "loss": 0.0, "step": 12827 }, { "epoch": 0.8267061932074499, "grad_norm": 0.0016290758214399952, "learning_rate": 1.9319727891156464e-06, "loss": 0.0, "step": 12828 }, { "epoch": 0.826770638654379, "grad_norm": 0.0004002776358242824, "learning_rate": 1.9312567132116007e-06, "loss": 0.0, "step": 12829 }, { "epoch": 0.8268350841013082, "grad_norm": 0.0006474920133281832, "learning_rate": 1.9305406373075546e-06, "loss": 0.0, "step": 12830 }, { "epoch": 0.8268995295482374, "grad_norm": 0.0004922729677889804, "learning_rate": 1.929824561403509e-06, "loss": 0.0, "step": 12831 }, { "epoch": 0.8269639749951666, "grad_norm": 0.015237039801407486, "learning_rate": 1.929108485499463e-06, "loss": 0.0, "step": 12832 }, { "epoch": 0.8270284204420958, "grad_norm": 0.001385017502838934, "learning_rate": 1.9283924095954175e-06, "loss": 0.0, "step": 12833 }, { "epoch": 0.827092865889025, "grad_norm": 0.00036743085902484786, "learning_rate": 1.9276763336913714e-06, "loss": 0.0, "step": 12834 }, { "epoch": 0.8271573113359542, "grad_norm": 0.21470143199397795, "learning_rate": 1.9269602577873257e-06, "loss": 0.0052, "step": 12835 }, { "epoch": 0.8272217567828833, "grad_norm": 0.20051220260145272, "learning_rate": 1.9262441818832795e-06, "loss": 0.0009, "step": 12836 }, { "epoch": 0.8272862022298124, "grad_norm": 0.0013799034224737958, "learning_rate": 1.925528105979234e-06, "loss": 0.0, "step": 12837 }, { "epoch": 0.8273506476767416, "grad_norm": 0.0011499711319856818, "learning_rate": 1.924812030075188e-06, "loss": 0.0, "step": 12838 }, { "epoch": 0.8274150931236708, "grad_norm": 0.004180721553757159, "learning_rate": 1.9240959541711424e-06, "loss": 0.0, "step": 12839 }, { "epoch": 0.8274795385706, "grad_norm": 0.00888103511971256, "learning_rate": 1.9233798782670963e-06, "loss": 0.0, "step": 12840 }, { "epoch": 0.8275439840175292, "grad_norm": 0.0004716433398487072, "learning_rate": 1.9226638023630506e-06, "loss": 0.0, "step": 12841 }, { "epoch": 0.8276084294644583, "grad_norm": 3.567135691782576e-05, "learning_rate": 1.921947726459005e-06, "loss": 0.0, "step": 12842 }, { "epoch": 0.8276728749113875, "grad_norm": 0.006151835977891471, "learning_rate": 1.921231650554959e-06, "loss": 0.0, "step": 12843 }, { "epoch": 0.8277373203583167, "grad_norm": 0.0017068130805905073, "learning_rate": 1.920515574650913e-06, "loss": 0.0, "step": 12844 }, { "epoch": 0.8278017658052459, "grad_norm": 0.029947653519642226, "learning_rate": 1.9197994987468674e-06, "loss": 0.0015, "step": 12845 }, { "epoch": 0.8278662112521751, "grad_norm": 0.03704959829540365, "learning_rate": 1.9190834228428217e-06, "loss": 0.0001, "step": 12846 }, { "epoch": 0.8279306566991043, "grad_norm": 0.0018888023632662365, "learning_rate": 1.9183673469387756e-06, "loss": 0.0, "step": 12847 }, { "epoch": 0.8279951021460333, "grad_norm": 0.0032775118244479816, "learning_rate": 1.91765127103473e-06, "loss": 0.0, "step": 12848 }, { "epoch": 0.8280595475929625, "grad_norm": 0.002627906814267534, "learning_rate": 1.9169351951306837e-06, "loss": 0.0, "step": 12849 }, { "epoch": 0.8281239930398917, "grad_norm": 0.0008013443872099772, "learning_rate": 1.9162191192266385e-06, "loss": 0.0, "step": 12850 }, { "epoch": 0.8281884384868209, "grad_norm": 0.0009410073850608853, "learning_rate": 1.9155030433225923e-06, "loss": 0.0, "step": 12851 }, { "epoch": 0.8282528839337501, "grad_norm": 0.0010344266427138322, "learning_rate": 1.9147869674185466e-06, "loss": 0.0, "step": 12852 }, { "epoch": 0.8283173293806793, "grad_norm": 0.00014077844258373367, "learning_rate": 1.9140708915145005e-06, "loss": 0.0, "step": 12853 }, { "epoch": 0.8283817748276084, "grad_norm": 0.00022600811992336105, "learning_rate": 1.913354815610455e-06, "loss": 0.0, "step": 12854 }, { "epoch": 0.8284462202745376, "grad_norm": 0.0003438217147535301, "learning_rate": 1.912638739706409e-06, "loss": 0.0, "step": 12855 }, { "epoch": 0.8285106657214668, "grad_norm": 1.3295451481395686, "learning_rate": 1.9119226638023634e-06, "loss": 0.0073, "step": 12856 }, { "epoch": 0.828575111168396, "grad_norm": 0.09529210403966394, "learning_rate": 1.9112065878983173e-06, "loss": 0.0002, "step": 12857 }, { "epoch": 0.8286395566153252, "grad_norm": 0.00013075154801560885, "learning_rate": 1.9104905119942716e-06, "loss": 0.0, "step": 12858 }, { "epoch": 0.8287040020622543, "grad_norm": 0.005347229015423188, "learning_rate": 1.909774436090226e-06, "loss": 0.0, "step": 12859 }, { "epoch": 0.8287684475091834, "grad_norm": 0.011590017739172188, "learning_rate": 1.9090583601861798e-06, "loss": 0.0, "step": 12860 }, { "epoch": 0.8288328929561126, "grad_norm": 0.002305119796443571, "learning_rate": 1.908342284282134e-06, "loss": 0.0, "step": 12861 }, { "epoch": 0.8288973384030418, "grad_norm": 0.0032701660571884997, "learning_rate": 1.9076262083780883e-06, "loss": 0.0, "step": 12862 }, { "epoch": 0.828961783849971, "grad_norm": 0.00032209619860416687, "learning_rate": 1.9069101324740424e-06, "loss": 0.0, "step": 12863 }, { "epoch": 0.8290262292969002, "grad_norm": 0.0001742985353381191, "learning_rate": 1.9061940565699965e-06, "loss": 0.0, "step": 12864 }, { "epoch": 0.8290906747438294, "grad_norm": 0.006867729174768813, "learning_rate": 1.9054779806659506e-06, "loss": 0.0, "step": 12865 }, { "epoch": 0.8291551201907585, "grad_norm": 6.386471846661756e-05, "learning_rate": 1.904761904761905e-06, "loss": 0.0, "step": 12866 }, { "epoch": 0.8292195656376877, "grad_norm": 8.795452018101676e-05, "learning_rate": 1.9040458288578592e-06, "loss": 0.0, "step": 12867 }, { "epoch": 0.8292840110846169, "grad_norm": 0.014635279825817802, "learning_rate": 1.9033297529538133e-06, "loss": 0.0, "step": 12868 }, { "epoch": 0.8293484565315461, "grad_norm": 0.20185402148521048, "learning_rate": 1.9026136770497674e-06, "loss": 0.0011, "step": 12869 }, { "epoch": 0.8294129019784752, "grad_norm": 3.5018456418513304e-05, "learning_rate": 1.9018976011457217e-06, "loss": 0.0, "step": 12870 }, { "epoch": 0.8294773474254044, "grad_norm": 0.0011095530381541555, "learning_rate": 1.9011815252416758e-06, "loss": 0.0, "step": 12871 }, { "epoch": 0.8295417928723335, "grad_norm": 0.06928320343427943, "learning_rate": 1.9004654493376299e-06, "loss": 0.0001, "step": 12872 }, { "epoch": 0.8296062383192627, "grad_norm": 0.005472200014042761, "learning_rate": 1.8997493734335842e-06, "loss": 0.0001, "step": 12873 }, { "epoch": 0.8296706837661919, "grad_norm": 0.01115602003143175, "learning_rate": 1.8990332975295385e-06, "loss": 0.0001, "step": 12874 }, { "epoch": 0.8297351292131211, "grad_norm": 0.0010032057744439192, "learning_rate": 1.8983172216254925e-06, "loss": 0.0, "step": 12875 }, { "epoch": 0.8297995746600503, "grad_norm": 0.002481002567431664, "learning_rate": 1.8976011457214466e-06, "loss": 0.0, "step": 12876 }, { "epoch": 0.8298640201069795, "grad_norm": 0.005168957551730069, "learning_rate": 1.8968850698174007e-06, "loss": 0.0, "step": 12877 }, { "epoch": 0.8299284655539086, "grad_norm": 0.0038538895973451717, "learning_rate": 1.8961689939133548e-06, "loss": 0.0, "step": 12878 }, { "epoch": 0.8299929110008378, "grad_norm": 0.12830823844414282, "learning_rate": 1.8954529180093093e-06, "loss": 0.0001, "step": 12879 }, { "epoch": 0.830057356447767, "grad_norm": 0.0022606158190638017, "learning_rate": 1.8947368421052634e-06, "loss": 0.0, "step": 12880 }, { "epoch": 0.8301218018946961, "grad_norm": 0.1553001860605195, "learning_rate": 1.8940207662012175e-06, "loss": 0.0007, "step": 12881 }, { "epoch": 0.8301862473416253, "grad_norm": 9.681461373014582e-05, "learning_rate": 1.8933046902971716e-06, "loss": 0.0, "step": 12882 }, { "epoch": 0.8302506927885545, "grad_norm": 0.006053399325819987, "learning_rate": 1.8925886143931257e-06, "loss": 0.0, "step": 12883 }, { "epoch": 0.8303151382354836, "grad_norm": 0.0003135291907860067, "learning_rate": 1.89187253848908e-06, "loss": 0.0, "step": 12884 }, { "epoch": 0.8303795836824128, "grad_norm": 0.0016962123214610799, "learning_rate": 1.8911564625850343e-06, "loss": 0.0, "step": 12885 }, { "epoch": 0.830444029129342, "grad_norm": 0.0022821039817459542, "learning_rate": 1.8904403866809883e-06, "loss": 0.0, "step": 12886 }, { "epoch": 0.8305084745762712, "grad_norm": 0.44118902887440714, "learning_rate": 1.8897243107769424e-06, "loss": 0.0008, "step": 12887 }, { "epoch": 0.8305729200232004, "grad_norm": 0.0026145982784008394, "learning_rate": 1.8890082348728967e-06, "loss": 0.0, "step": 12888 }, { "epoch": 0.8306373654701296, "grad_norm": 2.009516340008284e-05, "learning_rate": 1.8882921589688508e-06, "loss": 0.0, "step": 12889 }, { "epoch": 0.8307018109170587, "grad_norm": 0.00039474701415418735, "learning_rate": 1.887576083064805e-06, "loss": 0.0, "step": 12890 }, { "epoch": 0.8307662563639879, "grad_norm": 0.02382485355526577, "learning_rate": 1.8868600071607592e-06, "loss": 0.0001, "step": 12891 }, { "epoch": 0.830830701810917, "grad_norm": 0.005398793179076116, "learning_rate": 1.8861439312567135e-06, "loss": 0.0, "step": 12892 }, { "epoch": 0.8308951472578462, "grad_norm": 0.0768639292053062, "learning_rate": 1.8854278553526676e-06, "loss": 0.0002, "step": 12893 }, { "epoch": 0.8309595927047754, "grad_norm": 0.008180770152188349, "learning_rate": 1.8847117794486217e-06, "loss": 0.0, "step": 12894 }, { "epoch": 0.8310240381517046, "grad_norm": 0.003654351436249659, "learning_rate": 1.8839957035445758e-06, "loss": 0.0, "step": 12895 }, { "epoch": 0.8310884835986337, "grad_norm": 0.0013665234520897544, "learning_rate": 1.8832796276405303e-06, "loss": 0.0, "step": 12896 }, { "epoch": 0.8311529290455629, "grad_norm": 0.00821381169757801, "learning_rate": 1.8825635517364844e-06, "loss": 0.0, "step": 12897 }, { "epoch": 0.8312173744924921, "grad_norm": 0.02980132621626116, "learning_rate": 1.8818474758324384e-06, "loss": 0.0001, "step": 12898 }, { "epoch": 0.8312818199394213, "grad_norm": 0.15304842610763447, "learning_rate": 1.8811313999283925e-06, "loss": 0.0002, "step": 12899 }, { "epoch": 0.8313462653863505, "grad_norm": 3.2859700974284204e-05, "learning_rate": 1.8804153240243466e-06, "loss": 0.0, "step": 12900 }, { "epoch": 0.8314107108332797, "grad_norm": 7.330661953553586e-05, "learning_rate": 1.8796992481203007e-06, "loss": 0.0, "step": 12901 }, { "epoch": 0.8314751562802088, "grad_norm": 0.0016363356131233538, "learning_rate": 1.8789831722162552e-06, "loss": 0.0, "step": 12902 }, { "epoch": 0.8315396017271379, "grad_norm": 0.0023635623644680886, "learning_rate": 1.8782670963122093e-06, "loss": 0.0, "step": 12903 }, { "epoch": 0.8316040471740671, "grad_norm": 0.0011234160999025852, "learning_rate": 1.8775510204081634e-06, "loss": 0.0, "step": 12904 }, { "epoch": 0.8316684926209963, "grad_norm": 0.00569605487531826, "learning_rate": 1.8768349445041175e-06, "loss": 0.0, "step": 12905 }, { "epoch": 0.8317329380679255, "grad_norm": 0.11860830172496771, "learning_rate": 1.8761188686000718e-06, "loss": 0.0001, "step": 12906 }, { "epoch": 0.8317973835148547, "grad_norm": 0.0001888324722297905, "learning_rate": 1.8754027926960259e-06, "loss": 0.0, "step": 12907 }, { "epoch": 0.8318618289617838, "grad_norm": 0.043075241266047205, "learning_rate": 1.8746867167919802e-06, "loss": 0.0001, "step": 12908 }, { "epoch": 0.831926274408713, "grad_norm": 0.0002445940055020511, "learning_rate": 1.8739706408879342e-06, "loss": 0.0, "step": 12909 }, { "epoch": 0.8319907198556422, "grad_norm": 0.002135059555199994, "learning_rate": 1.8732545649838885e-06, "loss": 0.0, "step": 12910 }, { "epoch": 0.8320551653025714, "grad_norm": 3.074306074790163, "learning_rate": 1.8725384890798426e-06, "loss": 0.0207, "step": 12911 }, { "epoch": 0.8321196107495006, "grad_norm": 0.0009971946495302751, "learning_rate": 1.8718224131757967e-06, "loss": 0.0, "step": 12912 }, { "epoch": 0.8321840561964298, "grad_norm": 0.0005966592023226857, "learning_rate": 1.8711063372717508e-06, "loss": 0.0, "step": 12913 }, { "epoch": 0.832248501643359, "grad_norm": 0.0005944522041070304, "learning_rate": 1.8703902613677053e-06, "loss": 0.0, "step": 12914 }, { "epoch": 0.832312947090288, "grad_norm": 0.00020677188897750951, "learning_rate": 1.8696741854636594e-06, "loss": 0.0, "step": 12915 }, { "epoch": 0.8323773925372172, "grad_norm": 0.00010503859283321327, "learning_rate": 1.8689581095596135e-06, "loss": 0.0, "step": 12916 }, { "epoch": 0.8324418379841464, "grad_norm": 0.06789593276378175, "learning_rate": 1.8682420336555676e-06, "loss": 0.0002, "step": 12917 }, { "epoch": 0.8325062834310756, "grad_norm": 0.00022078347307954933, "learning_rate": 1.8675259577515217e-06, "loss": 0.0, "step": 12918 }, { "epoch": 0.8325707288780048, "grad_norm": 0.14633415785093218, "learning_rate": 1.866809881847476e-06, "loss": 0.0004, "step": 12919 }, { "epoch": 0.832635174324934, "grad_norm": 0.26466572272238886, "learning_rate": 1.8660938059434303e-06, "loss": 0.0044, "step": 12920 }, { "epoch": 0.8326996197718631, "grad_norm": 0.00018119899261728837, "learning_rate": 1.8653777300393843e-06, "loss": 0.0, "step": 12921 }, { "epoch": 0.8327640652187923, "grad_norm": 0.49176382570365107, "learning_rate": 1.8646616541353384e-06, "loss": 0.0004, "step": 12922 }, { "epoch": 0.8328285106657215, "grad_norm": 0.17224296823060695, "learning_rate": 1.8639455782312927e-06, "loss": 0.0004, "step": 12923 }, { "epoch": 0.8328929561126507, "grad_norm": 0.8200194062285915, "learning_rate": 1.8632295023272468e-06, "loss": 0.0046, "step": 12924 }, { "epoch": 0.8329574015595799, "grad_norm": 0.022811282350755628, "learning_rate": 1.862513426423201e-06, "loss": 0.0, "step": 12925 }, { "epoch": 0.8330218470065089, "grad_norm": 0.0002876985247923036, "learning_rate": 1.8617973505191552e-06, "loss": 0.0, "step": 12926 }, { "epoch": 0.8330862924534381, "grad_norm": 0.00026064448045058826, "learning_rate": 1.8610812746151095e-06, "loss": 0.0, "step": 12927 }, { "epoch": 0.8331507379003673, "grad_norm": 0.00276155562535339, "learning_rate": 1.8603651987110636e-06, "loss": 0.0, "step": 12928 }, { "epoch": 0.8332151833472965, "grad_norm": 0.0018431929870960982, "learning_rate": 1.8596491228070177e-06, "loss": 0.0, "step": 12929 }, { "epoch": 0.8332796287942257, "grad_norm": 0.0007927471892093145, "learning_rate": 1.8589330469029718e-06, "loss": 0.0, "step": 12930 }, { "epoch": 0.8333440742411549, "grad_norm": 0.602887935199894, "learning_rate": 1.8582169709989263e-06, "loss": 0.0018, "step": 12931 }, { "epoch": 0.833408519688084, "grad_norm": 0.047878668853832275, "learning_rate": 1.8575008950948804e-06, "loss": 0.0001, "step": 12932 }, { "epoch": 0.8334729651350132, "grad_norm": 0.03237215842096609, "learning_rate": 1.8567848191908345e-06, "loss": 0.0, "step": 12933 }, { "epoch": 0.8335374105819424, "grad_norm": 0.014911026549980291, "learning_rate": 1.8560687432867885e-06, "loss": 0.0, "step": 12934 }, { "epoch": 0.8336018560288716, "grad_norm": 0.01923477641968151, "learning_rate": 1.8553526673827426e-06, "loss": 0.0001, "step": 12935 }, { "epoch": 0.8336663014758008, "grad_norm": 0.2234138849757082, "learning_rate": 1.8546365914786967e-06, "loss": 0.0002, "step": 12936 }, { "epoch": 0.8337307469227299, "grad_norm": 0.023034146600674356, "learning_rate": 1.8539205155746512e-06, "loss": 0.0, "step": 12937 }, { "epoch": 0.833795192369659, "grad_norm": 0.004499721841399995, "learning_rate": 1.8532044396706053e-06, "loss": 0.0, "step": 12938 }, { "epoch": 0.8338596378165882, "grad_norm": 0.027223810403340872, "learning_rate": 1.8524883637665594e-06, "loss": 0.0, "step": 12939 }, { "epoch": 0.8339240832635174, "grad_norm": 0.0010825319904780766, "learning_rate": 1.8517722878625135e-06, "loss": 0.0, "step": 12940 }, { "epoch": 0.8339885287104466, "grad_norm": 0.00017965621067447417, "learning_rate": 1.8510562119584678e-06, "loss": 0.0, "step": 12941 }, { "epoch": 0.8340529741573758, "grad_norm": 0.3216080980251922, "learning_rate": 1.8503401360544219e-06, "loss": 0.0006, "step": 12942 }, { "epoch": 0.834117419604305, "grad_norm": 0.0010344771420191343, "learning_rate": 1.8496240601503762e-06, "loss": 0.0, "step": 12943 }, { "epoch": 0.8341818650512342, "grad_norm": 0.0012871061826992497, "learning_rate": 1.8489079842463303e-06, "loss": 0.0, "step": 12944 }, { "epoch": 0.8342463104981633, "grad_norm": 0.001207658190563529, "learning_rate": 1.8481919083422846e-06, "loss": 0.0, "step": 12945 }, { "epoch": 0.8343107559450925, "grad_norm": 0.0007330767288757314, "learning_rate": 1.8474758324382386e-06, "loss": 0.0, "step": 12946 }, { "epoch": 0.8343752013920217, "grad_norm": 0.004926647550943549, "learning_rate": 1.8467597565341927e-06, "loss": 0.0, "step": 12947 }, { "epoch": 0.8344396468389508, "grad_norm": 0.01605856973953187, "learning_rate": 1.8460436806301468e-06, "loss": 0.0, "step": 12948 }, { "epoch": 0.83450409228588, "grad_norm": 0.0005230443666051655, "learning_rate": 1.8453276047261013e-06, "loss": 0.0, "step": 12949 }, { "epoch": 0.8345685377328091, "grad_norm": 0.0008104200669707647, "learning_rate": 1.8446115288220554e-06, "loss": 0.0, "step": 12950 }, { "epoch": 0.8346329831797383, "grad_norm": 0.011457724398698297, "learning_rate": 1.8438954529180095e-06, "loss": 0.0, "step": 12951 }, { "epoch": 0.8346974286266675, "grad_norm": 0.024955868465614733, "learning_rate": 1.8431793770139636e-06, "loss": 0.0003, "step": 12952 }, { "epoch": 0.8347618740735967, "grad_norm": 0.0011789983636083115, "learning_rate": 1.8424633011099177e-06, "loss": 0.0, "step": 12953 }, { "epoch": 0.8348263195205259, "grad_norm": 0.00127945397321626, "learning_rate": 1.8417472252058718e-06, "loss": 0.0, "step": 12954 }, { "epoch": 0.8348907649674551, "grad_norm": 0.0005486498215946787, "learning_rate": 1.8410311493018263e-06, "loss": 0.0, "step": 12955 }, { "epoch": 0.8349552104143843, "grad_norm": 0.0013076929794172173, "learning_rate": 1.8403150733977804e-06, "loss": 0.0, "step": 12956 }, { "epoch": 0.8350196558613134, "grad_norm": 0.002699150201750239, "learning_rate": 1.8395989974937344e-06, "loss": 0.0, "step": 12957 }, { "epoch": 0.8350841013082426, "grad_norm": 0.0014782031120580252, "learning_rate": 1.8388829215896885e-06, "loss": 0.0, "step": 12958 }, { "epoch": 0.8351485467551717, "grad_norm": 0.00013325395203645215, "learning_rate": 1.8381668456856428e-06, "loss": 0.0, "step": 12959 }, { "epoch": 0.8352129922021009, "grad_norm": 0.00028691027465177585, "learning_rate": 1.837450769781597e-06, "loss": 0.0, "step": 12960 }, { "epoch": 0.8352774376490301, "grad_norm": 0.015239722260245718, "learning_rate": 1.8367346938775512e-06, "loss": 0.0001, "step": 12961 }, { "epoch": 0.8353418830959592, "grad_norm": 0.01791622106760161, "learning_rate": 1.8360186179735053e-06, "loss": 0.0001, "step": 12962 }, { "epoch": 0.8354063285428884, "grad_norm": 0.0010604469781897421, "learning_rate": 1.8353025420694596e-06, "loss": 0.0, "step": 12963 }, { "epoch": 0.8354707739898176, "grad_norm": 0.0024505637205691246, "learning_rate": 1.8345864661654137e-06, "loss": 0.0, "step": 12964 }, { "epoch": 0.8355352194367468, "grad_norm": 0.0013904709710636813, "learning_rate": 1.8338703902613678e-06, "loss": 0.0, "step": 12965 }, { "epoch": 0.835599664883676, "grad_norm": 0.03134726683056867, "learning_rate": 1.833154314357322e-06, "loss": 0.0001, "step": 12966 }, { "epoch": 0.8356641103306052, "grad_norm": 0.32471975430680294, "learning_rate": 1.8324382384532764e-06, "loss": 0.0007, "step": 12967 }, { "epoch": 0.8357285557775344, "grad_norm": 0.46926647978812036, "learning_rate": 1.8317221625492305e-06, "loss": 0.0024, "step": 12968 }, { "epoch": 0.8357930012244635, "grad_norm": 0.0007497435298024752, "learning_rate": 1.8310060866451845e-06, "loss": 0.0, "step": 12969 }, { "epoch": 0.8358574466713926, "grad_norm": 8.529497298495229e-05, "learning_rate": 1.8302900107411386e-06, "loss": 0.0, "step": 12970 }, { "epoch": 0.8359218921183218, "grad_norm": 0.010444437001298015, "learning_rate": 1.8295739348370927e-06, "loss": 0.0001, "step": 12971 }, { "epoch": 0.835986337565251, "grad_norm": 0.0005033715826079014, "learning_rate": 1.8288578589330472e-06, "loss": 0.0, "step": 12972 }, { "epoch": 0.8360507830121802, "grad_norm": 0.010451150054464193, "learning_rate": 1.8281417830290013e-06, "loss": 0.0, "step": 12973 }, { "epoch": 0.8361152284591093, "grad_norm": 0.1669388847722097, "learning_rate": 1.8274257071249554e-06, "loss": 0.0002, "step": 12974 }, { "epoch": 0.8361796739060385, "grad_norm": 3.708374839204555e-05, "learning_rate": 1.8267096312209095e-06, "loss": 0.0, "step": 12975 }, { "epoch": 0.8362441193529677, "grad_norm": 0.024097830011570242, "learning_rate": 1.8259935553168636e-06, "loss": 0.0, "step": 12976 }, { "epoch": 0.8363085647998969, "grad_norm": 0.00035244492069838926, "learning_rate": 1.8252774794128179e-06, "loss": 0.0, "step": 12977 }, { "epoch": 0.8363730102468261, "grad_norm": 0.0006009073649762303, "learning_rate": 1.8245614035087722e-06, "loss": 0.0, "step": 12978 }, { "epoch": 0.8364374556937553, "grad_norm": 0.002436471679603375, "learning_rate": 1.8238453276047263e-06, "loss": 0.0, "step": 12979 }, { "epoch": 0.8365019011406845, "grad_norm": 0.0019328984019827203, "learning_rate": 1.8231292517006803e-06, "loss": 0.0, "step": 12980 }, { "epoch": 0.8365663465876135, "grad_norm": 0.00047925150121626243, "learning_rate": 1.8224131757966346e-06, "loss": 0.0, "step": 12981 }, { "epoch": 0.8366307920345427, "grad_norm": 0.0023810597907460897, "learning_rate": 1.8216970998925887e-06, "loss": 0.0, "step": 12982 }, { "epoch": 0.8366952374814719, "grad_norm": 0.005150080251448669, "learning_rate": 1.8209810239885428e-06, "loss": 0.0, "step": 12983 }, { "epoch": 0.8367596829284011, "grad_norm": 0.0005138275505945283, "learning_rate": 1.8202649480844971e-06, "loss": 0.0, "step": 12984 }, { "epoch": 0.8368241283753303, "grad_norm": 0.0001395393045962735, "learning_rate": 1.8195488721804514e-06, "loss": 0.0, "step": 12985 }, { "epoch": 0.8368885738222595, "grad_norm": 0.015921765880894, "learning_rate": 1.8188327962764055e-06, "loss": 0.0001, "step": 12986 }, { "epoch": 0.8369530192691886, "grad_norm": 0.10171368698405213, "learning_rate": 1.8181167203723596e-06, "loss": 0.0009, "step": 12987 }, { "epoch": 0.8370174647161178, "grad_norm": 0.003001050790157522, "learning_rate": 1.8174006444683137e-06, "loss": 0.0, "step": 12988 }, { "epoch": 0.837081910163047, "grad_norm": 0.0007591269273822723, "learning_rate": 1.8166845685642678e-06, "loss": 0.0, "step": 12989 }, { "epoch": 0.8371463556099762, "grad_norm": 0.0001454134195656195, "learning_rate": 1.8159684926602223e-06, "loss": 0.0, "step": 12990 }, { "epoch": 0.8372108010569054, "grad_norm": 0.10966075031034136, "learning_rate": 1.8152524167561764e-06, "loss": 0.0001, "step": 12991 }, { "epoch": 0.8372752465038346, "grad_norm": 0.004862647588603843, "learning_rate": 1.8145363408521305e-06, "loss": 0.0, "step": 12992 }, { "epoch": 0.8373396919507636, "grad_norm": 0.19124268335483594, "learning_rate": 1.8138202649480845e-06, "loss": 0.0006, "step": 12993 }, { "epoch": 0.8374041373976928, "grad_norm": 0.00014524860075802834, "learning_rate": 1.8131041890440388e-06, "loss": 0.0, "step": 12994 }, { "epoch": 0.837468582844622, "grad_norm": 0.04439166994184201, "learning_rate": 1.812388113139993e-06, "loss": 0.0001, "step": 12995 }, { "epoch": 0.8375330282915512, "grad_norm": 6.69389849704551e-05, "learning_rate": 1.8116720372359472e-06, "loss": 0.0, "step": 12996 }, { "epoch": 0.8375974737384804, "grad_norm": 0.001974699603729132, "learning_rate": 1.8109559613319013e-06, "loss": 0.0, "step": 12997 }, { "epoch": 0.8376619191854096, "grad_norm": 0.00018389771702319728, "learning_rate": 1.8102398854278556e-06, "loss": 0.0, "step": 12998 }, { "epoch": 0.8377263646323387, "grad_norm": 0.12907701798589907, "learning_rate": 1.8095238095238097e-06, "loss": 0.0002, "step": 12999 }, { "epoch": 0.8377908100792679, "grad_norm": 0.23746540765057983, "learning_rate": 1.8088077336197638e-06, "loss": 0.0008, "step": 13000 }, { "epoch": 0.8378552555261971, "grad_norm": 0.05998519156857347, "learning_rate": 1.808091657715718e-06, "loss": 0.0001, "step": 13001 }, { "epoch": 0.8379197009731263, "grad_norm": 0.001119847624552485, "learning_rate": 1.8073755818116724e-06, "loss": 0.0, "step": 13002 }, { "epoch": 0.8379841464200555, "grad_norm": 0.46324393715573475, "learning_rate": 1.8066595059076265e-06, "loss": 0.0016, "step": 13003 }, { "epoch": 0.8380485918669845, "grad_norm": 0.002904501204279146, "learning_rate": 1.8059434300035806e-06, "loss": 0.0, "step": 13004 }, { "epoch": 0.8381130373139137, "grad_norm": 0.0032653766814766654, "learning_rate": 1.8052273540995346e-06, "loss": 0.0, "step": 13005 }, { "epoch": 0.8381774827608429, "grad_norm": 0.000118351637666897, "learning_rate": 1.8045112781954887e-06, "loss": 0.0, "step": 13006 }, { "epoch": 0.8382419282077721, "grad_norm": 0.09376592798824523, "learning_rate": 1.8037952022914432e-06, "loss": 0.0002, "step": 13007 }, { "epoch": 0.8383063736547013, "grad_norm": 0.0040541966063241675, "learning_rate": 1.8030791263873973e-06, "loss": 0.0, "step": 13008 }, { "epoch": 0.8383708191016305, "grad_norm": 0.013738120997783352, "learning_rate": 1.8023630504833514e-06, "loss": 0.0001, "step": 13009 }, { "epoch": 0.8384352645485597, "grad_norm": 0.0031340080052982603, "learning_rate": 1.8016469745793055e-06, "loss": 0.0, "step": 13010 }, { "epoch": 0.8384997099954888, "grad_norm": 0.0009455405513584888, "learning_rate": 1.8009308986752596e-06, "loss": 0.0, "step": 13011 }, { "epoch": 0.838564155442418, "grad_norm": 0.0036418512175713817, "learning_rate": 1.8002148227712139e-06, "loss": 0.0, "step": 13012 }, { "epoch": 0.8386286008893472, "grad_norm": 0.0006205554728098501, "learning_rate": 1.7994987468671682e-06, "loss": 0.0, "step": 13013 }, { "epoch": 0.8386930463362764, "grad_norm": 0.0025184925414780623, "learning_rate": 1.7987826709631223e-06, "loss": 0.0, "step": 13014 }, { "epoch": 0.8387574917832055, "grad_norm": 9.977119185934306e-05, "learning_rate": 1.7980665950590764e-06, "loss": 0.0, "step": 13015 }, { "epoch": 0.8388219372301347, "grad_norm": 0.001322791226674588, "learning_rate": 1.7973505191550307e-06, "loss": 0.0, "step": 13016 }, { "epoch": 0.8388863826770638, "grad_norm": 9.753431868816942e-05, "learning_rate": 1.7966344432509847e-06, "loss": 0.0, "step": 13017 }, { "epoch": 0.838950828123993, "grad_norm": 0.024187093953887785, "learning_rate": 1.7959183673469388e-06, "loss": 0.0, "step": 13018 }, { "epoch": 0.8390152735709222, "grad_norm": 0.0008915200755770839, "learning_rate": 1.7952022914428931e-06, "loss": 0.0, "step": 13019 }, { "epoch": 0.8390797190178514, "grad_norm": 0.1700030878168758, "learning_rate": 1.7944862155388474e-06, "loss": 0.0021, "step": 13020 }, { "epoch": 0.8391441644647806, "grad_norm": 0.009521015167901947, "learning_rate": 1.7937701396348015e-06, "loss": 0.0001, "step": 13021 }, { "epoch": 0.8392086099117098, "grad_norm": 0.0009619655795215219, "learning_rate": 1.7930540637307556e-06, "loss": 0.0, "step": 13022 }, { "epoch": 0.8392730553586389, "grad_norm": 0.04029302332851765, "learning_rate": 1.7923379878267097e-06, "loss": 0.0, "step": 13023 }, { "epoch": 0.8393375008055681, "grad_norm": 0.0386175096813077, "learning_rate": 1.7916219119226638e-06, "loss": 0.0, "step": 13024 }, { "epoch": 0.8394019462524973, "grad_norm": 4.674073385948886e-05, "learning_rate": 1.7909058360186183e-06, "loss": 0.0, "step": 13025 }, { "epoch": 0.8394663916994264, "grad_norm": 0.13500024579167066, "learning_rate": 1.7901897601145724e-06, "loss": 0.0003, "step": 13026 }, { "epoch": 0.8395308371463556, "grad_norm": 0.0024757972109323823, "learning_rate": 1.7894736842105265e-06, "loss": 0.0, "step": 13027 }, { "epoch": 0.8395952825932848, "grad_norm": 0.0021501952153607775, "learning_rate": 1.7887576083064805e-06, "loss": 0.0, "step": 13028 }, { "epoch": 0.8396597280402139, "grad_norm": 0.0008697595381524505, "learning_rate": 1.7880415324024346e-06, "loss": 0.0, "step": 13029 }, { "epoch": 0.8397241734871431, "grad_norm": 0.4445862117639593, "learning_rate": 1.787325456498389e-06, "loss": 0.0035, "step": 13030 }, { "epoch": 0.8397886189340723, "grad_norm": 0.36612350437383684, "learning_rate": 1.7866093805943432e-06, "loss": 0.0016, "step": 13031 }, { "epoch": 0.8398530643810015, "grad_norm": 0.012165564798806148, "learning_rate": 1.7858933046902973e-06, "loss": 0.0, "step": 13032 }, { "epoch": 0.8399175098279307, "grad_norm": 0.0779756907391507, "learning_rate": 1.7851772287862514e-06, "loss": 0.0001, "step": 13033 }, { "epoch": 0.8399819552748599, "grad_norm": 0.026000581433871277, "learning_rate": 1.7844611528822057e-06, "loss": 0.0015, "step": 13034 }, { "epoch": 0.840046400721789, "grad_norm": 0.2525092647996164, "learning_rate": 1.7837450769781598e-06, "loss": 0.0005, "step": 13035 }, { "epoch": 0.8401108461687182, "grad_norm": 0.0005119127002491398, "learning_rate": 1.7830290010741139e-06, "loss": 0.0, "step": 13036 }, { "epoch": 0.8401752916156473, "grad_norm": 0.0020882671639366607, "learning_rate": 1.7823129251700682e-06, "loss": 0.0, "step": 13037 }, { "epoch": 0.8402397370625765, "grad_norm": 0.01590513585309205, "learning_rate": 1.7815968492660225e-06, "loss": 0.0, "step": 13038 }, { "epoch": 0.8403041825095057, "grad_norm": 0.0003583728461652098, "learning_rate": 1.7808807733619766e-06, "loss": 0.0, "step": 13039 }, { "epoch": 0.8403686279564349, "grad_norm": 0.0017618107669094471, "learning_rate": 1.7801646974579306e-06, "loss": 0.0, "step": 13040 }, { "epoch": 0.840433073403364, "grad_norm": 0.0051314915355801785, "learning_rate": 1.7794486215538847e-06, "loss": 0.0, "step": 13041 }, { "epoch": 0.8404975188502932, "grad_norm": 0.0015903867467092674, "learning_rate": 1.7787325456498392e-06, "loss": 0.0, "step": 13042 }, { "epoch": 0.8405619642972224, "grad_norm": 0.0004816465431161624, "learning_rate": 1.7780164697457933e-06, "loss": 0.0, "step": 13043 }, { "epoch": 0.8406264097441516, "grad_norm": 0.006361106917538149, "learning_rate": 1.7773003938417474e-06, "loss": 0.0, "step": 13044 }, { "epoch": 0.8406908551910808, "grad_norm": 0.035033549990959596, "learning_rate": 1.7765843179377015e-06, "loss": 0.0001, "step": 13045 }, { "epoch": 0.84075530063801, "grad_norm": 0.005053168074157976, "learning_rate": 1.7758682420336556e-06, "loss": 0.0, "step": 13046 }, { "epoch": 0.8408197460849391, "grad_norm": 0.1503405115928112, "learning_rate": 1.7751521661296097e-06, "loss": 0.0003, "step": 13047 }, { "epoch": 0.8408841915318682, "grad_norm": 0.8358501852193341, "learning_rate": 1.7744360902255642e-06, "loss": 0.0055, "step": 13048 }, { "epoch": 0.8409486369787974, "grad_norm": 0.0012061131465209045, "learning_rate": 1.7737200143215183e-06, "loss": 0.0, "step": 13049 }, { "epoch": 0.8410130824257266, "grad_norm": 0.030221909655107952, "learning_rate": 1.7730039384174724e-06, "loss": 0.0, "step": 13050 }, { "epoch": 0.8410775278726558, "grad_norm": 0.0003382566794423338, "learning_rate": 1.7722878625134265e-06, "loss": 0.0, "step": 13051 }, { "epoch": 0.841141973319585, "grad_norm": 0.001474604475494994, "learning_rate": 1.7715717866093807e-06, "loss": 0.0, "step": 13052 }, { "epoch": 0.8412064187665141, "grad_norm": 0.0005242323894890739, "learning_rate": 1.7708557107053348e-06, "loss": 0.0, "step": 13053 }, { "epoch": 0.8412708642134433, "grad_norm": 0.01598698800507151, "learning_rate": 1.7701396348012891e-06, "loss": 0.0, "step": 13054 }, { "epoch": 0.8413353096603725, "grad_norm": 0.02646729332155176, "learning_rate": 1.7694235588972432e-06, "loss": 0.0, "step": 13055 }, { "epoch": 0.8413997551073017, "grad_norm": 0.0004705005211530495, "learning_rate": 1.7687074829931975e-06, "loss": 0.0, "step": 13056 }, { "epoch": 0.8414642005542309, "grad_norm": 0.2572792781261736, "learning_rate": 1.7679914070891516e-06, "loss": 0.001, "step": 13057 }, { "epoch": 0.8415286460011601, "grad_norm": 0.003987081818030144, "learning_rate": 1.7672753311851057e-06, "loss": 0.0, "step": 13058 }, { "epoch": 0.8415930914480892, "grad_norm": 0.017189671808628206, "learning_rate": 1.7665592552810598e-06, "loss": 0.0, "step": 13059 }, { "epoch": 0.8416575368950183, "grad_norm": 0.038183230144691487, "learning_rate": 1.7658431793770143e-06, "loss": 0.0, "step": 13060 }, { "epoch": 0.8417219823419475, "grad_norm": 0.041134170528374744, "learning_rate": 1.7651271034729684e-06, "loss": 0.0001, "step": 13061 }, { "epoch": 0.8417864277888767, "grad_norm": 0.08703766028306478, "learning_rate": 1.7644110275689225e-06, "loss": 0.0002, "step": 13062 }, { "epoch": 0.8418508732358059, "grad_norm": 0.00010780192104593178, "learning_rate": 1.7636949516648766e-06, "loss": 0.0, "step": 13063 }, { "epoch": 0.8419153186827351, "grad_norm": 0.11928142763613796, "learning_rate": 1.7629788757608306e-06, "loss": 0.0004, "step": 13064 }, { "epoch": 0.8419797641296642, "grad_norm": 0.5486655215015411, "learning_rate": 1.762262799856785e-06, "loss": 0.0035, "step": 13065 }, { "epoch": 0.8420442095765934, "grad_norm": 0.005871428197205636, "learning_rate": 1.7615467239527392e-06, "loss": 0.0, "step": 13066 }, { "epoch": 0.8421086550235226, "grad_norm": 0.0003421205363785894, "learning_rate": 1.7608306480486933e-06, "loss": 0.0, "step": 13067 }, { "epoch": 0.8421731004704518, "grad_norm": 0.00034174588045342405, "learning_rate": 1.7601145721446474e-06, "loss": 0.0, "step": 13068 }, { "epoch": 0.842237545917381, "grad_norm": 0.004250460203295504, "learning_rate": 1.7593984962406017e-06, "loss": 0.0, "step": 13069 }, { "epoch": 0.8423019913643102, "grad_norm": 0.0015205250632764336, "learning_rate": 1.7586824203365558e-06, "loss": 0.0, "step": 13070 }, { "epoch": 0.8423664368112392, "grad_norm": 0.004258007696294317, "learning_rate": 1.7579663444325099e-06, "loss": 0.0, "step": 13071 }, { "epoch": 0.8424308822581684, "grad_norm": 0.0034072375699601702, "learning_rate": 1.7572502685284642e-06, "loss": 0.0, "step": 13072 }, { "epoch": 0.8424953277050976, "grad_norm": 0.0014717670616816948, "learning_rate": 1.7565341926244185e-06, "loss": 0.0, "step": 13073 }, { "epoch": 0.8425597731520268, "grad_norm": 0.13174503866161089, "learning_rate": 1.7558181167203726e-06, "loss": 0.0001, "step": 13074 }, { "epoch": 0.842624218598956, "grad_norm": 0.13783244480645623, "learning_rate": 1.7551020408163267e-06, "loss": 0.0013, "step": 13075 }, { "epoch": 0.8426886640458852, "grad_norm": 0.005166068120200246, "learning_rate": 1.7543859649122807e-06, "loss": 0.0, "step": 13076 }, { "epoch": 0.8427531094928143, "grad_norm": 0.0006774612637503077, "learning_rate": 1.7536698890082353e-06, "loss": 0.0, "step": 13077 }, { "epoch": 0.8428175549397435, "grad_norm": 0.17182032842691972, "learning_rate": 1.7529538131041893e-06, "loss": 0.0017, "step": 13078 }, { "epoch": 0.8428820003866727, "grad_norm": 0.04182167002943656, "learning_rate": 1.7522377372001434e-06, "loss": 0.0002, "step": 13079 }, { "epoch": 0.8429464458336019, "grad_norm": 0.0244410842129277, "learning_rate": 1.7515216612960975e-06, "loss": 0.0001, "step": 13080 }, { "epoch": 0.8430108912805311, "grad_norm": 0.010742733410488706, "learning_rate": 1.7508055853920516e-06, "loss": 0.0, "step": 13081 }, { "epoch": 0.8430753367274602, "grad_norm": 0.010742733410488706, "learning_rate": 1.7508055853920516e-06, "loss": 0.0257, "step": 13082 }, { "epoch": 0.8431397821743893, "grad_norm": 7.704857180219094e-05, "learning_rate": 1.7500895094880057e-06, "loss": 0.0, "step": 13083 }, { "epoch": 0.8432042276213185, "grad_norm": 0.06449464703172729, "learning_rate": 1.7493734335839602e-06, "loss": 0.0001, "step": 13084 }, { "epoch": 0.8432686730682477, "grad_norm": 0.000153695612872603, "learning_rate": 1.7486573576799143e-06, "loss": 0.0, "step": 13085 }, { "epoch": 0.8433331185151769, "grad_norm": 0.004558501781863454, "learning_rate": 1.7479412817758684e-06, "loss": 0.0, "step": 13086 }, { "epoch": 0.8433975639621061, "grad_norm": 0.0014270143678160212, "learning_rate": 1.7472252058718225e-06, "loss": 0.0, "step": 13087 }, { "epoch": 0.8434620094090353, "grad_norm": 0.0021367750367809737, "learning_rate": 1.7465091299677768e-06, "loss": 0.0, "step": 13088 }, { "epoch": 0.8435264548559644, "grad_norm": 0.06312020112435841, "learning_rate": 1.7457930540637308e-06, "loss": 0.0001, "step": 13089 }, { "epoch": 0.8435909003028936, "grad_norm": 0.0005994224308608808, "learning_rate": 1.7450769781596851e-06, "loss": 0.0, "step": 13090 }, { "epoch": 0.8436553457498228, "grad_norm": 0.008210769671738014, "learning_rate": 1.7443609022556392e-06, "loss": 0.0, "step": 13091 }, { "epoch": 0.843719791196752, "grad_norm": 0.009704245330285514, "learning_rate": 1.7436448263515935e-06, "loss": 0.0, "step": 13092 }, { "epoch": 0.8437842366436811, "grad_norm": 0.45894062564205756, "learning_rate": 1.7429287504475476e-06, "loss": 0.0032, "step": 13093 }, { "epoch": 0.8438486820906103, "grad_norm": 0.0023116636587011403, "learning_rate": 1.7422126745435017e-06, "loss": 0.0, "step": 13094 }, { "epoch": 0.8439131275375394, "grad_norm": 0.002211421761670453, "learning_rate": 1.7414965986394558e-06, "loss": 0.0, "step": 13095 }, { "epoch": 0.8439775729844686, "grad_norm": 0.01702725153815147, "learning_rate": 1.7407805227354103e-06, "loss": 0.0, "step": 13096 }, { "epoch": 0.8440420184313978, "grad_norm": 0.0001140036103517741, "learning_rate": 1.7400644468313644e-06, "loss": 0.0, "step": 13097 }, { "epoch": 0.844106463878327, "grad_norm": 0.10455236751921528, "learning_rate": 1.7393483709273185e-06, "loss": 0.0018, "step": 13098 }, { "epoch": 0.8441709093252562, "grad_norm": 0.002027961693557616, "learning_rate": 1.7386322950232726e-06, "loss": 0.0, "step": 13099 }, { "epoch": 0.8442353547721854, "grad_norm": 0.00024180815422684246, "learning_rate": 1.7379162191192266e-06, "loss": 0.0, "step": 13100 }, { "epoch": 0.8442998002191145, "grad_norm": 0.079310239055661, "learning_rate": 1.7372001432151807e-06, "loss": 0.0001, "step": 13101 }, { "epoch": 0.8443642456660437, "grad_norm": 0.00905125777422377, "learning_rate": 1.7364840673111352e-06, "loss": 0.0, "step": 13102 }, { "epoch": 0.8444286911129729, "grad_norm": 0.2213289704661812, "learning_rate": 1.7357679914070893e-06, "loss": 0.0009, "step": 13103 }, { "epoch": 0.844493136559902, "grad_norm": 0.013132135644655554, "learning_rate": 1.7350519155030434e-06, "loss": 0.0, "step": 13104 }, { "epoch": 0.8445575820068312, "grad_norm": 0.0019095839276892347, "learning_rate": 1.7343358395989975e-06, "loss": 0.0, "step": 13105 }, { "epoch": 0.8446220274537604, "grad_norm": 0.003959361517452852, "learning_rate": 1.7336197636949518e-06, "loss": 0.0, "step": 13106 }, { "epoch": 0.8446864729006895, "grad_norm": 0.0032159310611906385, "learning_rate": 1.7329036877909059e-06, "loss": 0.0, "step": 13107 }, { "epoch": 0.8447509183476187, "grad_norm": 0.0019923615717410644, "learning_rate": 1.7321876118868602e-06, "loss": 0.0, "step": 13108 }, { "epoch": 0.8448153637945479, "grad_norm": 0.004360544594844355, "learning_rate": 1.7314715359828143e-06, "loss": 0.0, "step": 13109 }, { "epoch": 0.8448798092414771, "grad_norm": 0.0004440588758890354, "learning_rate": 1.7307554600787686e-06, "loss": 0.0, "step": 13110 }, { "epoch": 0.8449442546884063, "grad_norm": 0.05991632583505031, "learning_rate": 1.7300393841747227e-06, "loss": 0.0001, "step": 13111 }, { "epoch": 0.8450087001353355, "grad_norm": 0.0005124873215020824, "learning_rate": 1.7293233082706767e-06, "loss": 0.0, "step": 13112 }, { "epoch": 0.8450731455822647, "grad_norm": 0.004131471238295923, "learning_rate": 1.728607232366631e-06, "loss": 0.0, "step": 13113 }, { "epoch": 0.8451375910291938, "grad_norm": 0.002933325315730856, "learning_rate": 1.7278911564625853e-06, "loss": 0.0, "step": 13114 }, { "epoch": 0.8452020364761229, "grad_norm": 0.0019792287577856935, "learning_rate": 1.7271750805585394e-06, "loss": 0.0, "step": 13115 }, { "epoch": 0.8452664819230521, "grad_norm": 0.022908054626963152, "learning_rate": 1.7264590046544935e-06, "loss": 0.0, "step": 13116 }, { "epoch": 0.8453309273699813, "grad_norm": 0.001399493420350886, "learning_rate": 1.7257429287504476e-06, "loss": 0.0, "step": 13117 }, { "epoch": 0.8453953728169105, "grad_norm": 0.001018219180645176, "learning_rate": 1.7250268528464017e-06, "loss": 0.0, "step": 13118 }, { "epoch": 0.8454598182638396, "grad_norm": 0.04616747542702802, "learning_rate": 1.7243107769423562e-06, "loss": 0.0, "step": 13119 }, { "epoch": 0.8455242637107688, "grad_norm": 0.009493654566610898, "learning_rate": 1.7235947010383103e-06, "loss": 0.0, "step": 13120 }, { "epoch": 0.845588709157698, "grad_norm": 0.00967814859014821, "learning_rate": 1.7228786251342644e-06, "loss": 0.0, "step": 13121 }, { "epoch": 0.8456531546046272, "grad_norm": 0.10150213006238015, "learning_rate": 1.7221625492302185e-06, "loss": 0.0003, "step": 13122 }, { "epoch": 0.8457176000515564, "grad_norm": 0.6385996833154148, "learning_rate": 1.7214464733261726e-06, "loss": 0.0013, "step": 13123 }, { "epoch": 0.8457820454984856, "grad_norm": 0.027547564191873486, "learning_rate": 1.7207303974221269e-06, "loss": 0.0001, "step": 13124 }, { "epoch": 0.8458464909454148, "grad_norm": 0.04341508013828472, "learning_rate": 1.7200143215180811e-06, "loss": 0.0003, "step": 13125 }, { "epoch": 0.8459109363923438, "grad_norm": 0.00016965293402197205, "learning_rate": 1.7192982456140352e-06, "loss": 0.0, "step": 13126 }, { "epoch": 0.845975381839273, "grad_norm": 0.002586838111347734, "learning_rate": 1.7185821697099893e-06, "loss": 0.0, "step": 13127 }, { "epoch": 0.8460398272862022, "grad_norm": 0.02806070046606198, "learning_rate": 1.7178660938059436e-06, "loss": 0.0003, "step": 13128 }, { "epoch": 0.8461042727331314, "grad_norm": 0.00040487585066894285, "learning_rate": 1.7171500179018977e-06, "loss": 0.0, "step": 13129 }, { "epoch": 0.8461687181800606, "grad_norm": 0.0041082910374663, "learning_rate": 1.7164339419978518e-06, "loss": 0.0, "step": 13130 }, { "epoch": 0.8462331636269897, "grad_norm": 0.0015562831145490758, "learning_rate": 1.715717866093806e-06, "loss": 0.0, "step": 13131 }, { "epoch": 0.8462976090739189, "grad_norm": 0.0009302798442001268, "learning_rate": 1.7150017901897604e-06, "loss": 0.0, "step": 13132 }, { "epoch": 0.8463620545208481, "grad_norm": 0.01062960002645244, "learning_rate": 1.7142857142857145e-06, "loss": 0.0, "step": 13133 }, { "epoch": 0.8464264999677773, "grad_norm": 0.00834073110916737, "learning_rate": 1.7135696383816686e-06, "loss": 0.0, "step": 13134 }, { "epoch": 0.8464909454147065, "grad_norm": 0.0009627436988840297, "learning_rate": 1.7128535624776227e-06, "loss": 0.0, "step": 13135 }, { "epoch": 0.8465553908616357, "grad_norm": 0.0009720207341823486, "learning_rate": 1.7121374865735767e-06, "loss": 0.0, "step": 13136 }, { "epoch": 0.8466198363085649, "grad_norm": 0.007233971164138382, "learning_rate": 1.7114214106695313e-06, "loss": 0.0, "step": 13137 }, { "epoch": 0.8466842817554939, "grad_norm": 0.006121266302367881, "learning_rate": 1.7107053347654853e-06, "loss": 0.0, "step": 13138 }, { "epoch": 0.8467487272024231, "grad_norm": 0.00019090575891950602, "learning_rate": 1.7099892588614394e-06, "loss": 0.0, "step": 13139 }, { "epoch": 0.8468131726493523, "grad_norm": 0.3189073772494604, "learning_rate": 1.7092731829573935e-06, "loss": 0.0069, "step": 13140 }, { "epoch": 0.8468776180962815, "grad_norm": 0.00029818719882822894, "learning_rate": 1.7085571070533478e-06, "loss": 0.0, "step": 13141 }, { "epoch": 0.8469420635432107, "grad_norm": 0.02607671581948451, "learning_rate": 1.707841031149302e-06, "loss": 0.0001, "step": 13142 }, { "epoch": 0.8470065089901399, "grad_norm": 0.10383467270417568, "learning_rate": 1.7071249552452562e-06, "loss": 0.0007, "step": 13143 }, { "epoch": 0.847070954437069, "grad_norm": 0.00030382140353013776, "learning_rate": 1.7064088793412103e-06, "loss": 0.0, "step": 13144 }, { "epoch": 0.8471353998839982, "grad_norm": 0.006279257922826684, "learning_rate": 1.7056928034371646e-06, "loss": 0.0, "step": 13145 }, { "epoch": 0.8471998453309274, "grad_norm": 0.45831563402449355, "learning_rate": 1.7049767275331187e-06, "loss": 0.0036, "step": 13146 }, { "epoch": 0.8472642907778566, "grad_norm": 0.004021349795027649, "learning_rate": 1.7042606516290728e-06, "loss": 0.0, "step": 13147 }, { "epoch": 0.8473287362247858, "grad_norm": 0.03808135836534344, "learning_rate": 1.703544575725027e-06, "loss": 0.0, "step": 13148 }, { "epoch": 0.8473931816717148, "grad_norm": 0.11625193826279853, "learning_rate": 1.7028284998209814e-06, "loss": 0.0017, "step": 13149 }, { "epoch": 0.847457627118644, "grad_norm": 0.00826856773484578, "learning_rate": 1.7021124239169354e-06, "loss": 0.0001, "step": 13150 }, { "epoch": 0.8475220725655732, "grad_norm": 0.06094455670493373, "learning_rate": 1.7013963480128895e-06, "loss": 0.0001, "step": 13151 }, { "epoch": 0.8475865180125024, "grad_norm": 0.32370913335997803, "learning_rate": 1.7006802721088436e-06, "loss": 0.0005, "step": 13152 }, { "epoch": 0.8476509634594316, "grad_norm": 0.004277870466133441, "learning_rate": 1.6999641962047977e-06, "loss": 0.0, "step": 13153 }, { "epoch": 0.8477154089063608, "grad_norm": 0.006180712845803867, "learning_rate": 1.6992481203007522e-06, "loss": 0.0001, "step": 13154 }, { "epoch": 0.84777985435329, "grad_norm": 0.00029611523936459857, "learning_rate": 1.6985320443967063e-06, "loss": 0.0, "step": 13155 }, { "epoch": 0.8478442998002191, "grad_norm": 0.0003400751143154014, "learning_rate": 1.6978159684926604e-06, "loss": 0.0, "step": 13156 }, { "epoch": 0.8479087452471483, "grad_norm": 0.04685887416980266, "learning_rate": 1.6970998925886145e-06, "loss": 0.0001, "step": 13157 }, { "epoch": 0.8479731906940775, "grad_norm": 0.004478614412734495, "learning_rate": 1.6963838166845686e-06, "loss": 0.0, "step": 13158 }, { "epoch": 0.8480376361410067, "grad_norm": 0.0872153273947622, "learning_rate": 1.6956677407805229e-06, "loss": 0.0016, "step": 13159 }, { "epoch": 0.8481020815879358, "grad_norm": 0.0006567281632697598, "learning_rate": 1.6949516648764772e-06, "loss": 0.0, "step": 13160 }, { "epoch": 0.848166527034865, "grad_norm": 0.000274595862112701, "learning_rate": 1.6942355889724312e-06, "loss": 0.0, "step": 13161 }, { "epoch": 0.8482309724817941, "grad_norm": 0.002213287427482091, "learning_rate": 1.6935195130683853e-06, "loss": 0.0, "step": 13162 }, { "epoch": 0.8482954179287233, "grad_norm": 0.0006916792952072503, "learning_rate": 1.6928034371643396e-06, "loss": 0.0, "step": 13163 }, { "epoch": 0.8483598633756525, "grad_norm": 0.014104434453009731, "learning_rate": 1.6920873612602937e-06, "loss": 0.0002, "step": 13164 }, { "epoch": 0.8484243088225817, "grad_norm": 0.37413188666834707, "learning_rate": 1.6913712853562478e-06, "loss": 0.0051, "step": 13165 }, { "epoch": 0.8484887542695109, "grad_norm": 0.22097166148012748, "learning_rate": 1.690655209452202e-06, "loss": 0.0032, "step": 13166 }, { "epoch": 0.84855319971644, "grad_norm": 0.05122863864429417, "learning_rate": 1.6899391335481564e-06, "loss": 0.0001, "step": 13167 }, { "epoch": 0.8486176451633692, "grad_norm": 0.0022830902384754453, "learning_rate": 1.6892230576441105e-06, "loss": 0.0, "step": 13168 }, { "epoch": 0.8486820906102984, "grad_norm": 0.00552262182764459, "learning_rate": 1.6885069817400646e-06, "loss": 0.0, "step": 13169 }, { "epoch": 0.8487465360572276, "grad_norm": 0.004210165256711199, "learning_rate": 1.6877909058360187e-06, "loss": 0.0, "step": 13170 }, { "epoch": 0.8488109815041567, "grad_norm": 0.08326636885960183, "learning_rate": 1.6870748299319727e-06, "loss": 0.0001, "step": 13171 }, { "epoch": 0.8488754269510859, "grad_norm": 0.002165883292149573, "learning_rate": 1.6863587540279273e-06, "loss": 0.0, "step": 13172 }, { "epoch": 0.848939872398015, "grad_norm": 0.02688358309133841, "learning_rate": 1.6856426781238813e-06, "loss": 0.0, "step": 13173 }, { "epoch": 0.8490043178449442, "grad_norm": 0.0035068966026358408, "learning_rate": 1.6849266022198354e-06, "loss": 0.0015, "step": 13174 }, { "epoch": 0.8490687632918734, "grad_norm": 0.08898056050565528, "learning_rate": 1.6842105263157895e-06, "loss": 0.0001, "step": 13175 }, { "epoch": 0.8491332087388026, "grad_norm": 0.01142288213511176, "learning_rate": 1.6834944504117436e-06, "loss": 0.0, "step": 13176 }, { "epoch": 0.8491976541857318, "grad_norm": 0.12086776057633856, "learning_rate": 1.682778374507698e-06, "loss": 0.0002, "step": 13177 }, { "epoch": 0.849262099632661, "grad_norm": 0.016550340064353578, "learning_rate": 1.6820622986036522e-06, "loss": 0.0002, "step": 13178 }, { "epoch": 0.8493265450795902, "grad_norm": 0.00171017518821912, "learning_rate": 1.6813462226996063e-06, "loss": 0.0, "step": 13179 }, { "epoch": 0.8493909905265193, "grad_norm": 0.07949205546168953, "learning_rate": 1.6806301467955604e-06, "loss": 0.0012, "step": 13180 }, { "epoch": 0.8494554359734485, "grad_norm": 0.09155284118646113, "learning_rate": 1.6799140708915147e-06, "loss": 0.0008, "step": 13181 }, { "epoch": 0.8495198814203776, "grad_norm": 0.0025166251856908886, "learning_rate": 1.6791979949874688e-06, "loss": 0.0, "step": 13182 }, { "epoch": 0.8495843268673068, "grad_norm": 0.01984616554006101, "learning_rate": 1.678481919083423e-06, "loss": 0.0002, "step": 13183 }, { "epoch": 0.849648772314236, "grad_norm": 0.0010042060360331753, "learning_rate": 1.6777658431793771e-06, "loss": 0.0, "step": 13184 }, { "epoch": 0.8497132177611652, "grad_norm": 0.0009949796621702785, "learning_rate": 1.6770497672753314e-06, "loss": 0.0, "step": 13185 }, { "epoch": 0.8497776632080943, "grad_norm": 0.053973604706768964, "learning_rate": 1.6763336913712855e-06, "loss": 0.0001, "step": 13186 }, { "epoch": 0.8498421086550235, "grad_norm": 0.4081985619394894, "learning_rate": 1.6756176154672396e-06, "loss": 0.0036, "step": 13187 }, { "epoch": 0.8499065541019527, "grad_norm": 0.02374758216156314, "learning_rate": 1.6749015395631937e-06, "loss": 0.0001, "step": 13188 }, { "epoch": 0.8499709995488819, "grad_norm": 0.05115024449824533, "learning_rate": 1.6741854636591482e-06, "loss": 0.0002, "step": 13189 }, { "epoch": 0.8500354449958111, "grad_norm": 1.4907648784656893, "learning_rate": 1.6734693877551023e-06, "loss": 0.006, "step": 13190 }, { "epoch": 0.8500998904427403, "grad_norm": 0.000761882675939549, "learning_rate": 1.6727533118510564e-06, "loss": 0.0, "step": 13191 }, { "epoch": 0.8501643358896694, "grad_norm": 0.0010225074365764744, "learning_rate": 1.6720372359470105e-06, "loss": 0.0, "step": 13192 }, { "epoch": 0.8502287813365985, "grad_norm": 0.011092580154141158, "learning_rate": 1.6713211600429646e-06, "loss": 0.0, "step": 13193 }, { "epoch": 0.8502932267835277, "grad_norm": 0.0006417750320247429, "learning_rate": 1.6706050841389187e-06, "loss": 0.0, "step": 13194 }, { "epoch": 0.8503576722304569, "grad_norm": 0.0035144805856446135, "learning_rate": 1.6698890082348732e-06, "loss": 0.0, "step": 13195 }, { "epoch": 0.8504221176773861, "grad_norm": 0.0010827521412211539, "learning_rate": 1.6691729323308273e-06, "loss": 0.0, "step": 13196 }, { "epoch": 0.8504865631243153, "grad_norm": 0.03509750524593469, "learning_rate": 1.6684568564267813e-06, "loss": 0.0001, "step": 13197 }, { "epoch": 0.8505510085712444, "grad_norm": 0.00036619498825359834, "learning_rate": 1.6677407805227354e-06, "loss": 0.0, "step": 13198 }, { "epoch": 0.8506154540181736, "grad_norm": 0.009914766960140308, "learning_rate": 1.6670247046186897e-06, "loss": 0.0, "step": 13199 }, { "epoch": 0.8506798994651028, "grad_norm": 0.0017422356060475125, "learning_rate": 1.6663086287146438e-06, "loss": 0.0, "step": 13200 }, { "epoch": 0.850744344912032, "grad_norm": 0.5032569191045334, "learning_rate": 1.6655925528105981e-06, "loss": 0.0029, "step": 13201 }, { "epoch": 0.8508087903589612, "grad_norm": 0.10452537366147462, "learning_rate": 1.6648764769065522e-06, "loss": 0.0004, "step": 13202 }, { "epoch": 0.8508732358058904, "grad_norm": 0.5053188131076043, "learning_rate": 1.6641604010025065e-06, "loss": 0.0021, "step": 13203 }, { "epoch": 0.8509376812528194, "grad_norm": 0.0012992472535777047, "learning_rate": 1.6634443250984606e-06, "loss": 0.0, "step": 13204 }, { "epoch": 0.8510021266997486, "grad_norm": 0.00011059526996946594, "learning_rate": 1.6627282491944147e-06, "loss": 0.0, "step": 13205 }, { "epoch": 0.8510665721466778, "grad_norm": 0.005765190230397704, "learning_rate": 1.6620121732903688e-06, "loss": 0.0, "step": 13206 }, { "epoch": 0.851131017593607, "grad_norm": 0.003982361016511815, "learning_rate": 1.6612960973863233e-06, "loss": 0.0, "step": 13207 }, { "epoch": 0.8511954630405362, "grad_norm": 0.1607033931876445, "learning_rate": 1.6605800214822774e-06, "loss": 0.0006, "step": 13208 }, { "epoch": 0.8512599084874654, "grad_norm": 0.0023549343502827992, "learning_rate": 1.6598639455782314e-06, "loss": 0.0, "step": 13209 }, { "epoch": 0.8513243539343945, "grad_norm": 0.004134231289848903, "learning_rate": 1.6591478696741855e-06, "loss": 0.0, "step": 13210 }, { "epoch": 0.8513887993813237, "grad_norm": 0.0012640425582206376, "learning_rate": 1.6584317937701396e-06, "loss": 0.0, "step": 13211 }, { "epoch": 0.8514532448282529, "grad_norm": 0.0021418223676945244, "learning_rate": 1.657715717866094e-06, "loss": 0.0, "step": 13212 }, { "epoch": 0.8515176902751821, "grad_norm": 0.0007885922196547374, "learning_rate": 1.6569996419620482e-06, "loss": 0.0, "step": 13213 }, { "epoch": 0.8515821357221113, "grad_norm": 0.0014711894637515442, "learning_rate": 1.6562835660580023e-06, "loss": 0.0, "step": 13214 }, { "epoch": 0.8516465811690405, "grad_norm": 0.008335604533173686, "learning_rate": 1.6555674901539564e-06, "loss": 0.0, "step": 13215 }, { "epoch": 0.8517110266159695, "grad_norm": 0.011965576789911826, "learning_rate": 1.6548514142499107e-06, "loss": 0.0, "step": 13216 }, { "epoch": 0.8517754720628987, "grad_norm": 0.0012711725620480775, "learning_rate": 1.6541353383458648e-06, "loss": 0.0, "step": 13217 }, { "epoch": 0.8518399175098279, "grad_norm": 0.03274330104237303, "learning_rate": 1.653419262441819e-06, "loss": 0.0, "step": 13218 }, { "epoch": 0.8519043629567571, "grad_norm": 0.013053572989104686, "learning_rate": 1.6527031865377732e-06, "loss": 0.0, "step": 13219 }, { "epoch": 0.8519688084036863, "grad_norm": 0.0005108394442840666, "learning_rate": 1.6519871106337275e-06, "loss": 0.0, "step": 13220 }, { "epoch": 0.8520332538506155, "grad_norm": 0.008659800547823577, "learning_rate": 1.6512710347296815e-06, "loss": 0.0, "step": 13221 }, { "epoch": 0.8520976992975446, "grad_norm": 0.013328921264361386, "learning_rate": 1.6505549588256356e-06, "loss": 0.0, "step": 13222 }, { "epoch": 0.8521621447444738, "grad_norm": 0.00534133565134591, "learning_rate": 1.6498388829215897e-06, "loss": 0.0, "step": 13223 }, { "epoch": 0.852226590191403, "grad_norm": 0.004128411945539397, "learning_rate": 1.6491228070175442e-06, "loss": 0.0, "step": 13224 }, { "epoch": 0.8522910356383322, "grad_norm": 0.0011464066259772201, "learning_rate": 1.6484067311134983e-06, "loss": 0.0, "step": 13225 }, { "epoch": 0.8523554810852614, "grad_norm": 0.0012449689486442015, "learning_rate": 1.6476906552094524e-06, "loss": 0.0, "step": 13226 }, { "epoch": 0.8524199265321905, "grad_norm": 0.01839498996198106, "learning_rate": 1.6469745793054065e-06, "loss": 0.0, "step": 13227 }, { "epoch": 0.8524843719791196, "grad_norm": 0.02975664278763737, "learning_rate": 1.6462585034013606e-06, "loss": 0.0, "step": 13228 }, { "epoch": 0.8525488174260488, "grad_norm": 0.004641548279767545, "learning_rate": 1.6455424274973147e-06, "loss": 0.0, "step": 13229 }, { "epoch": 0.852613262872978, "grad_norm": 0.06830025568807072, "learning_rate": 1.6448263515932692e-06, "loss": 0.0005, "step": 13230 }, { "epoch": 0.8526777083199072, "grad_norm": 1.172192695787183, "learning_rate": 1.6441102756892233e-06, "loss": 0.0056, "step": 13231 }, { "epoch": 0.8527421537668364, "grad_norm": 0.011497391242717231, "learning_rate": 1.6433941997851773e-06, "loss": 0.0, "step": 13232 }, { "epoch": 0.8528065992137656, "grad_norm": 0.05892172851319213, "learning_rate": 1.6426781238811314e-06, "loss": 0.0, "step": 13233 }, { "epoch": 0.8528710446606947, "grad_norm": 0.004463375528091414, "learning_rate": 1.6419620479770857e-06, "loss": 0.0, "step": 13234 }, { "epoch": 0.8529354901076239, "grad_norm": 0.0018064123459318863, "learning_rate": 1.6412459720730398e-06, "loss": 0.0, "step": 13235 }, { "epoch": 0.8529999355545531, "grad_norm": 0.06578452059686321, "learning_rate": 1.6405298961689941e-06, "loss": 0.0001, "step": 13236 }, { "epoch": 0.8530643810014823, "grad_norm": 0.016703103161657937, "learning_rate": 1.6398138202649482e-06, "loss": 0.0, "step": 13237 }, { "epoch": 0.8531288264484114, "grad_norm": 0.014220012566819788, "learning_rate": 1.6390977443609025e-06, "loss": 0.0002, "step": 13238 }, { "epoch": 0.8531932718953406, "grad_norm": 0.09662233925181075, "learning_rate": 1.6383816684568566e-06, "loss": 0.0001, "step": 13239 }, { "epoch": 0.8532577173422697, "grad_norm": 0.19108335351306138, "learning_rate": 1.6376655925528107e-06, "loss": 0.0005, "step": 13240 }, { "epoch": 0.8533221627891989, "grad_norm": 0.021867105829644136, "learning_rate": 1.6369495166487648e-06, "loss": 0.0, "step": 13241 }, { "epoch": 0.8533866082361281, "grad_norm": 0.005161472184663322, "learning_rate": 1.6362334407447193e-06, "loss": 0.0, "step": 13242 }, { "epoch": 0.8534510536830573, "grad_norm": 0.005717583823887771, "learning_rate": 1.6355173648406734e-06, "loss": 0.0, "step": 13243 }, { "epoch": 0.8535154991299865, "grad_norm": 0.008121971108982103, "learning_rate": 1.6348012889366274e-06, "loss": 0.0, "step": 13244 }, { "epoch": 0.8535799445769157, "grad_norm": 0.00037337266763354336, "learning_rate": 1.6340852130325815e-06, "loss": 0.0, "step": 13245 }, { "epoch": 0.8536443900238448, "grad_norm": 0.002726389582061402, "learning_rate": 1.6333691371285356e-06, "loss": 0.0, "step": 13246 }, { "epoch": 0.853708835470774, "grad_norm": 0.12864904319243925, "learning_rate": 1.6326530612244897e-06, "loss": 0.0001, "step": 13247 }, { "epoch": 0.8537732809177032, "grad_norm": 0.01956431971786598, "learning_rate": 1.6319369853204442e-06, "loss": 0.0, "step": 13248 }, { "epoch": 0.8538377263646323, "grad_norm": 0.0007446259199047396, "learning_rate": 1.6312209094163983e-06, "loss": 0.0, "step": 13249 }, { "epoch": 0.8539021718115615, "grad_norm": 8.923878846050036e-05, "learning_rate": 1.6305048335123524e-06, "loss": 0.0, "step": 13250 }, { "epoch": 0.8539666172584907, "grad_norm": 0.00614986020674208, "learning_rate": 1.6297887576083065e-06, "loss": 0.0, "step": 13251 }, { "epoch": 0.8540310627054198, "grad_norm": 0.0002994068452241638, "learning_rate": 1.6290726817042608e-06, "loss": 0.0, "step": 13252 }, { "epoch": 0.854095508152349, "grad_norm": 0.019766686431275136, "learning_rate": 1.6283566058002149e-06, "loss": 0.0001, "step": 13253 }, { "epoch": 0.8541599535992782, "grad_norm": 0.08729203685103124, "learning_rate": 1.6276405298961692e-06, "loss": 0.0016, "step": 13254 }, { "epoch": 0.8542243990462074, "grad_norm": 0.0016916745618347053, "learning_rate": 1.6269244539921233e-06, "loss": 0.0, "step": 13255 }, { "epoch": 0.8542888444931366, "grad_norm": 0.0019892775296158497, "learning_rate": 1.6262083780880776e-06, "loss": 0.0, "step": 13256 }, { "epoch": 0.8543532899400658, "grad_norm": 0.0023993870594850473, "learning_rate": 1.6254923021840316e-06, "loss": 0.0, "step": 13257 }, { "epoch": 0.854417735386995, "grad_norm": 0.00016196712541359675, "learning_rate": 1.6247762262799857e-06, "loss": 0.0, "step": 13258 }, { "epoch": 0.8544821808339241, "grad_norm": 6.87116925052651e-05, "learning_rate": 1.62406015037594e-06, "loss": 0.0, "step": 13259 }, { "epoch": 0.8545466262808532, "grad_norm": 0.004721255990723467, "learning_rate": 1.6233440744718943e-06, "loss": 0.0, "step": 13260 }, { "epoch": 0.8546110717277824, "grad_norm": 0.0018702730203080486, "learning_rate": 1.6226279985678484e-06, "loss": 0.0, "step": 13261 }, { "epoch": 0.8546755171747116, "grad_norm": 0.04213422495428291, "learning_rate": 1.6219119226638025e-06, "loss": 0.0004, "step": 13262 }, { "epoch": 0.8547399626216408, "grad_norm": 0.09176947306452253, "learning_rate": 1.6211958467597566e-06, "loss": 0.0001, "step": 13263 }, { "epoch": 0.8548044080685699, "grad_norm": 0.0002837684054832139, "learning_rate": 1.6204797708557107e-06, "loss": 0.0, "step": 13264 }, { "epoch": 0.8548688535154991, "grad_norm": 0.00034699342449560114, "learning_rate": 1.6197636949516652e-06, "loss": 0.0, "step": 13265 }, { "epoch": 0.8549332989624283, "grad_norm": 0.0001257464769171491, "learning_rate": 1.6190476190476193e-06, "loss": 0.0, "step": 13266 }, { "epoch": 0.8549977444093575, "grad_norm": 0.015559039084421477, "learning_rate": 1.6183315431435734e-06, "loss": 0.0, "step": 13267 }, { "epoch": 0.8550621898562867, "grad_norm": 0.0008531191131122104, "learning_rate": 1.6176154672395274e-06, "loss": 0.0, "step": 13268 }, { "epoch": 0.8551266353032159, "grad_norm": 0.003998335045763489, "learning_rate": 1.6168993913354815e-06, "loss": 0.0, "step": 13269 }, { "epoch": 0.855191080750145, "grad_norm": 0.163619043841412, "learning_rate": 1.6161833154314358e-06, "loss": 0.0004, "step": 13270 }, { "epoch": 0.8552555261970741, "grad_norm": 0.03945032064933214, "learning_rate": 1.6154672395273901e-06, "loss": 0.0002, "step": 13271 }, { "epoch": 0.8553199716440033, "grad_norm": 3.905622064627832e-05, "learning_rate": 1.6147511636233442e-06, "loss": 0.0, "step": 13272 }, { "epoch": 0.8553844170909325, "grad_norm": 0.08700623339778292, "learning_rate": 1.6140350877192983e-06, "loss": 0.0001, "step": 13273 }, { "epoch": 0.8554488625378617, "grad_norm": 0.0002938337301690031, "learning_rate": 1.6133190118152526e-06, "loss": 0.0, "step": 13274 }, { "epoch": 0.8555133079847909, "grad_norm": 0.0016490099642151128, "learning_rate": 1.6126029359112067e-06, "loss": 0.0, "step": 13275 }, { "epoch": 0.85557775343172, "grad_norm": 0.21113241867889743, "learning_rate": 1.6118868600071608e-06, "loss": 0.0012, "step": 13276 }, { "epoch": 0.8556421988786492, "grad_norm": 0.00034076801657522657, "learning_rate": 1.611170784103115e-06, "loss": 0.0, "step": 13277 }, { "epoch": 0.8557066443255784, "grad_norm": 0.0007656847467303316, "learning_rate": 1.6104547081990694e-06, "loss": 0.0, "step": 13278 }, { "epoch": 0.8557710897725076, "grad_norm": 0.004611446967922683, "learning_rate": 1.6097386322950235e-06, "loss": 0.0, "step": 13279 }, { "epoch": 0.8558355352194368, "grad_norm": 4.522851739194399e-05, "learning_rate": 1.6090225563909775e-06, "loss": 0.0, "step": 13280 }, { "epoch": 0.855899980666366, "grad_norm": 0.09413581178952365, "learning_rate": 1.6083064804869316e-06, "loss": 0.0017, "step": 13281 }, { "epoch": 0.855964426113295, "grad_norm": 0.004960847512303381, "learning_rate": 1.6075904045828857e-06, "loss": 0.0, "step": 13282 }, { "epoch": 0.8560288715602242, "grad_norm": 0.035130821078171386, "learning_rate": 1.6068743286788402e-06, "loss": 0.0, "step": 13283 }, { "epoch": 0.8560933170071534, "grad_norm": 0.6814632458239104, "learning_rate": 1.6061582527747943e-06, "loss": 0.0024, "step": 13284 }, { "epoch": 0.8561577624540826, "grad_norm": 0.0006295020560277422, "learning_rate": 1.6054421768707484e-06, "loss": 0.0, "step": 13285 }, { "epoch": 0.8562222079010118, "grad_norm": 0.000463167640538834, "learning_rate": 1.6047261009667025e-06, "loss": 0.0, "step": 13286 }, { "epoch": 0.856286653347941, "grad_norm": 0.003007813351736707, "learning_rate": 1.6040100250626568e-06, "loss": 0.0, "step": 13287 }, { "epoch": 0.8563510987948701, "grad_norm": 0.008826912320212115, "learning_rate": 1.6032939491586109e-06, "loss": 0.0, "step": 13288 }, { "epoch": 0.8564155442417993, "grad_norm": 0.004008362923994413, "learning_rate": 1.6025778732545652e-06, "loss": 0.0, "step": 13289 }, { "epoch": 0.8564799896887285, "grad_norm": 0.00025324509025633033, "learning_rate": 1.6018617973505193e-06, "loss": 0.0, "step": 13290 }, { "epoch": 0.8565444351356577, "grad_norm": 0.009025551561342875, "learning_rate": 1.6011457214464736e-06, "loss": 0.0, "step": 13291 }, { "epoch": 0.8566088805825869, "grad_norm": 0.15220108443539926, "learning_rate": 1.6004296455424276e-06, "loss": 0.0002, "step": 13292 }, { "epoch": 0.8566733260295161, "grad_norm": 0.0005597975580254473, "learning_rate": 1.5997135696383817e-06, "loss": 0.0, "step": 13293 }, { "epoch": 0.8567377714764451, "grad_norm": 0.2044531426959965, "learning_rate": 1.598997493734336e-06, "loss": 0.0004, "step": 13294 }, { "epoch": 0.8568022169233743, "grad_norm": 0.0065360515775523985, "learning_rate": 1.5982814178302903e-06, "loss": 0.0, "step": 13295 }, { "epoch": 0.8568666623703035, "grad_norm": 0.00010121851515030464, "learning_rate": 1.5975653419262444e-06, "loss": 0.0, "step": 13296 }, { "epoch": 0.8569311078172327, "grad_norm": 0.1027933826978185, "learning_rate": 1.5968492660221985e-06, "loss": 0.0018, "step": 13297 }, { "epoch": 0.8569955532641619, "grad_norm": 0.31212899118972154, "learning_rate": 1.5961331901181526e-06, "loss": 0.0011, "step": 13298 }, { "epoch": 0.8570599987110911, "grad_norm": 0.011037713981860054, "learning_rate": 1.5954171142141067e-06, "loss": 0.0, "step": 13299 }, { "epoch": 0.8571244441580202, "grad_norm": 0.17396643847202156, "learning_rate": 1.5947010383100612e-06, "loss": 0.0002, "step": 13300 }, { "epoch": 0.8571888896049494, "grad_norm": 0.27335187388744736, "learning_rate": 1.5939849624060153e-06, "loss": 0.0031, "step": 13301 }, { "epoch": 0.8572533350518786, "grad_norm": 0.5506759164068347, "learning_rate": 1.5932688865019694e-06, "loss": 0.0027, "step": 13302 }, { "epoch": 0.8573177804988078, "grad_norm": 0.038104221039077645, "learning_rate": 1.5925528105979234e-06, "loss": 0.0002, "step": 13303 }, { "epoch": 0.857382225945737, "grad_norm": 0.03073348441255423, "learning_rate": 1.5918367346938775e-06, "loss": 0.0, "step": 13304 }, { "epoch": 0.8574466713926661, "grad_norm": 0.003336774922123674, "learning_rate": 1.5911206587898318e-06, "loss": 0.0, "step": 13305 }, { "epoch": 0.8575111168395952, "grad_norm": 0.019049603577953083, "learning_rate": 1.5904045828857861e-06, "loss": 0.0, "step": 13306 }, { "epoch": 0.8575755622865244, "grad_norm": 0.006879444816098133, "learning_rate": 1.5896885069817402e-06, "loss": 0.0, "step": 13307 }, { "epoch": 0.8576400077334536, "grad_norm": 0.0035166293965754702, "learning_rate": 1.5889724310776943e-06, "loss": 0.0, "step": 13308 }, { "epoch": 0.8577044531803828, "grad_norm": 0.10649776478809318, "learning_rate": 1.5882563551736486e-06, "loss": 0.0012, "step": 13309 }, { "epoch": 0.857768898627312, "grad_norm": 0.03601981388202897, "learning_rate": 1.5875402792696027e-06, "loss": 0.0, "step": 13310 }, { "epoch": 0.8578333440742412, "grad_norm": 0.00010030491278225593, "learning_rate": 1.5868242033655568e-06, "loss": 0.0, "step": 13311 }, { "epoch": 0.8578977895211704, "grad_norm": 0.004006094877726113, "learning_rate": 1.586108127461511e-06, "loss": 0.0, "step": 13312 }, { "epoch": 0.8579622349680995, "grad_norm": 0.01206481644215532, "learning_rate": 1.5853920515574654e-06, "loss": 0.0, "step": 13313 }, { "epoch": 0.8580266804150287, "grad_norm": 0.0012709160823947188, "learning_rate": 1.5846759756534195e-06, "loss": 0.0, "step": 13314 }, { "epoch": 0.8580911258619579, "grad_norm": 0.0005756146179106836, "learning_rate": 1.5839598997493736e-06, "loss": 0.0, "step": 13315 }, { "epoch": 0.858155571308887, "grad_norm": 0.005792067297186545, "learning_rate": 1.5832438238453276e-06, "loss": 0.0, "step": 13316 }, { "epoch": 0.8582200167558162, "grad_norm": 0.01710870502387094, "learning_rate": 1.5825277479412817e-06, "loss": 0.0001, "step": 13317 }, { "epoch": 0.8582844622027453, "grad_norm": 0.002173941342640871, "learning_rate": 1.5818116720372362e-06, "loss": 0.0, "step": 13318 }, { "epoch": 0.8583489076496745, "grad_norm": 0.0006425608284127846, "learning_rate": 1.5810955961331903e-06, "loss": 0.0, "step": 13319 }, { "epoch": 0.8584133530966037, "grad_norm": 0.30172473763244206, "learning_rate": 1.5803795202291444e-06, "loss": 0.0016, "step": 13320 }, { "epoch": 0.8584777985435329, "grad_norm": 0.01861099902398579, "learning_rate": 1.5796634443250985e-06, "loss": 0.0, "step": 13321 }, { "epoch": 0.8585422439904621, "grad_norm": 0.0012165400924309016, "learning_rate": 1.5789473684210526e-06, "loss": 0.0, "step": 13322 }, { "epoch": 0.8586066894373913, "grad_norm": 0.0016069345021319553, "learning_rate": 1.5782312925170069e-06, "loss": 0.0, "step": 13323 }, { "epoch": 0.8586711348843205, "grad_norm": 0.011594244425208303, "learning_rate": 1.5775152166129612e-06, "loss": 0.0001, "step": 13324 }, { "epoch": 0.8587355803312496, "grad_norm": 0.0014469452087132976, "learning_rate": 1.5767991407089153e-06, "loss": 0.0, "step": 13325 }, { "epoch": 0.8588000257781788, "grad_norm": 0.4306278862323567, "learning_rate": 1.5760830648048694e-06, "loss": 0.0004, "step": 13326 }, { "epoch": 0.8588644712251079, "grad_norm": 0.003471658113443766, "learning_rate": 1.5753669889008237e-06, "loss": 0.0, "step": 13327 }, { "epoch": 0.8589289166720371, "grad_norm": 0.0004689620724818602, "learning_rate": 1.5746509129967777e-06, "loss": 0.0, "step": 13328 }, { "epoch": 0.8589933621189663, "grad_norm": 0.02153173085345865, "learning_rate": 1.573934837092732e-06, "loss": 0.0001, "step": 13329 }, { "epoch": 0.8590578075658954, "grad_norm": 0.2610378517842289, "learning_rate": 1.5732187611886861e-06, "loss": 0.0006, "step": 13330 }, { "epoch": 0.8591222530128246, "grad_norm": 0.02625268560888587, "learning_rate": 1.5725026852846404e-06, "loss": 0.0001, "step": 13331 }, { "epoch": 0.8591866984597538, "grad_norm": 0.050066121532948815, "learning_rate": 1.5717866093805945e-06, "loss": 0.0022, "step": 13332 }, { "epoch": 0.859251143906683, "grad_norm": 0.0002840123484040217, "learning_rate": 1.5710705334765486e-06, "loss": 0.0, "step": 13333 }, { "epoch": 0.8593155893536122, "grad_norm": 0.016482623120749718, "learning_rate": 1.5703544575725027e-06, "loss": 0.0, "step": 13334 }, { "epoch": 0.8593800348005414, "grad_norm": 0.0861558848682203, "learning_rate": 1.5696383816684572e-06, "loss": 0.0016, "step": 13335 }, { "epoch": 0.8594444802474706, "grad_norm": 0.00206119523180686, "learning_rate": 1.5689223057644113e-06, "loss": 0.0, "step": 13336 }, { "epoch": 0.8595089256943997, "grad_norm": 0.0010139368389927733, "learning_rate": 1.5682062298603654e-06, "loss": 0.0, "step": 13337 }, { "epoch": 0.8595733711413288, "grad_norm": 0.0006872459690220183, "learning_rate": 1.5674901539563195e-06, "loss": 0.0, "step": 13338 }, { "epoch": 0.859637816588258, "grad_norm": 0.0007045706704706853, "learning_rate": 1.5667740780522735e-06, "loss": 0.0, "step": 13339 }, { "epoch": 0.8597022620351872, "grad_norm": 0.05906231015101705, "learning_rate": 1.5660580021482276e-06, "loss": 0.0, "step": 13340 }, { "epoch": 0.8597667074821164, "grad_norm": 0.0011324638062900777, "learning_rate": 1.5653419262441821e-06, "loss": 0.0, "step": 13341 }, { "epoch": 0.8598311529290456, "grad_norm": 0.010297346909910793, "learning_rate": 1.5646258503401362e-06, "loss": 0.0, "step": 13342 }, { "epoch": 0.8598955983759747, "grad_norm": 0.29292520835291497, "learning_rate": 1.5639097744360903e-06, "loss": 0.0003, "step": 13343 }, { "epoch": 0.8599600438229039, "grad_norm": 0.031157416981430704, "learning_rate": 1.5631936985320444e-06, "loss": 0.0016, "step": 13344 }, { "epoch": 0.8600244892698331, "grad_norm": 0.005336673025525772, "learning_rate": 1.5624776226279987e-06, "loss": 0.0, "step": 13345 }, { "epoch": 0.8600889347167623, "grad_norm": 0.0007176590913766336, "learning_rate": 1.5617615467239528e-06, "loss": 0.0, "step": 13346 }, { "epoch": 0.8601533801636915, "grad_norm": 6.578943500745828e-05, "learning_rate": 1.561045470819907e-06, "loss": 0.0, "step": 13347 }, { "epoch": 0.8602178256106207, "grad_norm": 0.0002624024066516301, "learning_rate": 1.5603293949158612e-06, "loss": 0.0, "step": 13348 }, { "epoch": 0.8602822710575497, "grad_norm": 0.09216368719245878, "learning_rate": 1.5596133190118155e-06, "loss": 0.0006, "step": 13349 }, { "epoch": 0.8603467165044789, "grad_norm": 0.003946083988254043, "learning_rate": 1.5588972431077696e-06, "loss": 0.0, "step": 13350 }, { "epoch": 0.8604111619514081, "grad_norm": 0.000322491336789557, "learning_rate": 1.5581811672037236e-06, "loss": 0.0, "step": 13351 }, { "epoch": 0.8604756073983373, "grad_norm": 0.00325187811401634, "learning_rate": 1.5574650912996777e-06, "loss": 0.0, "step": 13352 }, { "epoch": 0.8605400528452665, "grad_norm": 0.02672152168310301, "learning_rate": 1.5567490153956322e-06, "loss": 0.0001, "step": 13353 }, { "epoch": 0.8606044982921957, "grad_norm": 0.0010806659731611257, "learning_rate": 1.5560329394915863e-06, "loss": 0.0, "step": 13354 }, { "epoch": 0.8606689437391248, "grad_norm": 0.0012196018515187096, "learning_rate": 1.5553168635875404e-06, "loss": 0.0, "step": 13355 }, { "epoch": 0.860733389186054, "grad_norm": 0.03056133185486748, "learning_rate": 1.5546007876834945e-06, "loss": 0.0001, "step": 13356 }, { "epoch": 0.8607978346329832, "grad_norm": 0.003190366588636452, "learning_rate": 1.5538847117794486e-06, "loss": 0.0, "step": 13357 }, { "epoch": 0.8608622800799124, "grad_norm": 0.0010321709471120919, "learning_rate": 1.5531686358754029e-06, "loss": 0.0, "step": 13358 }, { "epoch": 0.8609267255268416, "grad_norm": 0.005754727492837118, "learning_rate": 1.5524525599713572e-06, "loss": 0.0, "step": 13359 }, { "epoch": 0.8609911709737708, "grad_norm": 0.02314667772246618, "learning_rate": 1.5517364840673113e-06, "loss": 0.0003, "step": 13360 }, { "epoch": 0.8610556164206998, "grad_norm": 0.006225745757706881, "learning_rate": 1.5510204081632654e-06, "loss": 0.0, "step": 13361 }, { "epoch": 0.861120061867629, "grad_norm": 0.012019083819958221, "learning_rate": 1.5503043322592197e-06, "loss": 0.0, "step": 13362 }, { "epoch": 0.8611845073145582, "grad_norm": 0.000692814239205552, "learning_rate": 1.5495882563551737e-06, "loss": 0.0, "step": 13363 }, { "epoch": 0.8612489527614874, "grad_norm": 0.004901376096575237, "learning_rate": 1.548872180451128e-06, "loss": 0.0, "step": 13364 }, { "epoch": 0.8613133982084166, "grad_norm": 0.02895489313893144, "learning_rate": 1.5481561045470821e-06, "loss": 0.0001, "step": 13365 }, { "epoch": 0.8613778436553458, "grad_norm": 0.05815220203678352, "learning_rate": 1.5474400286430364e-06, "loss": 0.0002, "step": 13366 }, { "epoch": 0.8614422891022749, "grad_norm": 0.0003883691743082568, "learning_rate": 1.5467239527389905e-06, "loss": 0.0, "step": 13367 }, { "epoch": 0.8615067345492041, "grad_norm": 0.0003275088709743321, "learning_rate": 1.5460078768349446e-06, "loss": 0.0, "step": 13368 }, { "epoch": 0.8615711799961333, "grad_norm": 0.0009924672732805403, "learning_rate": 1.5452918009308987e-06, "loss": 0.0, "step": 13369 }, { "epoch": 0.8616356254430625, "grad_norm": 0.0034706501414582956, "learning_rate": 1.5445757250268532e-06, "loss": 0.0, "step": 13370 }, { "epoch": 0.8617000708899917, "grad_norm": 0.08772039352150697, "learning_rate": 1.5438596491228073e-06, "loss": 0.0017, "step": 13371 }, { "epoch": 0.8617645163369207, "grad_norm": 0.0026765761249290475, "learning_rate": 1.5431435732187614e-06, "loss": 0.0, "step": 13372 }, { "epoch": 0.8618289617838499, "grad_norm": 0.0022527401314781322, "learning_rate": 1.5424274973147155e-06, "loss": 0.0, "step": 13373 }, { "epoch": 0.8618934072307791, "grad_norm": 0.012568387233077847, "learning_rate": 1.5417114214106696e-06, "loss": 0.0, "step": 13374 }, { "epoch": 0.8619578526777083, "grad_norm": 0.030271575278257603, "learning_rate": 1.5409953455066236e-06, "loss": 0.0004, "step": 13375 }, { "epoch": 0.8620222981246375, "grad_norm": 4.230431264947589e-05, "learning_rate": 1.5402792696025781e-06, "loss": 0.0, "step": 13376 }, { "epoch": 0.8620867435715667, "grad_norm": 0.0004940029506976715, "learning_rate": 1.5395631936985322e-06, "loss": 0.0, "step": 13377 }, { "epoch": 0.8621511890184959, "grad_norm": 0.0011218007381096433, "learning_rate": 1.5388471177944863e-06, "loss": 0.0, "step": 13378 }, { "epoch": 0.862215634465425, "grad_norm": 0.07873246830828655, "learning_rate": 1.5381310418904404e-06, "loss": 0.0001, "step": 13379 }, { "epoch": 0.8622800799123542, "grad_norm": 0.025534215775952065, "learning_rate": 1.5374149659863947e-06, "loss": 0.0, "step": 13380 }, { "epoch": 0.8623445253592834, "grad_norm": 0.01992692039099067, "learning_rate": 1.5366988900823488e-06, "loss": 0.0002, "step": 13381 }, { "epoch": 0.8624089708062126, "grad_norm": 0.30028876324941983, "learning_rate": 1.535982814178303e-06, "loss": 0.0015, "step": 13382 }, { "epoch": 0.8624734162531417, "grad_norm": 0.0885966816085772, "learning_rate": 1.5352667382742572e-06, "loss": 0.0001, "step": 13383 }, { "epoch": 0.8625378617000709, "grad_norm": 7.695888165750718e-05, "learning_rate": 1.5345506623702115e-06, "loss": 0.0, "step": 13384 }, { "epoch": 0.862602307147, "grad_norm": 0.0010478483803299753, "learning_rate": 1.5338345864661656e-06, "loss": 0.0, "step": 13385 }, { "epoch": 0.8626667525939292, "grad_norm": 0.0009519866972095988, "learning_rate": 1.5331185105621197e-06, "loss": 0.0, "step": 13386 }, { "epoch": 0.8627311980408584, "grad_norm": 0.05243510078726089, "learning_rate": 1.5324024346580737e-06, "loss": 0.0001, "step": 13387 }, { "epoch": 0.8627956434877876, "grad_norm": 0.0004253109465269119, "learning_rate": 1.5316863587540282e-06, "loss": 0.0, "step": 13388 }, { "epoch": 0.8628600889347168, "grad_norm": 0.003103137673456359, "learning_rate": 1.5309702828499823e-06, "loss": 0.0, "step": 13389 }, { "epoch": 0.862924534381646, "grad_norm": 0.0005662525075994763, "learning_rate": 1.5302542069459364e-06, "loss": 0.0, "step": 13390 }, { "epoch": 0.8629889798285751, "grad_norm": 0.0021847116480264747, "learning_rate": 1.5295381310418905e-06, "loss": 0.0, "step": 13391 }, { "epoch": 0.8630534252755043, "grad_norm": 0.1484782827970648, "learning_rate": 1.5288220551378446e-06, "loss": 0.0001, "step": 13392 }, { "epoch": 0.8631178707224335, "grad_norm": 0.01737780890753743, "learning_rate": 1.5281059792337987e-06, "loss": 0.0, "step": 13393 }, { "epoch": 0.8631823161693626, "grad_norm": 0.0061111167426056365, "learning_rate": 1.5273899033297532e-06, "loss": 0.0001, "step": 13394 }, { "epoch": 0.8632467616162918, "grad_norm": 6.793325152705395e-05, "learning_rate": 1.5266738274257073e-06, "loss": 0.0, "step": 13395 }, { "epoch": 0.863311207063221, "grad_norm": 0.0013361856110366322, "learning_rate": 1.5259577515216614e-06, "loss": 0.0, "step": 13396 }, { "epoch": 0.8633756525101501, "grad_norm": 0.04722671877927456, "learning_rate": 1.5252416756176155e-06, "loss": 0.0, "step": 13397 }, { "epoch": 0.8634400979570793, "grad_norm": 0.03648959870640535, "learning_rate": 1.5245255997135698e-06, "loss": 0.0001, "step": 13398 }, { "epoch": 0.8635045434040085, "grad_norm": 0.0003418445429188015, "learning_rate": 1.523809523809524e-06, "loss": 0.0, "step": 13399 }, { "epoch": 0.8635689888509377, "grad_norm": 0.0008608845575165467, "learning_rate": 1.5230934479054781e-06, "loss": 0.0, "step": 13400 }, { "epoch": 0.8636334342978669, "grad_norm": 0.002113036385751762, "learning_rate": 1.5223773720014322e-06, "loss": 0.0, "step": 13401 }, { "epoch": 0.8636978797447961, "grad_norm": 0.0015854664134770094, "learning_rate": 1.5216612960973865e-06, "loss": 0.0, "step": 13402 }, { "epoch": 0.8637623251917252, "grad_norm": 0.0069202574918006445, "learning_rate": 1.5209452201933406e-06, "loss": 0.0, "step": 13403 }, { "epoch": 0.8638267706386544, "grad_norm": 0.0801661461210914, "learning_rate": 1.5202291442892947e-06, "loss": 0.0005, "step": 13404 }, { "epoch": 0.8638912160855835, "grad_norm": 0.03243276882707622, "learning_rate": 1.519513068385249e-06, "loss": 0.0017, "step": 13405 }, { "epoch": 0.8639556615325127, "grad_norm": 0.0045153442530477235, "learning_rate": 1.5187969924812033e-06, "loss": 0.0, "step": 13406 }, { "epoch": 0.8640201069794419, "grad_norm": 0.0018214858012334234, "learning_rate": 1.5180809165771574e-06, "loss": 0.0, "step": 13407 }, { "epoch": 0.864084552426371, "grad_norm": 0.006574459754307482, "learning_rate": 1.5173648406731115e-06, "loss": 0.0, "step": 13408 }, { "epoch": 0.8641489978733002, "grad_norm": 0.000748829527636167, "learning_rate": 1.5166487647690656e-06, "loss": 0.0, "step": 13409 }, { "epoch": 0.8642134433202294, "grad_norm": 0.003037122112591288, "learning_rate": 1.5159326888650196e-06, "loss": 0.0, "step": 13410 }, { "epoch": 0.8642778887671586, "grad_norm": 0.0004237258940566272, "learning_rate": 1.5152166129609742e-06, "loss": 0.0, "step": 13411 }, { "epoch": 0.8643423342140878, "grad_norm": 0.03228855131648255, "learning_rate": 1.5145005370569282e-06, "loss": 0.0, "step": 13412 }, { "epoch": 0.864406779661017, "grad_norm": 0.04334934425745581, "learning_rate": 1.5137844611528823e-06, "loss": 0.0001, "step": 13413 }, { "epoch": 0.8644712251079462, "grad_norm": 0.003014186912717113, "learning_rate": 1.5130683852488364e-06, "loss": 0.0, "step": 13414 }, { "epoch": 0.8645356705548753, "grad_norm": 0.0003104792844514378, "learning_rate": 1.5123523093447905e-06, "loss": 0.0, "step": 13415 }, { "epoch": 0.8646001160018044, "grad_norm": 3.002535634592271e-05, "learning_rate": 1.5116362334407448e-06, "loss": 0.0, "step": 13416 }, { "epoch": 0.8646645614487336, "grad_norm": 0.011916289380351773, "learning_rate": 1.510920157536699e-06, "loss": 0.0001, "step": 13417 }, { "epoch": 0.8647290068956628, "grad_norm": 0.00014985182630863873, "learning_rate": 1.5102040816326532e-06, "loss": 0.0, "step": 13418 }, { "epoch": 0.864793452342592, "grad_norm": 0.0009954287758235075, "learning_rate": 1.5094880057286073e-06, "loss": 0.0, "step": 13419 }, { "epoch": 0.8648578977895212, "grad_norm": 0.020676748662305275, "learning_rate": 1.5087719298245616e-06, "loss": 0.0, "step": 13420 }, { "epoch": 0.8649223432364503, "grad_norm": 0.016235645549665126, "learning_rate": 1.5080558539205157e-06, "loss": 0.0001, "step": 13421 }, { "epoch": 0.8649867886833795, "grad_norm": 0.004837391867327394, "learning_rate": 1.5073397780164697e-06, "loss": 0.0, "step": 13422 }, { "epoch": 0.8650512341303087, "grad_norm": 0.024349001979618767, "learning_rate": 1.506623702112424e-06, "loss": 0.0001, "step": 13423 }, { "epoch": 0.8651156795772379, "grad_norm": 0.00068867812212618, "learning_rate": 1.5059076262083783e-06, "loss": 0.0, "step": 13424 }, { "epoch": 0.8651801250241671, "grad_norm": 0.0016578578833495088, "learning_rate": 1.5051915503043324e-06, "loss": 0.0, "step": 13425 }, { "epoch": 0.8652445704710963, "grad_norm": 0.03446109256285739, "learning_rate": 1.5044754744002865e-06, "loss": 0.0001, "step": 13426 }, { "epoch": 0.8653090159180253, "grad_norm": 0.00011652735111931998, "learning_rate": 1.5037593984962406e-06, "loss": 0.0, "step": 13427 }, { "epoch": 0.8653734613649545, "grad_norm": 0.115670160063929, "learning_rate": 1.5030433225921947e-06, "loss": 0.0006, "step": 13428 }, { "epoch": 0.8654379068118837, "grad_norm": 0.0024135343454817215, "learning_rate": 1.5023272466881492e-06, "loss": 0.0, "step": 13429 }, { "epoch": 0.8655023522588129, "grad_norm": 0.017187191238393462, "learning_rate": 1.5016111707841033e-06, "loss": 0.0001, "step": 13430 }, { "epoch": 0.8655667977057421, "grad_norm": 0.35311210110829644, "learning_rate": 1.5008950948800574e-06, "loss": 0.0006, "step": 13431 }, { "epoch": 0.8656312431526713, "grad_norm": 0.3191252663884319, "learning_rate": 1.5001790189760115e-06, "loss": 0.0019, "step": 13432 }, { "epoch": 0.8656956885996004, "grad_norm": 0.04233723504764846, "learning_rate": 1.4994629430719658e-06, "loss": 0.0016, "step": 13433 }, { "epoch": 0.8657601340465296, "grad_norm": 0.00145089825630913, "learning_rate": 1.4987468671679198e-06, "loss": 0.0, "step": 13434 }, { "epoch": 0.8658245794934588, "grad_norm": 0.20710145012059114, "learning_rate": 1.4980307912638741e-06, "loss": 0.0002, "step": 13435 }, { "epoch": 0.865889024940388, "grad_norm": 0.0031344022196231128, "learning_rate": 1.4973147153598282e-06, "loss": 0.0, "step": 13436 }, { "epoch": 0.8659534703873172, "grad_norm": 0.0118111356298611, "learning_rate": 1.4965986394557825e-06, "loss": 0.0, "step": 13437 }, { "epoch": 0.8660179158342464, "grad_norm": 0.0065583660622332745, "learning_rate": 1.4958825635517366e-06, "loss": 0.0, "step": 13438 }, { "epoch": 0.8660823612811754, "grad_norm": 0.2883475069855709, "learning_rate": 1.4951664876476907e-06, "loss": 0.0025, "step": 13439 }, { "epoch": 0.8661468067281046, "grad_norm": 0.005772183739680817, "learning_rate": 1.494450411743645e-06, "loss": 0.0, "step": 13440 }, { "epoch": 0.8662112521750338, "grad_norm": 0.0052695852327754155, "learning_rate": 1.4937343358395993e-06, "loss": 0.0, "step": 13441 }, { "epoch": 0.866275697621963, "grad_norm": 0.0012456353387858528, "learning_rate": 1.4930182599355534e-06, "loss": 0.0, "step": 13442 }, { "epoch": 0.8663401430688922, "grad_norm": 0.0015878721758347974, "learning_rate": 1.4923021840315075e-06, "loss": 0.0, "step": 13443 }, { "epoch": 0.8664045885158214, "grad_norm": 0.014455519841065518, "learning_rate": 1.4915861081274616e-06, "loss": 0.0, "step": 13444 }, { "epoch": 0.8664690339627505, "grad_norm": 0.04454679008845088, "learning_rate": 1.4908700322234157e-06, "loss": 0.0001, "step": 13445 }, { "epoch": 0.8665334794096797, "grad_norm": 0.45188975503978307, "learning_rate": 1.4901539563193702e-06, "loss": 0.003, "step": 13446 }, { "epoch": 0.8665979248566089, "grad_norm": 0.010539871515908213, "learning_rate": 1.4894378804153242e-06, "loss": 0.0001, "step": 13447 }, { "epoch": 0.8666623703035381, "grad_norm": 0.005161306898547368, "learning_rate": 1.4887218045112783e-06, "loss": 0.0, "step": 13448 }, { "epoch": 0.8667268157504673, "grad_norm": 0.0016819094308091322, "learning_rate": 1.4880057286072324e-06, "loss": 0.0, "step": 13449 }, { "epoch": 0.8667912611973964, "grad_norm": 0.13813809970298882, "learning_rate": 1.4872896527031865e-06, "loss": 0.0018, "step": 13450 }, { "epoch": 0.8668557066443255, "grad_norm": 2.5470248661249853e-05, "learning_rate": 1.4865735767991408e-06, "loss": 0.0, "step": 13451 }, { "epoch": 0.8669201520912547, "grad_norm": 0.001220750155884022, "learning_rate": 1.4858575008950951e-06, "loss": 0.0, "step": 13452 }, { "epoch": 0.8669845975381839, "grad_norm": 0.09825203355172227, "learning_rate": 1.4851414249910492e-06, "loss": 0.001, "step": 13453 }, { "epoch": 0.8670490429851131, "grad_norm": 0.0006788445189597282, "learning_rate": 1.4844253490870033e-06, "loss": 0.0, "step": 13454 }, { "epoch": 0.8671134884320423, "grad_norm": 0.040948573152339476, "learning_rate": 1.4837092731829576e-06, "loss": 0.0002, "step": 13455 }, { "epoch": 0.8671779338789715, "grad_norm": 1.2300284915043287, "learning_rate": 1.4829931972789117e-06, "loss": 0.0033, "step": 13456 }, { "epoch": 0.8672423793259006, "grad_norm": 0.0013821472762745976, "learning_rate": 1.4822771213748658e-06, "loss": 0.0, "step": 13457 }, { "epoch": 0.8673068247728298, "grad_norm": 0.0001129702660427535, "learning_rate": 1.48156104547082e-06, "loss": 0.0, "step": 13458 }, { "epoch": 0.867371270219759, "grad_norm": 0.05667991960518738, "learning_rate": 1.4808449695667744e-06, "loss": 0.0002, "step": 13459 }, { "epoch": 0.8674357156666882, "grad_norm": 0.01425209902570669, "learning_rate": 1.4801288936627284e-06, "loss": 0.0001, "step": 13460 }, { "epoch": 0.8675001611136173, "grad_norm": 0.002137552431334319, "learning_rate": 1.4794128177586825e-06, "loss": 0.0, "step": 13461 }, { "epoch": 0.8675646065605465, "grad_norm": 0.13578705766458848, "learning_rate": 1.4786967418546366e-06, "loss": 0.0001, "step": 13462 }, { "epoch": 0.8676290520074756, "grad_norm": 0.0021114395670508357, "learning_rate": 1.4779806659505907e-06, "loss": 0.0, "step": 13463 }, { "epoch": 0.8676934974544048, "grad_norm": 0.00019131494950189957, "learning_rate": 1.4772645900465452e-06, "loss": 0.0, "step": 13464 }, { "epoch": 0.867757942901334, "grad_norm": 0.0035857106761646687, "learning_rate": 1.4765485141424993e-06, "loss": 0.0, "step": 13465 }, { "epoch": 0.8678223883482632, "grad_norm": 0.0001692712253456178, "learning_rate": 1.4758324382384534e-06, "loss": 0.0, "step": 13466 }, { "epoch": 0.8678868337951924, "grad_norm": 0.0005669241135073661, "learning_rate": 1.4751163623344075e-06, "loss": 0.0, "step": 13467 }, { "epoch": 0.8679512792421216, "grad_norm": 0.001889031131247042, "learning_rate": 1.4744002864303616e-06, "loss": 0.0, "step": 13468 }, { "epoch": 0.8680157246890507, "grad_norm": 0.05634150570667161, "learning_rate": 1.4736842105263159e-06, "loss": 0.0001, "step": 13469 }, { "epoch": 0.8680801701359799, "grad_norm": 0.016770040901093828, "learning_rate": 1.4729681346222702e-06, "loss": 0.0, "step": 13470 }, { "epoch": 0.8681446155829091, "grad_norm": 0.007393320511604408, "learning_rate": 1.4722520587182242e-06, "loss": 0.0, "step": 13471 }, { "epoch": 0.8682090610298382, "grad_norm": 0.005720097209988372, "learning_rate": 1.4715359828141783e-06, "loss": 0.0, "step": 13472 }, { "epoch": 0.8682735064767674, "grad_norm": 0.0014226270995623246, "learning_rate": 1.4708199069101326e-06, "loss": 0.0, "step": 13473 }, { "epoch": 0.8683379519236966, "grad_norm": 0.03148733109778701, "learning_rate": 1.4701038310060867e-06, "loss": 0.0001, "step": 13474 }, { "epoch": 0.8684023973706257, "grad_norm": 0.018190387811027366, "learning_rate": 1.469387755102041e-06, "loss": 0.0, "step": 13475 }, { "epoch": 0.8684668428175549, "grad_norm": 0.00022876699057328286, "learning_rate": 1.468671679197995e-06, "loss": 0.0, "step": 13476 }, { "epoch": 0.8685312882644841, "grad_norm": 0.007228422026355488, "learning_rate": 1.4679556032939494e-06, "loss": 0.0, "step": 13477 }, { "epoch": 0.8685957337114133, "grad_norm": 0.022195556988556204, "learning_rate": 1.4672395273899035e-06, "loss": 0.0, "step": 13478 }, { "epoch": 0.8686601791583425, "grad_norm": 0.0018545337949908754, "learning_rate": 1.4665234514858576e-06, "loss": 0.0, "step": 13479 }, { "epoch": 0.8687246246052717, "grad_norm": 0.12720693745515196, "learning_rate": 1.4658073755818117e-06, "loss": 0.0003, "step": 13480 }, { "epoch": 0.8687890700522009, "grad_norm": 0.14163634432106645, "learning_rate": 1.4650912996777662e-06, "loss": 0.0018, "step": 13481 }, { "epoch": 0.86885351549913, "grad_norm": 0.4481010131587821, "learning_rate": 1.4643752237737203e-06, "loss": 0.0029, "step": 13482 }, { "epoch": 0.8689179609460591, "grad_norm": 0.00217015626315937, "learning_rate": 1.4636591478696743e-06, "loss": 0.0, "step": 13483 }, { "epoch": 0.8689824063929883, "grad_norm": 0.0025758466115319723, "learning_rate": 1.4629430719656284e-06, "loss": 0.0, "step": 13484 }, { "epoch": 0.8690468518399175, "grad_norm": 0.44933947723859485, "learning_rate": 1.4622269960615825e-06, "loss": 0.0008, "step": 13485 }, { "epoch": 0.8691112972868467, "grad_norm": 0.006622376254887656, "learning_rate": 1.4615109201575366e-06, "loss": 0.0, "step": 13486 }, { "epoch": 0.8691757427337758, "grad_norm": 0.010118893463182543, "learning_rate": 1.4607948442534911e-06, "loss": 0.0, "step": 13487 }, { "epoch": 0.869240188180705, "grad_norm": 0.002533162206517466, "learning_rate": 1.4600787683494452e-06, "loss": 0.0, "step": 13488 }, { "epoch": 0.8693046336276342, "grad_norm": 0.0046699535377350216, "learning_rate": 1.4593626924453993e-06, "loss": 0.0, "step": 13489 }, { "epoch": 0.8693690790745634, "grad_norm": 0.0006231500933800507, "learning_rate": 1.4586466165413534e-06, "loss": 0.0, "step": 13490 }, { "epoch": 0.8694335245214926, "grad_norm": 0.0129105570295586, "learning_rate": 1.4579305406373077e-06, "loss": 0.0, "step": 13491 }, { "epoch": 0.8694979699684218, "grad_norm": 0.6536391411143632, "learning_rate": 1.4572144647332618e-06, "loss": 0.003, "step": 13492 }, { "epoch": 0.869562415415351, "grad_norm": 0.00645190186822942, "learning_rate": 1.456498388829216e-06, "loss": 0.0, "step": 13493 }, { "epoch": 0.86962686086228, "grad_norm": 0.021776802677437902, "learning_rate": 1.4557823129251701e-06, "loss": 0.0002, "step": 13494 }, { "epoch": 0.8696913063092092, "grad_norm": 0.9354631868333679, "learning_rate": 1.4550662370211244e-06, "loss": 0.0034, "step": 13495 }, { "epoch": 0.8697557517561384, "grad_norm": 0.24174388126058172, "learning_rate": 1.4543501611170785e-06, "loss": 0.002, "step": 13496 }, { "epoch": 0.8698201972030676, "grad_norm": 0.002051742025662557, "learning_rate": 1.4536340852130326e-06, "loss": 0.0, "step": 13497 }, { "epoch": 0.8698846426499968, "grad_norm": 0.03131467117702846, "learning_rate": 1.4529180093089867e-06, "loss": 0.0, "step": 13498 }, { "epoch": 0.869949088096926, "grad_norm": 0.016429974192892383, "learning_rate": 1.4522019334049412e-06, "loss": 0.0002, "step": 13499 }, { "epoch": 0.8700135335438551, "grad_norm": 0.0035691864897899376, "learning_rate": 1.4514858575008953e-06, "loss": 0.0, "step": 13500 }, { "epoch": 0.8700779789907843, "grad_norm": 0.014284719551174541, "learning_rate": 1.4507697815968494e-06, "loss": 0.0, "step": 13501 }, { "epoch": 0.8701424244377135, "grad_norm": 0.0003508945434332547, "learning_rate": 1.4500537056928035e-06, "loss": 0.0, "step": 13502 }, { "epoch": 0.8702068698846427, "grad_norm": 0.005536226193423074, "learning_rate": 1.4493376297887576e-06, "loss": 0.0, "step": 13503 }, { "epoch": 0.8702713153315719, "grad_norm": 0.013431267923642803, "learning_rate": 1.4486215538847119e-06, "loss": 0.0, "step": 13504 }, { "epoch": 0.8703357607785009, "grad_norm": 0.03924292381700065, "learning_rate": 1.4479054779806662e-06, "loss": 0.0, "step": 13505 }, { "epoch": 0.8704002062254301, "grad_norm": 0.014671159050005588, "learning_rate": 1.4471894020766202e-06, "loss": 0.0, "step": 13506 }, { "epoch": 0.8704646516723593, "grad_norm": 0.00044016681878419526, "learning_rate": 1.4464733261725743e-06, "loss": 0.0, "step": 13507 }, { "epoch": 0.8705290971192885, "grad_norm": 0.0027930283003449025, "learning_rate": 1.4457572502685286e-06, "loss": 0.0, "step": 13508 }, { "epoch": 0.8705935425662177, "grad_norm": 0.005349489845537028, "learning_rate": 1.4450411743644827e-06, "loss": 0.0, "step": 13509 }, { "epoch": 0.8706579880131469, "grad_norm": 0.0025331927243357815, "learning_rate": 1.444325098460437e-06, "loss": 0.0, "step": 13510 }, { "epoch": 0.870722433460076, "grad_norm": 0.1705458464266017, "learning_rate": 1.4436090225563911e-06, "loss": 0.0003, "step": 13511 }, { "epoch": 0.8707868789070052, "grad_norm": 0.017371828112596428, "learning_rate": 1.4428929466523454e-06, "loss": 0.0, "step": 13512 }, { "epoch": 0.8708513243539344, "grad_norm": 0.0038565979744293725, "learning_rate": 1.4421768707482995e-06, "loss": 0.0, "step": 13513 }, { "epoch": 0.8709157698008636, "grad_norm": 0.0015431336010864626, "learning_rate": 1.4414607948442536e-06, "loss": 0.0, "step": 13514 }, { "epoch": 0.8709802152477928, "grad_norm": 0.38062584393036586, "learning_rate": 1.4407447189402077e-06, "loss": 0.0007, "step": 13515 }, { "epoch": 0.871044660694722, "grad_norm": 0.002684096329343692, "learning_rate": 1.4400286430361622e-06, "loss": 0.0, "step": 13516 }, { "epoch": 0.871109106141651, "grad_norm": 0.0004909211941232381, "learning_rate": 1.4393125671321163e-06, "loss": 0.0, "step": 13517 }, { "epoch": 0.8711735515885802, "grad_norm": 0.001157450722865243, "learning_rate": 1.4385964912280704e-06, "loss": 0.0, "step": 13518 }, { "epoch": 0.8712379970355094, "grad_norm": 0.0035391587568699614, "learning_rate": 1.4378804153240244e-06, "loss": 0.0, "step": 13519 }, { "epoch": 0.8713024424824386, "grad_norm": 0.00045686885179854926, "learning_rate": 1.4371643394199785e-06, "loss": 0.0, "step": 13520 }, { "epoch": 0.8713668879293678, "grad_norm": 0.0021138812403863616, "learning_rate": 1.4364482635159326e-06, "loss": 0.0, "step": 13521 }, { "epoch": 0.871431333376297, "grad_norm": 0.2128194987620841, "learning_rate": 1.4357321876118871e-06, "loss": 0.0006, "step": 13522 }, { "epoch": 0.8714957788232262, "grad_norm": 0.01931788608564039, "learning_rate": 1.4350161117078412e-06, "loss": 0.0, "step": 13523 }, { "epoch": 0.8715602242701553, "grad_norm": 0.0003648647198489776, "learning_rate": 1.4343000358037953e-06, "loss": 0.0, "step": 13524 }, { "epoch": 0.8716246697170845, "grad_norm": 0.004620281109182782, "learning_rate": 1.4335839598997494e-06, "loss": 0.0, "step": 13525 }, { "epoch": 0.8716891151640137, "grad_norm": 0.017122646764027347, "learning_rate": 1.4328678839957037e-06, "loss": 0.0, "step": 13526 }, { "epoch": 0.8717535606109429, "grad_norm": 0.0012365848463775104, "learning_rate": 1.4321518080916578e-06, "loss": 0.0, "step": 13527 }, { "epoch": 0.871818006057872, "grad_norm": 0.0016554455275155562, "learning_rate": 1.431435732187612e-06, "loss": 0.0, "step": 13528 }, { "epoch": 0.8718824515048011, "grad_norm": 0.018926993280560703, "learning_rate": 1.4307196562835662e-06, "loss": 0.0015, "step": 13529 }, { "epoch": 0.8719468969517303, "grad_norm": 0.014421366017851302, "learning_rate": 1.4300035803795205e-06, "loss": 0.0001, "step": 13530 }, { "epoch": 0.8720113423986595, "grad_norm": 0.0012043756470092263, "learning_rate": 1.4292875044754745e-06, "loss": 0.0, "step": 13531 }, { "epoch": 0.8720757878455887, "grad_norm": 0.06249889730434415, "learning_rate": 1.4285714285714286e-06, "loss": 0.0001, "step": 13532 }, { "epoch": 0.8721402332925179, "grad_norm": 0.00483204331224003, "learning_rate": 1.4278553526673827e-06, "loss": 0.0, "step": 13533 }, { "epoch": 0.8722046787394471, "grad_norm": 0.016188550479828548, "learning_rate": 1.4271392767633372e-06, "loss": 0.0001, "step": 13534 }, { "epoch": 0.8722691241863763, "grad_norm": 0.2588630282665265, "learning_rate": 1.4264232008592913e-06, "loss": 0.0022, "step": 13535 }, { "epoch": 0.8723335696333054, "grad_norm": 0.017522219661073486, "learning_rate": 1.4257071249552454e-06, "loss": 0.0, "step": 13536 }, { "epoch": 0.8723980150802346, "grad_norm": 4.777473903327223, "learning_rate": 1.4249910490511995e-06, "loss": 0.0184, "step": 13537 }, { "epoch": 0.8724624605271638, "grad_norm": 0.1122914928480347, "learning_rate": 1.4242749731471536e-06, "loss": 0.0, "step": 13538 }, { "epoch": 0.8725269059740929, "grad_norm": 0.0003457079673620274, "learning_rate": 1.4235588972431077e-06, "loss": 0.0, "step": 13539 }, { "epoch": 0.8725913514210221, "grad_norm": 0.008842368697230442, "learning_rate": 1.4228428213390622e-06, "loss": 0.0, "step": 13540 }, { "epoch": 0.8726557968679513, "grad_norm": 0.16889078991419842, "learning_rate": 1.4221267454350163e-06, "loss": 0.0005, "step": 13541 }, { "epoch": 0.8727202423148804, "grad_norm": 0.09180406278511782, "learning_rate": 1.4214106695309703e-06, "loss": 0.0001, "step": 13542 }, { "epoch": 0.8727846877618096, "grad_norm": 0.0002542434149114111, "learning_rate": 1.4206945936269244e-06, "loss": 0.0, "step": 13543 }, { "epoch": 0.8728491332087388, "grad_norm": 0.00021139095458569033, "learning_rate": 1.4199785177228787e-06, "loss": 0.0, "step": 13544 }, { "epoch": 0.872913578655668, "grad_norm": 0.006701556482451194, "learning_rate": 1.419262441818833e-06, "loss": 0.0, "step": 13545 }, { "epoch": 0.8729780241025972, "grad_norm": 0.0005409555739583791, "learning_rate": 1.4185463659147871e-06, "loss": 0.0, "step": 13546 }, { "epoch": 0.8730424695495264, "grad_norm": 0.29029280227176923, "learning_rate": 1.4178302900107412e-06, "loss": 0.0005, "step": 13547 }, { "epoch": 0.8731069149964555, "grad_norm": 0.003792905193337104, "learning_rate": 1.4171142141066955e-06, "loss": 0.0, "step": 13548 }, { "epoch": 0.8731713604433847, "grad_norm": 0.027987387467200055, "learning_rate": 1.4163981382026496e-06, "loss": 0.0, "step": 13549 }, { "epoch": 0.8732358058903138, "grad_norm": 0.004768562503807217, "learning_rate": 1.4156820622986037e-06, "loss": 0.0, "step": 13550 }, { "epoch": 0.873300251337243, "grad_norm": 0.0008558681731003965, "learning_rate": 1.414965986394558e-06, "loss": 0.0, "step": 13551 }, { "epoch": 0.8733646967841722, "grad_norm": 0.09874931857320618, "learning_rate": 1.4142499104905123e-06, "loss": 0.0001, "step": 13552 }, { "epoch": 0.8734291422311014, "grad_norm": 0.5997927258018302, "learning_rate": 1.4135338345864664e-06, "loss": 0.0015, "step": 13553 }, { "epoch": 0.8734935876780305, "grad_norm": 0.06590779190624765, "learning_rate": 1.4128177586824204e-06, "loss": 0.0001, "step": 13554 }, { "epoch": 0.8735580331249597, "grad_norm": 0.010813035118637227, "learning_rate": 1.4121016827783745e-06, "loss": 0.0001, "step": 13555 }, { "epoch": 0.8736224785718889, "grad_norm": 0.0007776535748360822, "learning_rate": 1.4113856068743286e-06, "loss": 0.0, "step": 13556 }, { "epoch": 0.8736869240188181, "grad_norm": 0.514541151528957, "learning_rate": 1.4106695309702831e-06, "loss": 0.0216, "step": 13557 }, { "epoch": 0.8737513694657473, "grad_norm": 0.0011680135680204743, "learning_rate": 1.4099534550662372e-06, "loss": 0.0, "step": 13558 }, { "epoch": 0.8738158149126765, "grad_norm": 0.001274273368860219, "learning_rate": 1.4092373791621913e-06, "loss": 0.0, "step": 13559 }, { "epoch": 0.8738802603596056, "grad_norm": 0.0016573716185651368, "learning_rate": 1.4085213032581454e-06, "loss": 0.0, "step": 13560 }, { "epoch": 0.8739447058065347, "grad_norm": 0.0005385912905206142, "learning_rate": 1.4078052273540995e-06, "loss": 0.0, "step": 13561 }, { "epoch": 0.8740091512534639, "grad_norm": 0.09338146530950199, "learning_rate": 1.4070891514500538e-06, "loss": 0.0002, "step": 13562 }, { "epoch": 0.8740735967003931, "grad_norm": 0.0030035474103590263, "learning_rate": 1.406373075546008e-06, "loss": 0.0, "step": 13563 }, { "epoch": 0.8741380421473223, "grad_norm": 0.0005251321157146958, "learning_rate": 1.4056569996419622e-06, "loss": 0.0, "step": 13564 }, { "epoch": 0.8742024875942515, "grad_norm": 0.0021644242133924342, "learning_rate": 1.4049409237379162e-06, "loss": 0.0, "step": 13565 }, { "epoch": 0.8742669330411806, "grad_norm": 0.0002113496909306623, "learning_rate": 1.4042248478338705e-06, "loss": 0.0, "step": 13566 }, { "epoch": 0.8743313784881098, "grad_norm": 0.002869868115987156, "learning_rate": 1.4035087719298246e-06, "loss": 0.0, "step": 13567 }, { "epoch": 0.874395823935039, "grad_norm": 3.127468659924674, "learning_rate": 1.4027926960257787e-06, "loss": 0.0123, "step": 13568 }, { "epoch": 0.8744602693819682, "grad_norm": 0.08067815140353134, "learning_rate": 1.402076620121733e-06, "loss": 0.0002, "step": 13569 }, { "epoch": 0.8745247148288974, "grad_norm": 0.017830586287385566, "learning_rate": 1.4013605442176873e-06, "loss": 0.0, "step": 13570 }, { "epoch": 0.8745891602758266, "grad_norm": 0.006552162045926843, "learning_rate": 1.4006444683136414e-06, "loss": 0.0, "step": 13571 }, { "epoch": 0.8746536057227556, "grad_norm": 8.130109325365869e-05, "learning_rate": 1.3999283924095955e-06, "loss": 0.0, "step": 13572 }, { "epoch": 0.8747180511696848, "grad_norm": 0.00481453068839515, "learning_rate": 1.3992123165055496e-06, "loss": 0.0, "step": 13573 }, { "epoch": 0.874782496616614, "grad_norm": 0.0016966316384423679, "learning_rate": 1.3984962406015037e-06, "loss": 0.0, "step": 13574 }, { "epoch": 0.8748469420635432, "grad_norm": 6.507700313948872e-05, "learning_rate": 1.3977801646974582e-06, "loss": 0.0, "step": 13575 }, { "epoch": 0.8749113875104724, "grad_norm": 0.0010351706398880871, "learning_rate": 1.3970640887934123e-06, "loss": 0.0, "step": 13576 }, { "epoch": 0.8749758329574016, "grad_norm": 0.002042297500884413, "learning_rate": 1.3963480128893664e-06, "loss": 0.0, "step": 13577 }, { "epoch": 0.8750402784043307, "grad_norm": 0.00035755874045150096, "learning_rate": 1.3956319369853204e-06, "loss": 0.0, "step": 13578 }, { "epoch": 0.8751047238512599, "grad_norm": 0.0007519831593113182, "learning_rate": 1.3949158610812747e-06, "loss": 0.0, "step": 13579 }, { "epoch": 0.8751691692981891, "grad_norm": 0.01997057158629694, "learning_rate": 1.394199785177229e-06, "loss": 0.0015, "step": 13580 }, { "epoch": 0.8752336147451183, "grad_norm": 0.00042799543401737973, "learning_rate": 1.3934837092731831e-06, "loss": 0.0, "step": 13581 }, { "epoch": 0.8752980601920475, "grad_norm": 0.003192598232773263, "learning_rate": 1.3927676333691372e-06, "loss": 0.0, "step": 13582 }, { "epoch": 0.8753625056389766, "grad_norm": 0.00041092294582336747, "learning_rate": 1.3920515574650915e-06, "loss": 0.0, "step": 13583 }, { "epoch": 0.8754269510859057, "grad_norm": 0.00015048739275314575, "learning_rate": 1.3913354815610456e-06, "loss": 0.0, "step": 13584 }, { "epoch": 0.8754913965328349, "grad_norm": 0.00040192569485888335, "learning_rate": 1.3906194056569997e-06, "loss": 0.0, "step": 13585 }, { "epoch": 0.8755558419797641, "grad_norm": 0.003118728490399756, "learning_rate": 1.389903329752954e-06, "loss": 0.0, "step": 13586 }, { "epoch": 0.8756202874266933, "grad_norm": 0.028921511163939883, "learning_rate": 1.3891872538489083e-06, "loss": 0.0003, "step": 13587 }, { "epoch": 0.8756847328736225, "grad_norm": 0.0007635765470389747, "learning_rate": 1.3884711779448624e-06, "loss": 0.0, "step": 13588 }, { "epoch": 0.8757491783205517, "grad_norm": 0.019386178158575473, "learning_rate": 1.3877551020408165e-06, "loss": 0.0001, "step": 13589 }, { "epoch": 0.8758136237674808, "grad_norm": 0.008952326877814495, "learning_rate": 1.3870390261367705e-06, "loss": 0.0, "step": 13590 }, { "epoch": 0.87587806921441, "grad_norm": 0.0064969913757071, "learning_rate": 1.3863229502327246e-06, "loss": 0.0, "step": 13591 }, { "epoch": 0.8759425146613392, "grad_norm": 0.24652262056953084, "learning_rate": 1.3856068743286791e-06, "loss": 0.0098, "step": 13592 }, { "epoch": 0.8760069601082684, "grad_norm": 0.008717637446990048, "learning_rate": 1.3848907984246332e-06, "loss": 0.0, "step": 13593 }, { "epoch": 0.8760714055551976, "grad_norm": 0.1055407587350373, "learning_rate": 1.3841747225205873e-06, "loss": 0.0001, "step": 13594 }, { "epoch": 0.8761358510021267, "grad_norm": 0.013464757423937554, "learning_rate": 1.3834586466165414e-06, "loss": 0.0, "step": 13595 }, { "epoch": 0.8762002964490558, "grad_norm": 0.0014337143804371206, "learning_rate": 1.3827425707124955e-06, "loss": 0.0, "step": 13596 }, { "epoch": 0.876264741895985, "grad_norm": 0.4674874787515268, "learning_rate": 1.3820264948084498e-06, "loss": 0.0015, "step": 13597 }, { "epoch": 0.8763291873429142, "grad_norm": 0.00098483335952236, "learning_rate": 1.381310418904404e-06, "loss": 0.0, "step": 13598 }, { "epoch": 0.8763936327898434, "grad_norm": 0.09651554507659374, "learning_rate": 1.3805943430003582e-06, "loss": 0.0002, "step": 13599 }, { "epoch": 0.8764580782367726, "grad_norm": 0.05975753060859266, "learning_rate": 1.3798782670963123e-06, "loss": 0.0006, "step": 13600 }, { "epoch": 0.8765225236837018, "grad_norm": 0.0008181424472536412, "learning_rate": 1.3791621911922666e-06, "loss": 0.0, "step": 13601 }, { "epoch": 0.8765869691306309, "grad_norm": 0.0007309612300930143, "learning_rate": 1.3784461152882206e-06, "loss": 0.0, "step": 13602 }, { "epoch": 0.8766514145775601, "grad_norm": 0.0004453427397565943, "learning_rate": 1.3777300393841747e-06, "loss": 0.0, "step": 13603 }, { "epoch": 0.8767158600244893, "grad_norm": 0.0013161070099909815, "learning_rate": 1.377013963480129e-06, "loss": 0.0, "step": 13604 }, { "epoch": 0.8767803054714185, "grad_norm": 8.2985330607004e-05, "learning_rate": 1.3762978875760833e-06, "loss": 0.0, "step": 13605 }, { "epoch": 0.8768447509183476, "grad_norm": 0.0014163201861122309, "learning_rate": 1.3755818116720374e-06, "loss": 0.0, "step": 13606 }, { "epoch": 0.8769091963652768, "grad_norm": 0.006504134348321924, "learning_rate": 1.3748657357679915e-06, "loss": 0.0, "step": 13607 }, { "epoch": 0.8769736418122059, "grad_norm": 0.004427846073773377, "learning_rate": 1.3741496598639456e-06, "loss": 0.0, "step": 13608 }, { "epoch": 0.8770380872591351, "grad_norm": 8.069661349110574e-05, "learning_rate": 1.3734335839598997e-06, "loss": 0.0, "step": 13609 }, { "epoch": 0.8771025327060643, "grad_norm": 0.00013724066858743323, "learning_rate": 1.3727175080558542e-06, "loss": 0.0, "step": 13610 }, { "epoch": 0.8771669781529935, "grad_norm": 1.0793487189499922e-05, "learning_rate": 1.3720014321518083e-06, "loss": 0.0, "step": 13611 }, { "epoch": 0.8772314235999227, "grad_norm": 0.016116090551314, "learning_rate": 1.3712853562477624e-06, "loss": 0.0001, "step": 13612 }, { "epoch": 0.8772958690468519, "grad_norm": 0.0044309911209259294, "learning_rate": 1.3705692803437164e-06, "loss": 0.0, "step": 13613 }, { "epoch": 0.877360314493781, "grad_norm": 0.00038663612199369644, "learning_rate": 1.3698532044396705e-06, "loss": 0.0, "step": 13614 }, { "epoch": 0.8774247599407102, "grad_norm": 0.00034108201632122604, "learning_rate": 1.369137128535625e-06, "loss": 0.0, "step": 13615 }, { "epoch": 0.8774892053876394, "grad_norm": 0.0007516695032004047, "learning_rate": 1.3684210526315791e-06, "loss": 0.0, "step": 13616 }, { "epoch": 0.8775536508345685, "grad_norm": 0.000810677722890353, "learning_rate": 1.3677049767275332e-06, "loss": 0.0, "step": 13617 }, { "epoch": 0.8776180962814977, "grad_norm": 0.0004293978015281151, "learning_rate": 1.3669889008234873e-06, "loss": 0.0, "step": 13618 }, { "epoch": 0.8776825417284269, "grad_norm": 0.09996970768416442, "learning_rate": 1.3662728249194416e-06, "loss": 0.0018, "step": 13619 }, { "epoch": 0.877746987175356, "grad_norm": 0.009210777517721869, "learning_rate": 1.3655567490153957e-06, "loss": 0.0, "step": 13620 }, { "epoch": 0.8778114326222852, "grad_norm": 2.0031004060201308e-05, "learning_rate": 1.36484067311135e-06, "loss": 0.0, "step": 13621 }, { "epoch": 0.8778758780692144, "grad_norm": 0.0002399283084197288, "learning_rate": 1.364124597207304e-06, "loss": 0.0, "step": 13622 }, { "epoch": 0.8779403235161436, "grad_norm": 0.0014824010634681267, "learning_rate": 1.3634085213032584e-06, "loss": 0.0, "step": 13623 }, { "epoch": 0.8780047689630728, "grad_norm": 0.0006106176764788257, "learning_rate": 1.3626924453992125e-06, "loss": 0.0, "step": 13624 }, { "epoch": 0.878069214410002, "grad_norm": 0.009545374588239326, "learning_rate": 1.3619763694951665e-06, "loss": 0.0, "step": 13625 }, { "epoch": 0.8781336598569311, "grad_norm": 0.00035692053622342487, "learning_rate": 1.3612602935911206e-06, "loss": 0.0, "step": 13626 }, { "epoch": 0.8781981053038603, "grad_norm": 0.0086509276995505, "learning_rate": 1.3605442176870751e-06, "loss": 0.0, "step": 13627 }, { "epoch": 0.8782625507507894, "grad_norm": 0.00010314129543442811, "learning_rate": 1.3598281417830292e-06, "loss": 0.0, "step": 13628 }, { "epoch": 0.8783269961977186, "grad_norm": 0.01060045892036667, "learning_rate": 1.3591120658789833e-06, "loss": 0.0, "step": 13629 }, { "epoch": 0.8783914416446478, "grad_norm": 0.0026332486628218815, "learning_rate": 1.3583959899749374e-06, "loss": 0.0, "step": 13630 }, { "epoch": 0.878455887091577, "grad_norm": 0.004919190051721374, "learning_rate": 1.3576799140708915e-06, "loss": 0.0, "step": 13631 }, { "epoch": 0.8785203325385061, "grad_norm": 0.005739669680137454, "learning_rate": 1.3569638381668456e-06, "loss": 0.0, "step": 13632 }, { "epoch": 0.8785847779854353, "grad_norm": 0.12541555085615214, "learning_rate": 1.3562477622628e-06, "loss": 0.0016, "step": 13633 }, { "epoch": 0.8786492234323645, "grad_norm": 0.009859135451052567, "learning_rate": 1.3555316863587542e-06, "loss": 0.0001, "step": 13634 }, { "epoch": 0.8787136688792937, "grad_norm": 0.28341660181746003, "learning_rate": 1.3548156104547083e-06, "loss": 0.0006, "step": 13635 }, { "epoch": 0.8787781143262229, "grad_norm": 0.0016875942051804806, "learning_rate": 1.3540995345506624e-06, "loss": 0.0, "step": 13636 }, { "epoch": 0.8788425597731521, "grad_norm": 0.0005612762262681204, "learning_rate": 1.3533834586466167e-06, "loss": 0.0, "step": 13637 }, { "epoch": 0.8789070052200812, "grad_norm": 0.01833501293870889, "learning_rate": 1.3526673827425707e-06, "loss": 0.0, "step": 13638 }, { "epoch": 0.8789714506670103, "grad_norm": 0.0016657878041175568, "learning_rate": 1.351951306838525e-06, "loss": 0.0, "step": 13639 }, { "epoch": 0.8790358961139395, "grad_norm": 0.008593055425421249, "learning_rate": 1.3512352309344791e-06, "loss": 0.0, "step": 13640 }, { "epoch": 0.8791003415608687, "grad_norm": 0.022741383577491645, "learning_rate": 1.3505191550304334e-06, "loss": 0.0002, "step": 13641 }, { "epoch": 0.8791647870077979, "grad_norm": 0.00012513328344494333, "learning_rate": 1.3498030791263875e-06, "loss": 0.0, "step": 13642 }, { "epoch": 0.8792292324547271, "grad_norm": 0.0015158990746431459, "learning_rate": 1.3490870032223416e-06, "loss": 0.0, "step": 13643 }, { "epoch": 0.8792936779016562, "grad_norm": 0.0001142028649185917, "learning_rate": 1.3483709273182957e-06, "loss": 0.0, "step": 13644 }, { "epoch": 0.8793581233485854, "grad_norm": 0.00018615067818089466, "learning_rate": 1.3476548514142502e-06, "loss": 0.0, "step": 13645 }, { "epoch": 0.8794225687955146, "grad_norm": 0.0016702490090621004, "learning_rate": 1.3469387755102043e-06, "loss": 0.0, "step": 13646 }, { "epoch": 0.8794870142424438, "grad_norm": 0.014081349773854846, "learning_rate": 1.3462226996061584e-06, "loss": 0.0, "step": 13647 }, { "epoch": 0.879551459689373, "grad_norm": 0.35911500897104526, "learning_rate": 1.3455066237021125e-06, "loss": 0.0003, "step": 13648 }, { "epoch": 0.8796159051363022, "grad_norm": 0.0004700586450096311, "learning_rate": 1.3447905477980665e-06, "loss": 0.0, "step": 13649 }, { "epoch": 0.8796803505832312, "grad_norm": 0.007744483732701911, "learning_rate": 1.3440744718940208e-06, "loss": 0.0001, "step": 13650 }, { "epoch": 0.8797447960301604, "grad_norm": 0.0002823123017943778, "learning_rate": 1.3433583959899751e-06, "loss": 0.0, "step": 13651 }, { "epoch": 0.8798092414770896, "grad_norm": 0.0020454451113791103, "learning_rate": 1.3426423200859292e-06, "loss": 0.0, "step": 13652 }, { "epoch": 0.8798736869240188, "grad_norm": 0.011770034371449568, "learning_rate": 1.3419262441818833e-06, "loss": 0.0, "step": 13653 }, { "epoch": 0.879938132370948, "grad_norm": 0.0009106569832223424, "learning_rate": 1.3412101682778376e-06, "loss": 0.0, "step": 13654 }, { "epoch": 0.8800025778178772, "grad_norm": 0.00044701932244403723, "learning_rate": 1.3404940923737917e-06, "loss": 0.0, "step": 13655 }, { "epoch": 0.8800670232648063, "grad_norm": 0.0013901423332051905, "learning_rate": 1.339778016469746e-06, "loss": 0.0, "step": 13656 }, { "epoch": 0.8801314687117355, "grad_norm": 0.024266204133080428, "learning_rate": 1.3390619405657e-06, "loss": 0.0, "step": 13657 }, { "epoch": 0.8801959141586647, "grad_norm": 0.003448365666168964, "learning_rate": 1.3383458646616544e-06, "loss": 0.0, "step": 13658 }, { "epoch": 0.8802603596055939, "grad_norm": 0.25593555568473547, "learning_rate": 1.3376297887576085e-06, "loss": 0.0009, "step": 13659 }, { "epoch": 0.8803248050525231, "grad_norm": 0.00023793433604057834, "learning_rate": 1.3369137128535626e-06, "loss": 0.0, "step": 13660 }, { "epoch": 0.8803892504994522, "grad_norm": 0.024291282911824568, "learning_rate": 1.3361976369495166e-06, "loss": 0.0, "step": 13661 }, { "epoch": 0.8804536959463813, "grad_norm": 0.18787491587994204, "learning_rate": 1.3354815610454712e-06, "loss": 0.0014, "step": 13662 }, { "epoch": 0.8805181413933105, "grad_norm": 0.00011337929585352012, "learning_rate": 1.3347654851414252e-06, "loss": 0.0, "step": 13663 }, { "epoch": 0.8805825868402397, "grad_norm": 0.00015175064550529223, "learning_rate": 1.3340494092373793e-06, "loss": 0.0, "step": 13664 }, { "epoch": 0.8806470322871689, "grad_norm": 0.012094931872168792, "learning_rate": 1.3333333333333334e-06, "loss": 0.0, "step": 13665 }, { "epoch": 0.8807114777340981, "grad_norm": 0.008657847704660484, "learning_rate": 1.3326172574292875e-06, "loss": 0.0001, "step": 13666 }, { "epoch": 0.8807759231810273, "grad_norm": 0.0029345671366418132, "learning_rate": 1.3319011815252416e-06, "loss": 0.0, "step": 13667 }, { "epoch": 0.8808403686279564, "grad_norm": 0.00023515681443448538, "learning_rate": 1.331185105621196e-06, "loss": 0.0, "step": 13668 }, { "epoch": 0.8809048140748856, "grad_norm": 0.0001054609110638124, "learning_rate": 1.3304690297171502e-06, "loss": 0.0, "step": 13669 }, { "epoch": 0.8809692595218148, "grad_norm": 0.0027009243656194505, "learning_rate": 1.3297529538131043e-06, "loss": 0.0, "step": 13670 }, { "epoch": 0.881033704968744, "grad_norm": 0.00012685416549973138, "learning_rate": 1.3290368779090584e-06, "loss": 0.0, "step": 13671 }, { "epoch": 0.8810981504156732, "grad_norm": 0.3780272996016023, "learning_rate": 1.3283208020050127e-06, "loss": 0.0023, "step": 13672 }, { "epoch": 0.8811625958626023, "grad_norm": 0.10550351364688096, "learning_rate": 1.3276047261009667e-06, "loss": 0.0001, "step": 13673 }, { "epoch": 0.8812270413095314, "grad_norm": 0.0005343506373210069, "learning_rate": 1.326888650196921e-06, "loss": 0.0, "step": 13674 }, { "epoch": 0.8812914867564606, "grad_norm": 0.006842115762950535, "learning_rate": 1.3261725742928751e-06, "loss": 0.0, "step": 13675 }, { "epoch": 0.8813559322033898, "grad_norm": 0.0013787853414366827, "learning_rate": 1.3254564983888294e-06, "loss": 0.0, "step": 13676 }, { "epoch": 0.881420377650319, "grad_norm": 0.6943399292392509, "learning_rate": 1.3247404224847835e-06, "loss": 0.0052, "step": 13677 }, { "epoch": 0.8814848230972482, "grad_norm": 0.001786099160118304, "learning_rate": 1.3240243465807376e-06, "loss": 0.0, "step": 13678 }, { "epoch": 0.8815492685441774, "grad_norm": 0.0029717481777851596, "learning_rate": 1.3233082706766917e-06, "loss": 0.0, "step": 13679 }, { "epoch": 0.8816137139911066, "grad_norm": 6.38256307935207e-05, "learning_rate": 1.3225921947726462e-06, "loss": 0.0, "step": 13680 }, { "epoch": 0.8816781594380357, "grad_norm": 0.0002884480060583603, "learning_rate": 1.3218761188686003e-06, "loss": 0.0, "step": 13681 }, { "epoch": 0.8817426048849649, "grad_norm": 0.1623689515082662, "learning_rate": 1.3211600429645544e-06, "loss": 0.0002, "step": 13682 }, { "epoch": 0.8818070503318941, "grad_norm": 0.04362992253845935, "learning_rate": 1.3204439670605085e-06, "loss": 0.0001, "step": 13683 }, { "epoch": 0.8818714957788232, "grad_norm": 0.22108830118373307, "learning_rate": 1.3197278911564625e-06, "loss": 0.0004, "step": 13684 }, { "epoch": 0.8819359412257524, "grad_norm": 0.0003036379952213748, "learning_rate": 1.3190118152524166e-06, "loss": 0.0, "step": 13685 }, { "epoch": 0.8820003866726815, "grad_norm": 0.008215158033441384, "learning_rate": 1.3182957393483711e-06, "loss": 0.0001, "step": 13686 }, { "epoch": 0.8820648321196107, "grad_norm": 0.000746369885476597, "learning_rate": 1.3175796634443252e-06, "loss": 0.0, "step": 13687 }, { "epoch": 0.8821292775665399, "grad_norm": 0.2331175370722083, "learning_rate": 1.3168635875402793e-06, "loss": 0.0001, "step": 13688 }, { "epoch": 0.8821937230134691, "grad_norm": 3.3127325739645425e-05, "learning_rate": 1.3161475116362334e-06, "loss": 0.0, "step": 13689 }, { "epoch": 0.8822581684603983, "grad_norm": 0.00046292881256141657, "learning_rate": 1.3154314357321877e-06, "loss": 0.0, "step": 13690 }, { "epoch": 0.8823226139073275, "grad_norm": 2.4879745552462693e-05, "learning_rate": 1.314715359828142e-06, "loss": 0.0, "step": 13691 }, { "epoch": 0.8823870593542567, "grad_norm": 0.024843685636272067, "learning_rate": 1.313999283924096e-06, "loss": 0.0, "step": 13692 }, { "epoch": 0.8824515048011858, "grad_norm": 0.0033880595142095345, "learning_rate": 1.3132832080200502e-06, "loss": 0.0, "step": 13693 }, { "epoch": 0.882515950248115, "grad_norm": 0.005841345105160331, "learning_rate": 1.3125671321160045e-06, "loss": 0.0, "step": 13694 }, { "epoch": 0.8825803956950441, "grad_norm": 0.0034708352770504813, "learning_rate": 1.3118510562119586e-06, "loss": 0.0, "step": 13695 }, { "epoch": 0.8826448411419733, "grad_norm": 0.0011531345218125334, "learning_rate": 1.3111349803079127e-06, "loss": 0.0, "step": 13696 }, { "epoch": 0.8827092865889025, "grad_norm": 0.0008902594119790399, "learning_rate": 1.310418904403867e-06, "loss": 0.0, "step": 13697 }, { "epoch": 0.8827737320358316, "grad_norm": 0.02761833022301556, "learning_rate": 1.3097028284998212e-06, "loss": 0.0, "step": 13698 }, { "epoch": 0.8828381774827608, "grad_norm": 7.322041241513758e-05, "learning_rate": 1.3089867525957753e-06, "loss": 0.0, "step": 13699 }, { "epoch": 0.88290262292969, "grad_norm": 0.0003351661766753556, "learning_rate": 1.3082706766917294e-06, "loss": 0.0, "step": 13700 }, { "epoch": 0.8829670683766192, "grad_norm": 0.13306850415458327, "learning_rate": 1.3075546007876835e-06, "loss": 0.0003, "step": 13701 }, { "epoch": 0.8830315138235484, "grad_norm": 0.0009763088853932016, "learning_rate": 1.3068385248836376e-06, "loss": 0.0, "step": 13702 }, { "epoch": 0.8830959592704776, "grad_norm": 0.030336426954044204, "learning_rate": 1.3061224489795921e-06, "loss": 0.0002, "step": 13703 }, { "epoch": 0.8831604047174068, "grad_norm": 0.3518987319355953, "learning_rate": 1.3054063730755462e-06, "loss": 0.0013, "step": 13704 }, { "epoch": 0.8832248501643359, "grad_norm": 0.00021259481191901802, "learning_rate": 1.3046902971715003e-06, "loss": 0.0, "step": 13705 }, { "epoch": 0.883289295611265, "grad_norm": 0.0012099330641390034, "learning_rate": 1.3039742212674544e-06, "loss": 0.0, "step": 13706 }, { "epoch": 0.8833537410581942, "grad_norm": 0.01886221954292911, "learning_rate": 1.3032581453634085e-06, "loss": 0.0, "step": 13707 }, { "epoch": 0.8834181865051234, "grad_norm": 0.09828483459776614, "learning_rate": 1.3025420694593628e-06, "loss": 0.0001, "step": 13708 }, { "epoch": 0.8834826319520526, "grad_norm": 0.0005948211234448418, "learning_rate": 1.301825993555317e-06, "loss": 0.0, "step": 13709 }, { "epoch": 0.8835470773989818, "grad_norm": 0.00020818992887561486, "learning_rate": 1.3011099176512711e-06, "loss": 0.0, "step": 13710 }, { "epoch": 0.8836115228459109, "grad_norm": 0.1842981683540968, "learning_rate": 1.3003938417472252e-06, "loss": 0.0011, "step": 13711 }, { "epoch": 0.8836759682928401, "grad_norm": 0.008536273547773218, "learning_rate": 1.2996777658431795e-06, "loss": 0.0001, "step": 13712 }, { "epoch": 0.8837404137397693, "grad_norm": 0.00032051570706314344, "learning_rate": 1.2989616899391336e-06, "loss": 0.0, "step": 13713 }, { "epoch": 0.8838048591866985, "grad_norm": 0.17510439577339187, "learning_rate": 1.2982456140350877e-06, "loss": 0.0005, "step": 13714 }, { "epoch": 0.8838693046336277, "grad_norm": 0.01028719993330639, "learning_rate": 1.297529538131042e-06, "loss": 0.0, "step": 13715 }, { "epoch": 0.8839337500805569, "grad_norm": 0.08046575674952673, "learning_rate": 1.2968134622269963e-06, "loss": 0.0017, "step": 13716 }, { "epoch": 0.8839981955274859, "grad_norm": 0.0006212313252662225, "learning_rate": 1.2960973863229504e-06, "loss": 0.0, "step": 13717 }, { "epoch": 0.8840626409744151, "grad_norm": 0.00033610867341460235, "learning_rate": 1.2953813104189045e-06, "loss": 0.0, "step": 13718 }, { "epoch": 0.8841270864213443, "grad_norm": 0.00023716963033959892, "learning_rate": 1.2946652345148586e-06, "loss": 0.0, "step": 13719 }, { "epoch": 0.8841915318682735, "grad_norm": 0.0016286595639973542, "learning_rate": 1.2939491586108126e-06, "loss": 0.0, "step": 13720 }, { "epoch": 0.8842559773152027, "grad_norm": 0.031614340354932574, "learning_rate": 1.2932330827067672e-06, "loss": 0.0001, "step": 13721 }, { "epoch": 0.8843204227621319, "grad_norm": 0.00017104715739226403, "learning_rate": 1.2925170068027212e-06, "loss": 0.0, "step": 13722 }, { "epoch": 0.884384868209061, "grad_norm": 9.70356569071254e-05, "learning_rate": 1.2918009308986753e-06, "loss": 0.0, "step": 13723 }, { "epoch": 0.8844493136559902, "grad_norm": 0.3967907666004509, "learning_rate": 1.2910848549946294e-06, "loss": 0.0011, "step": 13724 }, { "epoch": 0.8845137591029194, "grad_norm": 0.0008351767774307439, "learning_rate": 1.2903687790905837e-06, "loss": 0.0, "step": 13725 }, { "epoch": 0.8845782045498486, "grad_norm": 0.0010768701346120296, "learning_rate": 1.289652703186538e-06, "loss": 0.0, "step": 13726 }, { "epoch": 0.8846426499967778, "grad_norm": 0.00035160258067471443, "learning_rate": 1.288936627282492e-06, "loss": 0.0, "step": 13727 }, { "epoch": 0.8847070954437068, "grad_norm": 0.21524670698834755, "learning_rate": 1.2882205513784462e-06, "loss": 0.0003, "step": 13728 }, { "epoch": 0.884771540890636, "grad_norm": 7.928518740038473e-05, "learning_rate": 1.2875044754744005e-06, "loss": 0.0, "step": 13729 }, { "epoch": 0.8848359863375652, "grad_norm": 0.7751929273734448, "learning_rate": 1.2867883995703546e-06, "loss": 0.0052, "step": 13730 }, { "epoch": 0.8849004317844944, "grad_norm": 0.000505047569173897, "learning_rate": 1.2860723236663087e-06, "loss": 0.0, "step": 13731 }, { "epoch": 0.8849648772314236, "grad_norm": 0.0017112831956432482, "learning_rate": 1.285356247762263e-06, "loss": 0.0, "step": 13732 }, { "epoch": 0.8850293226783528, "grad_norm": 2.923113262558036e-05, "learning_rate": 1.2846401718582173e-06, "loss": 0.0, "step": 13733 }, { "epoch": 0.885093768125282, "grad_norm": 0.08377971496993508, "learning_rate": 1.2839240959541713e-06, "loss": 0.0017, "step": 13734 }, { "epoch": 0.8851582135722111, "grad_norm": 0.0018758007745659337, "learning_rate": 1.2832080200501254e-06, "loss": 0.0, "step": 13735 }, { "epoch": 0.8852226590191403, "grad_norm": 0.005519941912064853, "learning_rate": 1.2824919441460795e-06, "loss": 0.0, "step": 13736 }, { "epoch": 0.8852871044660695, "grad_norm": 0.007801686846673793, "learning_rate": 1.2817758682420336e-06, "loss": 0.0001, "step": 13737 }, { "epoch": 0.8853515499129987, "grad_norm": 0.3482728911432587, "learning_rate": 1.2810597923379881e-06, "loss": 0.0008, "step": 13738 }, { "epoch": 0.8854159953599279, "grad_norm": 0.0009555235755133371, "learning_rate": 1.2803437164339422e-06, "loss": 0.0, "step": 13739 }, { "epoch": 0.885480440806857, "grad_norm": 0.002469717287478994, "learning_rate": 1.2796276405298963e-06, "loss": 0.0, "step": 13740 }, { "epoch": 0.8855448862537861, "grad_norm": 0.0026660388959545405, "learning_rate": 1.2789115646258504e-06, "loss": 0.0, "step": 13741 }, { "epoch": 0.8856093317007153, "grad_norm": 0.005242530312289297, "learning_rate": 1.2781954887218045e-06, "loss": 0.0, "step": 13742 }, { "epoch": 0.8856737771476445, "grad_norm": 9.516788258618955e-05, "learning_rate": 1.2774794128177588e-06, "loss": 0.0, "step": 13743 }, { "epoch": 0.8857382225945737, "grad_norm": 2.566322068723762e-05, "learning_rate": 1.276763336913713e-06, "loss": 0.0, "step": 13744 }, { "epoch": 0.8858026680415029, "grad_norm": 0.00032851554042659717, "learning_rate": 1.2760472610096671e-06, "loss": 0.0, "step": 13745 }, { "epoch": 0.8858671134884321, "grad_norm": 0.2881616395959894, "learning_rate": 1.2753311851056212e-06, "loss": 0.0024, "step": 13746 }, { "epoch": 0.8859315589353612, "grad_norm": 0.0005407381204699677, "learning_rate": 1.2746151092015755e-06, "loss": 0.0, "step": 13747 }, { "epoch": 0.8859960043822904, "grad_norm": 0.00013134327426654014, "learning_rate": 1.2738990332975296e-06, "loss": 0.0, "step": 13748 }, { "epoch": 0.8860604498292196, "grad_norm": 1.0938324603394522e-05, "learning_rate": 1.2731829573934837e-06, "loss": 0.0, "step": 13749 }, { "epoch": 0.8861248952761488, "grad_norm": 0.004271410915227554, "learning_rate": 1.272466881489438e-06, "loss": 0.0, "step": 13750 }, { "epoch": 0.8861893407230779, "grad_norm": 0.0026957620892503614, "learning_rate": 1.2717508055853923e-06, "loss": 0.0, "step": 13751 }, { "epoch": 0.886253786170007, "grad_norm": 0.0006152756950276613, "learning_rate": 1.2710347296813464e-06, "loss": 0.0, "step": 13752 }, { "epoch": 0.8863182316169362, "grad_norm": 2.2124446079129158e-05, "learning_rate": 1.2703186537773005e-06, "loss": 0.0, "step": 13753 }, { "epoch": 0.8863826770638654, "grad_norm": 0.05126629319637686, "learning_rate": 1.2696025778732546e-06, "loss": 0.0001, "step": 13754 }, { "epoch": 0.8864471225107946, "grad_norm": 0.017536264153307488, "learning_rate": 1.2688865019692087e-06, "loss": 0.0001, "step": 13755 }, { "epoch": 0.8865115679577238, "grad_norm": 0.04960957108481395, "learning_rate": 1.2681704260651632e-06, "loss": 0.0, "step": 13756 }, { "epoch": 0.886576013404653, "grad_norm": 0.005953873003306027, "learning_rate": 1.2674543501611172e-06, "loss": 0.0001, "step": 13757 }, { "epoch": 0.8866404588515822, "grad_norm": 0.0004576293244010673, "learning_rate": 1.2667382742570713e-06, "loss": 0.0, "step": 13758 }, { "epoch": 0.8867049042985113, "grad_norm": 0.002728465719264111, "learning_rate": 1.2660221983530254e-06, "loss": 0.0, "step": 13759 }, { "epoch": 0.8867693497454405, "grad_norm": 0.0020225571803567754, "learning_rate": 1.2653061224489795e-06, "loss": 0.0, "step": 13760 }, { "epoch": 0.8868337951923697, "grad_norm": 0.14241467212327003, "learning_rate": 1.264590046544934e-06, "loss": 0.0004, "step": 13761 }, { "epoch": 0.8868982406392988, "grad_norm": 0.0014042617102855625, "learning_rate": 1.2638739706408881e-06, "loss": 0.0, "step": 13762 }, { "epoch": 0.886962686086228, "grad_norm": 0.029573956164921412, "learning_rate": 1.2631578947368422e-06, "loss": 0.0, "step": 13763 }, { "epoch": 0.8870271315331572, "grad_norm": 0.0030826232614705414, "learning_rate": 1.2624418188327963e-06, "loss": 0.0, "step": 13764 }, { "epoch": 0.8870915769800863, "grad_norm": 0.000188945426545367, "learning_rate": 1.2617257429287506e-06, "loss": 0.0, "step": 13765 }, { "epoch": 0.8871560224270155, "grad_norm": 0.008708895375226261, "learning_rate": 1.2610096670247047e-06, "loss": 0.0, "step": 13766 }, { "epoch": 0.8872204678739447, "grad_norm": 0.17166571446342943, "learning_rate": 1.260293591120659e-06, "loss": 0.0007, "step": 13767 }, { "epoch": 0.8872849133208739, "grad_norm": 0.0806280824807589, "learning_rate": 1.259577515216613e-06, "loss": 0.0008, "step": 13768 }, { "epoch": 0.8873493587678031, "grad_norm": 0.00011126534919590591, "learning_rate": 1.2588614393125673e-06, "loss": 0.0, "step": 13769 }, { "epoch": 0.8874138042147323, "grad_norm": 0.0013592761131405947, "learning_rate": 1.2581453634085214e-06, "loss": 0.0, "step": 13770 }, { "epoch": 0.8874782496616614, "grad_norm": 0.011567165436922357, "learning_rate": 1.2574292875044755e-06, "loss": 0.0, "step": 13771 }, { "epoch": 0.8875426951085906, "grad_norm": 0.0010506584973813378, "learning_rate": 1.2567132116004296e-06, "loss": 0.0, "step": 13772 }, { "epoch": 0.8876071405555197, "grad_norm": 0.001634937844128216, "learning_rate": 1.2559971356963841e-06, "loss": 0.0015, "step": 13773 }, { "epoch": 0.8876715860024489, "grad_norm": 0.0007561086547375215, "learning_rate": 1.2552810597923382e-06, "loss": 0.0, "step": 13774 }, { "epoch": 0.8877360314493781, "grad_norm": 0.00021426457144976387, "learning_rate": 1.2545649838882923e-06, "loss": 0.0, "step": 13775 }, { "epoch": 0.8878004768963073, "grad_norm": 0.0017384501229145124, "learning_rate": 1.2538489079842464e-06, "loss": 0.0, "step": 13776 }, { "epoch": 0.8878649223432364, "grad_norm": 0.46608758452350224, "learning_rate": 1.2531328320802005e-06, "loss": 0.003, "step": 13777 }, { "epoch": 0.8879293677901656, "grad_norm": 0.003180161390498234, "learning_rate": 1.2524167561761546e-06, "loss": 0.0, "step": 13778 }, { "epoch": 0.8879938132370948, "grad_norm": 0.0005952020863398482, "learning_rate": 1.251700680272109e-06, "loss": 0.0, "step": 13779 }, { "epoch": 0.888058258684024, "grad_norm": 0.0008731909170846803, "learning_rate": 1.2509846043680632e-06, "loss": 0.0, "step": 13780 }, { "epoch": 0.8881227041309532, "grad_norm": 0.003438820389549209, "learning_rate": 1.2502685284640172e-06, "loss": 0.0, "step": 13781 }, { "epoch": 0.8881871495778824, "grad_norm": 0.00016075516900061808, "learning_rate": 1.2495524525599713e-06, "loss": 0.0, "step": 13782 }, { "epoch": 0.8882515950248115, "grad_norm": 0.005191481164772671, "learning_rate": 1.2488363766559256e-06, "loss": 0.0, "step": 13783 }, { "epoch": 0.8883160404717406, "grad_norm": 0.0009777725279008903, "learning_rate": 1.2481203007518797e-06, "loss": 0.0, "step": 13784 }, { "epoch": 0.8883804859186698, "grad_norm": 0.18591865636404056, "learning_rate": 1.247404224847834e-06, "loss": 0.0002, "step": 13785 }, { "epoch": 0.888444931365599, "grad_norm": 0.016696817939353084, "learning_rate": 1.246688148943788e-06, "loss": 0.0001, "step": 13786 }, { "epoch": 0.8885093768125282, "grad_norm": 0.005742868049428506, "learning_rate": 1.2459720730397424e-06, "loss": 0.0001, "step": 13787 }, { "epoch": 0.8885738222594574, "grad_norm": 0.0033753578995078114, "learning_rate": 1.2452559971356965e-06, "loss": 0.0, "step": 13788 }, { "epoch": 0.8886382677063865, "grad_norm": 0.001299590697601426, "learning_rate": 1.2445399212316508e-06, "loss": 0.0, "step": 13789 }, { "epoch": 0.8887027131533157, "grad_norm": 0.01598992051312426, "learning_rate": 1.2438238453276049e-06, "loss": 0.0002, "step": 13790 }, { "epoch": 0.8887671586002449, "grad_norm": 0.7328409038968464, "learning_rate": 1.243107769423559e-06, "loss": 0.0008, "step": 13791 }, { "epoch": 0.8888316040471741, "grad_norm": 0.00030574028092213986, "learning_rate": 1.2423916935195133e-06, "loss": 0.0, "step": 13792 }, { "epoch": 0.8888960494941033, "grad_norm": 0.0005879745614522931, "learning_rate": 1.2416756176154673e-06, "loss": 0.0, "step": 13793 }, { "epoch": 0.8889604949410325, "grad_norm": 0.010634271394679871, "learning_rate": 1.2409595417114214e-06, "loss": 0.0, "step": 13794 }, { "epoch": 0.8890249403879615, "grad_norm": 0.0029319469486168174, "learning_rate": 1.2402434658073757e-06, "loss": 0.0, "step": 13795 }, { "epoch": 0.8890893858348907, "grad_norm": 0.00044063952636015224, "learning_rate": 1.2395273899033298e-06, "loss": 0.0, "step": 13796 }, { "epoch": 0.8891538312818199, "grad_norm": 0.03374082286006838, "learning_rate": 1.238811313999284e-06, "loss": 0.0, "step": 13797 }, { "epoch": 0.8892182767287491, "grad_norm": 0.0031583981479104645, "learning_rate": 1.2380952380952382e-06, "loss": 0.0, "step": 13798 }, { "epoch": 0.8892827221756783, "grad_norm": 0.0029201196111990247, "learning_rate": 1.2373791621911923e-06, "loss": 0.0, "step": 13799 }, { "epoch": 0.8893471676226075, "grad_norm": 0.0002927000163405691, "learning_rate": 1.2366630862871466e-06, "loss": 0.0, "step": 13800 }, { "epoch": 0.8894116130695366, "grad_norm": 0.00024142619848897449, "learning_rate": 1.2359470103831007e-06, "loss": 0.0, "step": 13801 }, { "epoch": 0.8894760585164658, "grad_norm": 0.7436967061286708, "learning_rate": 1.235230934479055e-06, "loss": 0.0079, "step": 13802 }, { "epoch": 0.889540503963395, "grad_norm": 0.0003735531843330027, "learning_rate": 1.234514858575009e-06, "loss": 0.0, "step": 13803 }, { "epoch": 0.8896049494103242, "grad_norm": 0.004015970444965455, "learning_rate": 1.2337987826709634e-06, "loss": 0.0, "step": 13804 }, { "epoch": 0.8896693948572534, "grad_norm": 0.00976236445414446, "learning_rate": 1.2330827067669174e-06, "loss": 0.0001, "step": 13805 }, { "epoch": 0.8897338403041825, "grad_norm": 0.2136054593067004, "learning_rate": 1.2323666308628715e-06, "loss": 0.0032, "step": 13806 }, { "epoch": 0.8897982857511116, "grad_norm": 0.0038618296947997015, "learning_rate": 1.2316505549588258e-06, "loss": 0.0, "step": 13807 }, { "epoch": 0.8898627311980408, "grad_norm": 0.025397306718503647, "learning_rate": 1.23093447905478e-06, "loss": 0.0001, "step": 13808 }, { "epoch": 0.88992717664497, "grad_norm": 0.009758540280719333, "learning_rate": 1.230218403150734e-06, "loss": 0.0, "step": 13809 }, { "epoch": 0.8899916220918992, "grad_norm": 0.00016259548961602632, "learning_rate": 1.2295023272466883e-06, "loss": 0.0, "step": 13810 }, { "epoch": 0.8900560675388284, "grad_norm": 0.0006507345876399534, "learning_rate": 1.2287862513426424e-06, "loss": 0.0, "step": 13811 }, { "epoch": 0.8901205129857576, "grad_norm": 0.1792454153276647, "learning_rate": 1.2280701754385965e-06, "loss": 0.0003, "step": 13812 }, { "epoch": 0.8901849584326867, "grad_norm": 0.00969342986091718, "learning_rate": 1.2273540995345508e-06, "loss": 0.0, "step": 13813 }, { "epoch": 0.8902494038796159, "grad_norm": 0.0008400377772548953, "learning_rate": 1.2266380236305049e-06, "loss": 0.0, "step": 13814 }, { "epoch": 0.8903138493265451, "grad_norm": 0.00017475221910045208, "learning_rate": 1.2259219477264592e-06, "loss": 0.0, "step": 13815 }, { "epoch": 0.8903782947734743, "grad_norm": 0.00044403956214980725, "learning_rate": 1.2252058718224132e-06, "loss": 0.0, "step": 13816 }, { "epoch": 0.8904427402204035, "grad_norm": 0.0698061899771694, "learning_rate": 1.2244897959183673e-06, "loss": 0.0001, "step": 13817 }, { "epoch": 0.8905071856673326, "grad_norm": 0.025812009727729505, "learning_rate": 1.2237737200143216e-06, "loss": 0.0, "step": 13818 }, { "epoch": 0.8905716311142617, "grad_norm": 0.36327571249402313, "learning_rate": 1.2230576441102757e-06, "loss": 0.0034, "step": 13819 }, { "epoch": 0.8906360765611909, "grad_norm": 0.2055305667179693, "learning_rate": 1.22234156820623e-06, "loss": 0.0007, "step": 13820 }, { "epoch": 0.8907005220081201, "grad_norm": 0.01355179297404914, "learning_rate": 1.2216254923021841e-06, "loss": 0.0, "step": 13821 }, { "epoch": 0.8907649674550493, "grad_norm": 0.02007575542711268, "learning_rate": 1.2209094163981384e-06, "loss": 0.0002, "step": 13822 }, { "epoch": 0.8908294129019785, "grad_norm": 0.07990338578217246, "learning_rate": 1.2201933404940925e-06, "loss": 0.0002, "step": 13823 }, { "epoch": 0.8908938583489077, "grad_norm": 0.0023076275997369897, "learning_rate": 1.2194772645900468e-06, "loss": 0.0, "step": 13824 }, { "epoch": 0.8909583037958368, "grad_norm": 0.009210954462485512, "learning_rate": 1.2187611886860009e-06, "loss": 0.0, "step": 13825 }, { "epoch": 0.891022749242766, "grad_norm": 0.004081220907520122, "learning_rate": 1.218045112781955e-06, "loss": 0.0, "step": 13826 }, { "epoch": 0.8910871946896952, "grad_norm": 0.008870903584263718, "learning_rate": 1.2173290368779093e-06, "loss": 0.0001, "step": 13827 }, { "epoch": 0.8911516401366244, "grad_norm": 2.2225392181746675, "learning_rate": 1.2166129609738633e-06, "loss": 0.0042, "step": 13828 }, { "epoch": 0.8912160855835535, "grad_norm": 7.365705023488865e-05, "learning_rate": 1.2158968850698174e-06, "loss": 0.0, "step": 13829 }, { "epoch": 0.8912805310304827, "grad_norm": 8.679196143845854e-05, "learning_rate": 1.2151808091657717e-06, "loss": 0.0, "step": 13830 }, { "epoch": 0.8913449764774118, "grad_norm": 0.0025323594558665664, "learning_rate": 1.2144647332617258e-06, "loss": 0.0, "step": 13831 }, { "epoch": 0.891409421924341, "grad_norm": 0.12459235478699489, "learning_rate": 1.21374865735768e-06, "loss": 0.0001, "step": 13832 }, { "epoch": 0.8914738673712702, "grad_norm": 0.011818195017482032, "learning_rate": 1.2130325814536342e-06, "loss": 0.0, "step": 13833 }, { "epoch": 0.8915383128181994, "grad_norm": 0.14417296540437205, "learning_rate": 1.2123165055495883e-06, "loss": 0.0003, "step": 13834 }, { "epoch": 0.8916027582651286, "grad_norm": 0.010910567690735996, "learning_rate": 1.2116004296455424e-06, "loss": 0.0001, "step": 13835 }, { "epoch": 0.8916672037120578, "grad_norm": 7.052803181361057e-05, "learning_rate": 1.2108843537414967e-06, "loss": 0.0, "step": 13836 }, { "epoch": 0.891731649158987, "grad_norm": 0.17583592412288143, "learning_rate": 1.2101682778374508e-06, "loss": 0.0003, "step": 13837 }, { "epoch": 0.8917960946059161, "grad_norm": 0.023835514292793722, "learning_rate": 1.209452201933405e-06, "loss": 0.0, "step": 13838 }, { "epoch": 0.8918605400528453, "grad_norm": 0.0015865956615176357, "learning_rate": 1.2087361260293592e-06, "loss": 0.0, "step": 13839 }, { "epoch": 0.8919249854997744, "grad_norm": 0.2847122236501794, "learning_rate": 1.2080200501253135e-06, "loss": 0.0024, "step": 13840 }, { "epoch": 0.8919894309467036, "grad_norm": 0.00039617673521535684, "learning_rate": 1.2073039742212675e-06, "loss": 0.0, "step": 13841 }, { "epoch": 0.8920538763936328, "grad_norm": 0.0015060300582539626, "learning_rate": 1.2065878983172218e-06, "loss": 0.0, "step": 13842 }, { "epoch": 0.892118321840562, "grad_norm": 7.080614102933089e-05, "learning_rate": 1.205871822413176e-06, "loss": 0.0, "step": 13843 }, { "epoch": 0.8921827672874911, "grad_norm": 0.087443366093727, "learning_rate": 1.20515574650913e-06, "loss": 0.0004, "step": 13844 }, { "epoch": 0.8922472127344203, "grad_norm": 0.0008844615606733612, "learning_rate": 1.2044396706050843e-06, "loss": 0.0, "step": 13845 }, { "epoch": 0.8923116581813495, "grad_norm": 0.012319154787187002, "learning_rate": 1.2037235947010384e-06, "loss": 0.0, "step": 13846 }, { "epoch": 0.8923761036282787, "grad_norm": 0.010120784738954119, "learning_rate": 1.2030075187969925e-06, "loss": 0.0, "step": 13847 }, { "epoch": 0.8924405490752079, "grad_norm": 0.015798697562626713, "learning_rate": 1.2022914428929468e-06, "loss": 0.0001, "step": 13848 }, { "epoch": 0.892504994522137, "grad_norm": 0.0012742288041931434, "learning_rate": 1.2015753669889009e-06, "loss": 0.0, "step": 13849 }, { "epoch": 0.8925694399690662, "grad_norm": 0.020043164353019335, "learning_rate": 1.2008592910848552e-06, "loss": 0.0, "step": 13850 }, { "epoch": 0.8926338854159953, "grad_norm": 0.03441415644112191, "learning_rate": 1.2001432151808093e-06, "loss": 0.0, "step": 13851 }, { "epoch": 0.8926983308629245, "grad_norm": 0.00010035991690649003, "learning_rate": 1.1994271392767633e-06, "loss": 0.0, "step": 13852 }, { "epoch": 0.8927627763098537, "grad_norm": 0.0003660068627871594, "learning_rate": 1.1987110633727176e-06, "loss": 0.0, "step": 13853 }, { "epoch": 0.8928272217567829, "grad_norm": 0.013529318495661974, "learning_rate": 1.1979949874686717e-06, "loss": 0.0, "step": 13854 }, { "epoch": 0.892891667203712, "grad_norm": 0.00016863076810777927, "learning_rate": 1.1972789115646258e-06, "loss": 0.0, "step": 13855 }, { "epoch": 0.8929561126506412, "grad_norm": 0.004578985770439536, "learning_rate": 1.1965628356605801e-06, "loss": 0.0, "step": 13856 }, { "epoch": 0.8930205580975704, "grad_norm": 0.6975465310207425, "learning_rate": 1.1958467597565342e-06, "loss": 0.0011, "step": 13857 }, { "epoch": 0.8930850035444996, "grad_norm": 0.000828732726699813, "learning_rate": 1.1951306838524885e-06, "loss": 0.0, "step": 13858 }, { "epoch": 0.8931494489914288, "grad_norm": 0.0008080449898315412, "learning_rate": 1.1944146079484426e-06, "loss": 0.0, "step": 13859 }, { "epoch": 0.893213894438358, "grad_norm": 0.00027180681347506474, "learning_rate": 1.1936985320443969e-06, "loss": 0.0, "step": 13860 }, { "epoch": 0.8932783398852872, "grad_norm": 0.018442260354453323, "learning_rate": 1.192982456140351e-06, "loss": 0.0001, "step": 13861 }, { "epoch": 0.8933427853322162, "grad_norm": 0.4737637997431658, "learning_rate": 1.1922663802363053e-06, "loss": 0.0008, "step": 13862 }, { "epoch": 0.8934072307791454, "grad_norm": 0.0038302988254236064, "learning_rate": 1.1915503043322594e-06, "loss": 0.0, "step": 13863 }, { "epoch": 0.8934716762260746, "grad_norm": 0.0001033771169610538, "learning_rate": 1.1908342284282134e-06, "loss": 0.0, "step": 13864 }, { "epoch": 0.8935361216730038, "grad_norm": 0.00024783354701006966, "learning_rate": 1.1901181525241677e-06, "loss": 0.0, "step": 13865 }, { "epoch": 0.893600567119933, "grad_norm": 0.004368677639749437, "learning_rate": 1.1894020766201218e-06, "loss": 0.0, "step": 13866 }, { "epoch": 0.8936650125668621, "grad_norm": 0.0018244756781653027, "learning_rate": 1.188686000716076e-06, "loss": 0.0, "step": 13867 }, { "epoch": 0.8937294580137913, "grad_norm": 0.00023026815644767366, "learning_rate": 1.1879699248120302e-06, "loss": 0.0, "step": 13868 }, { "epoch": 0.8937939034607205, "grad_norm": 0.003097842611027994, "learning_rate": 1.1872538489079843e-06, "loss": 0.0, "step": 13869 }, { "epoch": 0.8938583489076497, "grad_norm": 0.03308828563419499, "learning_rate": 1.1865377730039384e-06, "loss": 0.0001, "step": 13870 }, { "epoch": 0.8939227943545789, "grad_norm": 0.0006239056166978975, "learning_rate": 1.1858216970998927e-06, "loss": 0.0, "step": 13871 }, { "epoch": 0.8939872398015081, "grad_norm": 1.2440699958126985e-05, "learning_rate": 1.1851056211958468e-06, "loss": 0.0, "step": 13872 }, { "epoch": 0.8940516852484371, "grad_norm": 5.976930241618748e-05, "learning_rate": 1.184389545291801e-06, "loss": 0.0, "step": 13873 }, { "epoch": 0.8941161306953663, "grad_norm": 0.0028886406204160628, "learning_rate": 1.1836734693877552e-06, "loss": 0.0, "step": 13874 }, { "epoch": 0.8941805761422955, "grad_norm": 0.004454665917691752, "learning_rate": 1.1829573934837095e-06, "loss": 0.0, "step": 13875 }, { "epoch": 0.8942450215892247, "grad_norm": 0.5158161040383212, "learning_rate": 1.1822413175796635e-06, "loss": 0.0029, "step": 13876 }, { "epoch": 0.8943094670361539, "grad_norm": 0.02061518097300911, "learning_rate": 1.1815252416756178e-06, "loss": 0.0015, "step": 13877 }, { "epoch": 0.8943739124830831, "grad_norm": 1.7711709279277926e-05, "learning_rate": 1.180809165771572e-06, "loss": 0.0, "step": 13878 }, { "epoch": 0.8944383579300123, "grad_norm": 0.18675871185998966, "learning_rate": 1.180093089867526e-06, "loss": 0.0006, "step": 13879 }, { "epoch": 0.8945028033769414, "grad_norm": 0.00019593830987744068, "learning_rate": 1.1793770139634803e-06, "loss": 0.0, "step": 13880 }, { "epoch": 0.8945672488238706, "grad_norm": 0.00010785606641666989, "learning_rate": 1.1786609380594344e-06, "loss": 0.0, "step": 13881 }, { "epoch": 0.8946316942707998, "grad_norm": 0.00014090590342756955, "learning_rate": 1.1779448621553885e-06, "loss": 0.0, "step": 13882 }, { "epoch": 0.894696139717729, "grad_norm": 0.015730361982695604, "learning_rate": 1.1772287862513428e-06, "loss": 0.0001, "step": 13883 }, { "epoch": 0.8947605851646581, "grad_norm": 0.165150147923511, "learning_rate": 1.1765127103472969e-06, "loss": 0.0006, "step": 13884 }, { "epoch": 0.8948250306115872, "grad_norm": 0.001520522969766945, "learning_rate": 1.175796634443251e-06, "loss": 0.0, "step": 13885 }, { "epoch": 0.8948894760585164, "grad_norm": 0.03972055794668295, "learning_rate": 1.1750805585392053e-06, "loss": 0.0001, "step": 13886 }, { "epoch": 0.8949539215054456, "grad_norm": 0.0003771624240822517, "learning_rate": 1.1743644826351593e-06, "loss": 0.0, "step": 13887 }, { "epoch": 0.8950183669523748, "grad_norm": 0.11441051028315816, "learning_rate": 1.1736484067311136e-06, "loss": 0.0001, "step": 13888 }, { "epoch": 0.895082812399304, "grad_norm": 0.007310869051213783, "learning_rate": 1.1729323308270677e-06, "loss": 0.0, "step": 13889 }, { "epoch": 0.8951472578462332, "grad_norm": 0.0001329414092162519, "learning_rate": 1.1722162549230218e-06, "loss": 0.0, "step": 13890 }, { "epoch": 0.8952117032931624, "grad_norm": 0.0014470808931624586, "learning_rate": 1.1715001790189761e-06, "loss": 0.0, "step": 13891 }, { "epoch": 0.8952761487400915, "grad_norm": 0.0004422742882779398, "learning_rate": 1.1707841031149302e-06, "loss": 0.0, "step": 13892 }, { "epoch": 0.8953405941870207, "grad_norm": 0.0030581721309221434, "learning_rate": 1.1700680272108845e-06, "loss": 0.0, "step": 13893 }, { "epoch": 0.8954050396339499, "grad_norm": 2.8451644614638217e-05, "learning_rate": 1.1693519513068386e-06, "loss": 0.0, "step": 13894 }, { "epoch": 0.8954694850808791, "grad_norm": 0.0007229926952766978, "learning_rate": 1.1686358754027929e-06, "loss": 0.0, "step": 13895 }, { "epoch": 0.8955339305278082, "grad_norm": 0.0029705727247215812, "learning_rate": 1.167919799498747e-06, "loss": 0.0, "step": 13896 }, { "epoch": 0.8955983759747373, "grad_norm": 0.019346182931325447, "learning_rate": 1.1672037235947013e-06, "loss": 0.0001, "step": 13897 }, { "epoch": 0.8956628214216665, "grad_norm": 0.0002895578797201431, "learning_rate": 1.1664876476906554e-06, "loss": 0.0, "step": 13898 }, { "epoch": 0.8957272668685957, "grad_norm": 0.0006637398536619145, "learning_rate": 1.1657715717866095e-06, "loss": 0.0, "step": 13899 }, { "epoch": 0.8957917123155249, "grad_norm": 0.00019846648889674596, "learning_rate": 1.1650554958825638e-06, "loss": 0.0, "step": 13900 }, { "epoch": 0.8958561577624541, "grad_norm": 0.00031634464297703157, "learning_rate": 1.1643394199785178e-06, "loss": 0.0, "step": 13901 }, { "epoch": 0.8959206032093833, "grad_norm": 0.004311220159345927, "learning_rate": 1.163623344074472e-06, "loss": 0.0, "step": 13902 }, { "epoch": 0.8959850486563125, "grad_norm": 1.0598536802465255, "learning_rate": 1.1629072681704262e-06, "loss": 0.0031, "step": 13903 }, { "epoch": 0.8960494941032416, "grad_norm": 0.0008670161043206829, "learning_rate": 1.1621911922663803e-06, "loss": 0.0, "step": 13904 }, { "epoch": 0.8961139395501708, "grad_norm": 0.0011511655880350254, "learning_rate": 1.1614751163623344e-06, "loss": 0.0, "step": 13905 }, { "epoch": 0.8961783849971, "grad_norm": 0.003973540317480649, "learning_rate": 1.1607590404582887e-06, "loss": 0.0, "step": 13906 }, { "epoch": 0.8962428304440291, "grad_norm": 0.14830795080849515, "learning_rate": 1.1600429645542428e-06, "loss": 0.0018, "step": 13907 }, { "epoch": 0.8963072758909583, "grad_norm": 0.0007323064595173414, "learning_rate": 1.1593268886501969e-06, "loss": 0.0, "step": 13908 }, { "epoch": 0.8963717213378875, "grad_norm": 0.00023526852555263698, "learning_rate": 1.1586108127461512e-06, "loss": 0.0, "step": 13909 }, { "epoch": 0.8964361667848166, "grad_norm": 0.002325935800464206, "learning_rate": 1.1578947368421053e-06, "loss": 0.0, "step": 13910 }, { "epoch": 0.8965006122317458, "grad_norm": 0.4110362286054091, "learning_rate": 1.1571786609380596e-06, "loss": 0.0021, "step": 13911 }, { "epoch": 0.896565057678675, "grad_norm": 0.021074825949622938, "learning_rate": 1.1564625850340136e-06, "loss": 0.0, "step": 13912 }, { "epoch": 0.8966295031256042, "grad_norm": 0.0045007280964542195, "learning_rate": 1.155746509129968e-06, "loss": 0.0, "step": 13913 }, { "epoch": 0.8966939485725334, "grad_norm": 0.0007198015241596646, "learning_rate": 1.155030433225922e-06, "loss": 0.0, "step": 13914 }, { "epoch": 0.8967583940194626, "grad_norm": 0.019581690119028813, "learning_rate": 1.1543143573218763e-06, "loss": 0.0, "step": 13915 }, { "epoch": 0.8968228394663917, "grad_norm": 0.00037558322132740917, "learning_rate": 1.1535982814178304e-06, "loss": 0.0, "step": 13916 }, { "epoch": 0.8968872849133209, "grad_norm": 0.01912842353742819, "learning_rate": 1.1528822055137845e-06, "loss": 0.0001, "step": 13917 }, { "epoch": 0.89695173036025, "grad_norm": 0.00896517446981582, "learning_rate": 1.1521661296097388e-06, "loss": 0.0, "step": 13918 }, { "epoch": 0.8970161758071792, "grad_norm": 0.002951599669487491, "learning_rate": 1.1514500537056929e-06, "loss": 0.0, "step": 13919 }, { "epoch": 0.8970806212541084, "grad_norm": 0.0033833929402165733, "learning_rate": 1.150733977801647e-06, "loss": 0.0, "step": 13920 }, { "epoch": 0.8971450667010376, "grad_norm": 0.0063744704204274924, "learning_rate": 1.1500179018976013e-06, "loss": 0.0, "step": 13921 }, { "epoch": 0.8972095121479667, "grad_norm": 0.0013080568903183537, "learning_rate": 1.1493018259935554e-06, "loss": 0.0, "step": 13922 }, { "epoch": 0.8972739575948959, "grad_norm": 0.005177324218002255, "learning_rate": 1.1485857500895097e-06, "loss": 0.0, "step": 13923 }, { "epoch": 0.8973384030418251, "grad_norm": 0.00016433341066363132, "learning_rate": 1.1478696741854637e-06, "loss": 0.0, "step": 13924 }, { "epoch": 0.8974028484887543, "grad_norm": 0.005040815180212994, "learning_rate": 1.1471535982814178e-06, "loss": 0.0, "step": 13925 }, { "epoch": 0.8974672939356835, "grad_norm": 0.19144006352602805, "learning_rate": 1.1464375223773721e-06, "loss": 0.0004, "step": 13926 }, { "epoch": 0.8975317393826127, "grad_norm": 0.005144720003540155, "learning_rate": 1.1457214464733262e-06, "loss": 0.0, "step": 13927 }, { "epoch": 0.8975961848295418, "grad_norm": 0.005620512736102895, "learning_rate": 1.1450053705692803e-06, "loss": 0.0, "step": 13928 }, { "epoch": 0.8976606302764709, "grad_norm": 0.02314239599743946, "learning_rate": 1.1442892946652346e-06, "loss": 0.0001, "step": 13929 }, { "epoch": 0.8977250757234001, "grad_norm": 3.417519190465539e-05, "learning_rate": 1.1435732187611887e-06, "loss": 0.0, "step": 13930 }, { "epoch": 0.8977895211703293, "grad_norm": 0.15925896314589388, "learning_rate": 1.142857142857143e-06, "loss": 0.0025, "step": 13931 }, { "epoch": 0.8978539666172585, "grad_norm": 0.002229307091129153, "learning_rate": 1.142141066953097e-06, "loss": 0.0, "step": 13932 }, { "epoch": 0.8979184120641877, "grad_norm": 0.0027272175086743445, "learning_rate": 1.1414249910490514e-06, "loss": 0.0, "step": 13933 }, { "epoch": 0.8979828575111168, "grad_norm": 0.03168294190225496, "learning_rate": 1.1407089151450055e-06, "loss": 0.0, "step": 13934 }, { "epoch": 0.898047302958046, "grad_norm": 0.0009569713106729391, "learning_rate": 1.1399928392409598e-06, "loss": 0.0, "step": 13935 }, { "epoch": 0.8981117484049752, "grad_norm": 0.0008837372743535399, "learning_rate": 1.1392767633369138e-06, "loss": 0.0, "step": 13936 }, { "epoch": 0.8981761938519044, "grad_norm": 0.00016719703676270603, "learning_rate": 1.138560687432868e-06, "loss": 0.0, "step": 13937 }, { "epoch": 0.8982406392988336, "grad_norm": 0.0015464604635017866, "learning_rate": 1.1378446115288222e-06, "loss": 0.0, "step": 13938 }, { "epoch": 0.8983050847457628, "grad_norm": 0.005463581930813619, "learning_rate": 1.1371285356247763e-06, "loss": 0.0, "step": 13939 }, { "epoch": 0.8983695301926918, "grad_norm": 0.004126363956869999, "learning_rate": 1.1364124597207304e-06, "loss": 0.0, "step": 13940 }, { "epoch": 0.898433975639621, "grad_norm": 0.00044599899445592567, "learning_rate": 1.1356963838166847e-06, "loss": 0.0, "step": 13941 }, { "epoch": 0.8984984210865502, "grad_norm": 0.0006417580259393078, "learning_rate": 1.1349803079126388e-06, "loss": 0.0, "step": 13942 }, { "epoch": 0.8985628665334794, "grad_norm": 0.0028110095254215103, "learning_rate": 1.1342642320085929e-06, "loss": 0.0, "step": 13943 }, { "epoch": 0.8986273119804086, "grad_norm": 0.0008243069210426579, "learning_rate": 1.1335481561045472e-06, "loss": 0.0, "step": 13944 }, { "epoch": 0.8986917574273378, "grad_norm": 0.015608134890901078, "learning_rate": 1.1328320802005013e-06, "loss": 0.0001, "step": 13945 }, { "epoch": 0.8987562028742669, "grad_norm": 0.00016418725029251164, "learning_rate": 1.1321160042964556e-06, "loss": 0.0, "step": 13946 }, { "epoch": 0.8988206483211961, "grad_norm": 0.0017409146369892678, "learning_rate": 1.1313999283924096e-06, "loss": 0.0, "step": 13947 }, { "epoch": 0.8988850937681253, "grad_norm": 5.412282878661023e-05, "learning_rate": 1.130683852488364e-06, "loss": 0.0, "step": 13948 }, { "epoch": 0.8989495392150545, "grad_norm": 0.0004645559859257255, "learning_rate": 1.129967776584318e-06, "loss": 0.0, "step": 13949 }, { "epoch": 0.8990139846619837, "grad_norm": 0.002543894375187707, "learning_rate": 1.1292517006802723e-06, "loss": 0.0, "step": 13950 }, { "epoch": 0.8990784301089128, "grad_norm": 0.00021699801752490995, "learning_rate": 1.1285356247762264e-06, "loss": 0.0, "step": 13951 }, { "epoch": 0.8991428755558419, "grad_norm": 0.00015121543904727036, "learning_rate": 1.1278195488721805e-06, "loss": 0.0, "step": 13952 }, { "epoch": 0.8992073210027711, "grad_norm": 0.010239032263425, "learning_rate": 1.1271034729681348e-06, "loss": 0.0, "step": 13953 }, { "epoch": 0.8992717664497003, "grad_norm": 0.018982114515024993, "learning_rate": 1.1263873970640889e-06, "loss": 0.0, "step": 13954 }, { "epoch": 0.8993362118966295, "grad_norm": 0.0005564598211045715, "learning_rate": 1.125671321160043e-06, "loss": 0.0, "step": 13955 }, { "epoch": 0.8994006573435587, "grad_norm": 0.0008152348454253998, "learning_rate": 1.1249552452559973e-06, "loss": 0.0, "step": 13956 }, { "epoch": 0.8994651027904879, "grad_norm": 0.142068940030953, "learning_rate": 1.1242391693519514e-06, "loss": 0.0002, "step": 13957 }, { "epoch": 0.899529548237417, "grad_norm": 0.017551017927371366, "learning_rate": 1.1235230934479057e-06, "loss": 0.0001, "step": 13958 }, { "epoch": 0.8995939936843462, "grad_norm": 0.13314984757949708, "learning_rate": 1.1228070175438598e-06, "loss": 0.0002, "step": 13959 }, { "epoch": 0.8996584391312754, "grad_norm": 0.003780466996403599, "learning_rate": 1.1220909416398138e-06, "loss": 0.0, "step": 13960 }, { "epoch": 0.8997228845782046, "grad_norm": 3.8431651826954105e-05, "learning_rate": 1.1213748657357681e-06, "loss": 0.0, "step": 13961 }, { "epoch": 0.8997873300251337, "grad_norm": 0.0007425784620714796, "learning_rate": 1.1206587898317222e-06, "loss": 0.0, "step": 13962 }, { "epoch": 0.8998517754720629, "grad_norm": 0.0002841375235135086, "learning_rate": 1.1199427139276763e-06, "loss": 0.0, "step": 13963 }, { "epoch": 0.899916220918992, "grad_norm": 0.0030655431826020446, "learning_rate": 1.1192266380236306e-06, "loss": 0.0, "step": 13964 }, { "epoch": 0.8999806663659212, "grad_norm": 0.0006356895919749247, "learning_rate": 1.1185105621195847e-06, "loss": 0.0, "step": 13965 }, { "epoch": 0.9000451118128504, "grad_norm": 0.020512951337609962, "learning_rate": 1.117794486215539e-06, "loss": 0.0001, "step": 13966 }, { "epoch": 0.9001095572597796, "grad_norm": 0.04083187946678933, "learning_rate": 1.117078410311493e-06, "loss": 0.0003, "step": 13967 }, { "epoch": 0.9001740027067088, "grad_norm": 0.0001550509178908707, "learning_rate": 1.1163623344074474e-06, "loss": 0.0, "step": 13968 }, { "epoch": 0.900238448153638, "grad_norm": 0.0004273088370013827, "learning_rate": 1.1156462585034015e-06, "loss": 0.0, "step": 13969 }, { "epoch": 0.9003028936005671, "grad_norm": 0.0003448674989961782, "learning_rate": 1.1149301825993558e-06, "loss": 0.0, "step": 13970 }, { "epoch": 0.9003673390474963, "grad_norm": 0.0008768537809582048, "learning_rate": 1.1142141066953099e-06, "loss": 0.0, "step": 13971 }, { "epoch": 0.9004317844944255, "grad_norm": 5.592022632896799e-05, "learning_rate": 1.113498030791264e-06, "loss": 0.0, "step": 13972 }, { "epoch": 0.9004962299413547, "grad_norm": 0.6882594855306482, "learning_rate": 1.1127819548872182e-06, "loss": 0.0027, "step": 13973 }, { "epoch": 0.9005606753882838, "grad_norm": 0.009598311999210143, "learning_rate": 1.1120658789831723e-06, "loss": 0.0, "step": 13974 }, { "epoch": 0.900625120835213, "grad_norm": 0.02725082065057163, "learning_rate": 1.1113498030791264e-06, "loss": 0.0, "step": 13975 }, { "epoch": 0.9006895662821421, "grad_norm": 0.00868479275831414, "learning_rate": 1.1106337271750807e-06, "loss": 0.0, "step": 13976 }, { "epoch": 0.9007540117290713, "grad_norm": 0.007083911823828566, "learning_rate": 1.1099176512710348e-06, "loss": 0.0001, "step": 13977 }, { "epoch": 0.9008184571760005, "grad_norm": 0.002803608504870315, "learning_rate": 1.1092015753669889e-06, "loss": 0.0, "step": 13978 }, { "epoch": 0.9008829026229297, "grad_norm": 0.0006188193145472269, "learning_rate": 1.1084854994629432e-06, "loss": 0.0, "step": 13979 }, { "epoch": 0.9009473480698589, "grad_norm": 0.024428739904172637, "learning_rate": 1.1077694235588973e-06, "loss": 0.0002, "step": 13980 }, { "epoch": 0.9010117935167881, "grad_norm": 0.0003124864897644293, "learning_rate": 1.1070533476548514e-06, "loss": 0.0, "step": 13981 }, { "epoch": 0.9010762389637172, "grad_norm": 0.10956738785973091, "learning_rate": 1.1063372717508057e-06, "loss": 0.0001, "step": 13982 }, { "epoch": 0.9011406844106464, "grad_norm": 0.02323807997631617, "learning_rate": 1.1056211958467597e-06, "loss": 0.0, "step": 13983 }, { "epoch": 0.9012051298575756, "grad_norm": 7.911839137377666e-05, "learning_rate": 1.104905119942714e-06, "loss": 0.0, "step": 13984 }, { "epoch": 0.9012695753045047, "grad_norm": 1.153643374127301e-05, "learning_rate": 1.1041890440386681e-06, "loss": 0.0, "step": 13985 }, { "epoch": 0.9013340207514339, "grad_norm": 0.03433700359423184, "learning_rate": 1.1034729681346224e-06, "loss": 0.0, "step": 13986 }, { "epoch": 0.9013984661983631, "grad_norm": 0.0001273740311295764, "learning_rate": 1.1027568922305765e-06, "loss": 0.0, "step": 13987 }, { "epoch": 0.9014629116452922, "grad_norm": 0.00029643403967102475, "learning_rate": 1.1020408163265308e-06, "loss": 0.0, "step": 13988 }, { "epoch": 0.9015273570922214, "grad_norm": 0.00167223813672472, "learning_rate": 1.101324740422485e-06, "loss": 0.0, "step": 13989 }, { "epoch": 0.9015918025391506, "grad_norm": 0.049248185514050316, "learning_rate": 1.100608664518439e-06, "loss": 0.0, "step": 13990 }, { "epoch": 0.9016562479860798, "grad_norm": 0.002362216149147042, "learning_rate": 1.0998925886143933e-06, "loss": 0.0, "step": 13991 }, { "epoch": 0.901720693433009, "grad_norm": 0.006003178554491009, "learning_rate": 1.0991765127103474e-06, "loss": 0.0001, "step": 13992 }, { "epoch": 0.9017851388799382, "grad_norm": 0.22652499118620095, "learning_rate": 1.0984604368063015e-06, "loss": 0.0002, "step": 13993 }, { "epoch": 0.9018495843268673, "grad_norm": 0.02536765650686301, "learning_rate": 1.0977443609022558e-06, "loss": 0.0002, "step": 13994 }, { "epoch": 0.9019140297737965, "grad_norm": 0.04242994603591199, "learning_rate": 1.0970282849982098e-06, "loss": 0.0001, "step": 13995 }, { "epoch": 0.9019784752207256, "grad_norm": 0.007177177779843645, "learning_rate": 1.0963122090941641e-06, "loss": 0.0, "step": 13996 }, { "epoch": 0.9020429206676548, "grad_norm": 0.180822076514872, "learning_rate": 1.0955961331901182e-06, "loss": 0.002, "step": 13997 }, { "epoch": 0.902107366114584, "grad_norm": 0.0034121207623937933, "learning_rate": 1.0948800572860723e-06, "loss": 0.0, "step": 13998 }, { "epoch": 0.9021718115615132, "grad_norm": 0.004040904780187532, "learning_rate": 1.0941639813820266e-06, "loss": 0.0, "step": 13999 }, { "epoch": 0.9022362570084423, "grad_norm": 0.000310484319187804, "learning_rate": 1.0934479054779807e-06, "loss": 0.0, "step": 14000 }, { "epoch": 0.9023007024553715, "grad_norm": 0.01932705216887116, "learning_rate": 1.0927318295739348e-06, "loss": 0.0001, "step": 14001 }, { "epoch": 0.9023651479023007, "grad_norm": 0.0008629699703404948, "learning_rate": 1.092015753669889e-06, "loss": 0.0, "step": 14002 }, { "epoch": 0.9024295933492299, "grad_norm": 0.0003779540630065977, "learning_rate": 1.0912996777658432e-06, "loss": 0.0, "step": 14003 }, { "epoch": 0.9024940387961591, "grad_norm": 0.004033788037230716, "learning_rate": 1.0905836018617975e-06, "loss": 0.0, "step": 14004 }, { "epoch": 0.9025584842430883, "grad_norm": 0.00012087015154678254, "learning_rate": 1.0898675259577516e-06, "loss": 0.0, "step": 14005 }, { "epoch": 0.9026229296900175, "grad_norm": 0.0031684532813541473, "learning_rate": 1.0891514500537059e-06, "loss": 0.0, "step": 14006 }, { "epoch": 0.9026873751369465, "grad_norm": 0.7884746287853464, "learning_rate": 1.08843537414966e-06, "loss": 0.0014, "step": 14007 }, { "epoch": 0.9027518205838757, "grad_norm": 2.669712599619591e-05, "learning_rate": 1.0877192982456142e-06, "loss": 0.0, "step": 14008 }, { "epoch": 0.9028162660308049, "grad_norm": 0.0009535648759989387, "learning_rate": 1.0870032223415683e-06, "loss": 0.0, "step": 14009 }, { "epoch": 0.9028807114777341, "grad_norm": 0.002809913133161104, "learning_rate": 1.0862871464375224e-06, "loss": 0.0, "step": 14010 }, { "epoch": 0.9029451569246633, "grad_norm": 0.0012233205019003342, "learning_rate": 1.0855710705334767e-06, "loss": 0.0, "step": 14011 }, { "epoch": 0.9030096023715924, "grad_norm": 0.005354862179131867, "learning_rate": 1.0848549946294308e-06, "loss": 0.0, "step": 14012 }, { "epoch": 0.9030740478185216, "grad_norm": 0.0001383134804233665, "learning_rate": 1.0841389187253849e-06, "loss": 0.0, "step": 14013 }, { "epoch": 0.9031384932654508, "grad_norm": 0.0012410408689786621, "learning_rate": 1.0834228428213392e-06, "loss": 0.0, "step": 14014 }, { "epoch": 0.90320293871238, "grad_norm": 0.00015145828171334654, "learning_rate": 1.0827067669172933e-06, "loss": 0.0, "step": 14015 }, { "epoch": 0.9032673841593092, "grad_norm": 0.0029233848246608997, "learning_rate": 1.0819906910132474e-06, "loss": 0.0, "step": 14016 }, { "epoch": 0.9033318296062384, "grad_norm": 6.762058916841936e-06, "learning_rate": 1.0812746151092017e-06, "loss": 0.0, "step": 14017 }, { "epoch": 0.9033962750531674, "grad_norm": 0.14695861977500713, "learning_rate": 1.0805585392051558e-06, "loss": 0.0018, "step": 14018 }, { "epoch": 0.9034607205000966, "grad_norm": 0.0027973229930890954, "learning_rate": 1.07984246330111e-06, "loss": 0.0, "step": 14019 }, { "epoch": 0.9035251659470258, "grad_norm": 0.004426821608431362, "learning_rate": 1.0791263873970641e-06, "loss": 0.0, "step": 14020 }, { "epoch": 0.903589611393955, "grad_norm": 0.9546233422531074, "learning_rate": 1.0784103114930184e-06, "loss": 0.0039, "step": 14021 }, { "epoch": 0.9036540568408842, "grad_norm": 0.0015999861394946403, "learning_rate": 1.0776942355889725e-06, "loss": 0.0, "step": 14022 }, { "epoch": 0.9037185022878134, "grad_norm": 0.8040267676053892, "learning_rate": 1.0769781596849268e-06, "loss": 0.0073, "step": 14023 }, { "epoch": 0.9037829477347425, "grad_norm": 0.1496658198797121, "learning_rate": 1.076262083780881e-06, "loss": 0.0009, "step": 14024 }, { "epoch": 0.9038473931816717, "grad_norm": 0.00028597161914835925, "learning_rate": 1.075546007876835e-06, "loss": 0.0, "step": 14025 }, { "epoch": 0.9039118386286009, "grad_norm": 0.0010954895000369828, "learning_rate": 1.0748299319727893e-06, "loss": 0.0, "step": 14026 }, { "epoch": 0.9039762840755301, "grad_norm": 0.00033146946288850447, "learning_rate": 1.0741138560687434e-06, "loss": 0.0, "step": 14027 }, { "epoch": 0.9040407295224593, "grad_norm": 0.0071175550409161665, "learning_rate": 1.0733977801646975e-06, "loss": 0.0, "step": 14028 }, { "epoch": 0.9041051749693884, "grad_norm": 0.01369357303133858, "learning_rate": 1.0726817042606518e-06, "loss": 0.0, "step": 14029 }, { "epoch": 0.9041696204163175, "grad_norm": 0.009619704462363344, "learning_rate": 1.0719656283566059e-06, "loss": 0.0, "step": 14030 }, { "epoch": 0.9042340658632467, "grad_norm": 0.0007094879850284002, "learning_rate": 1.0712495524525602e-06, "loss": 0.0, "step": 14031 }, { "epoch": 0.9042985113101759, "grad_norm": 0.013750855588743177, "learning_rate": 1.0705334765485142e-06, "loss": 0.0001, "step": 14032 }, { "epoch": 0.9043629567571051, "grad_norm": 0.00027600823917411867, "learning_rate": 1.0698174006444683e-06, "loss": 0.0, "step": 14033 }, { "epoch": 0.9044274022040343, "grad_norm": 0.01503473339922653, "learning_rate": 1.0691013247404226e-06, "loss": 0.0, "step": 14034 }, { "epoch": 0.9044918476509635, "grad_norm": 2.9273438306325014e-05, "learning_rate": 1.0683852488363767e-06, "loss": 0.0, "step": 14035 }, { "epoch": 0.9045562930978926, "grad_norm": 0.001225609437622927, "learning_rate": 1.0676691729323308e-06, "loss": 0.0, "step": 14036 }, { "epoch": 0.9046207385448218, "grad_norm": 0.000198674450168846, "learning_rate": 1.066953097028285e-06, "loss": 0.0, "step": 14037 }, { "epoch": 0.904685183991751, "grad_norm": 0.0008732649309423083, "learning_rate": 1.0662370211242392e-06, "loss": 0.0, "step": 14038 }, { "epoch": 0.9047496294386802, "grad_norm": 8.002756539824641e-05, "learning_rate": 1.0655209452201935e-06, "loss": 0.0, "step": 14039 }, { "epoch": 0.9048140748856093, "grad_norm": 0.01401901919825385, "learning_rate": 1.0648048693161476e-06, "loss": 0.0, "step": 14040 }, { "epoch": 0.9048785203325385, "grad_norm": 0.014786355131894208, "learning_rate": 1.0640887934121019e-06, "loss": 0.0, "step": 14041 }, { "epoch": 0.9049429657794676, "grad_norm": 0.004576884247605246, "learning_rate": 1.063372717508056e-06, "loss": 0.0, "step": 14042 }, { "epoch": 0.9050074112263968, "grad_norm": 0.05576671370867512, "learning_rate": 1.0626566416040103e-06, "loss": 0.0016, "step": 14043 }, { "epoch": 0.905071856673326, "grad_norm": 0.49583190856322845, "learning_rate": 1.0619405656999643e-06, "loss": 0.0008, "step": 14044 }, { "epoch": 0.9051363021202552, "grad_norm": 0.003267573240087532, "learning_rate": 1.0612244897959184e-06, "loss": 0.0, "step": 14045 }, { "epoch": 0.9052007475671844, "grad_norm": 0.000656877303059541, "learning_rate": 1.0605084138918727e-06, "loss": 0.0, "step": 14046 }, { "epoch": 0.9052651930141136, "grad_norm": 0.39314117668237164, "learning_rate": 1.0597923379878268e-06, "loss": 0.0009, "step": 14047 }, { "epoch": 0.9053296384610428, "grad_norm": 0.000852336283270037, "learning_rate": 1.059076262083781e-06, "loss": 0.0, "step": 14048 }, { "epoch": 0.9053940839079719, "grad_norm": 0.00025226461633294403, "learning_rate": 1.0583601861797352e-06, "loss": 0.0, "step": 14049 }, { "epoch": 0.9054585293549011, "grad_norm": 9.980164147839169e-05, "learning_rate": 1.0576441102756893e-06, "loss": 0.0, "step": 14050 }, { "epoch": 0.9055229748018303, "grad_norm": 0.0008063489059367108, "learning_rate": 1.0569280343716434e-06, "loss": 0.0, "step": 14051 }, { "epoch": 0.9055874202487594, "grad_norm": 0.006412100452888351, "learning_rate": 1.0562119584675977e-06, "loss": 0.0001, "step": 14052 }, { "epoch": 0.9056518656956886, "grad_norm": 0.0010209184892873981, "learning_rate": 1.0554958825635518e-06, "loss": 0.0, "step": 14053 }, { "epoch": 0.9057163111426177, "grad_norm": 1.1038661163874564, "learning_rate": 1.0547798066595058e-06, "loss": 0.0033, "step": 14054 }, { "epoch": 0.9057807565895469, "grad_norm": 0.005365736820451497, "learning_rate": 1.0540637307554601e-06, "loss": 0.0, "step": 14055 }, { "epoch": 0.9058452020364761, "grad_norm": 0.0005929740637682495, "learning_rate": 1.0533476548514142e-06, "loss": 0.0, "step": 14056 }, { "epoch": 0.9059096474834053, "grad_norm": 0.0018838975597551804, "learning_rate": 1.0526315789473685e-06, "loss": 0.0, "step": 14057 }, { "epoch": 0.9059740929303345, "grad_norm": 0.00012933569279299136, "learning_rate": 1.0519155030433226e-06, "loss": 0.0, "step": 14058 }, { "epoch": 0.9060385383772637, "grad_norm": 0.0006639789205916543, "learning_rate": 1.051199427139277e-06, "loss": 0.0, "step": 14059 }, { "epoch": 0.9061029838241929, "grad_norm": 0.0002403311090842733, "learning_rate": 1.050483351235231e-06, "loss": 0.0, "step": 14060 }, { "epoch": 0.906167429271122, "grad_norm": 0.0005676848754295945, "learning_rate": 1.0497672753311853e-06, "loss": 0.0, "step": 14061 }, { "epoch": 0.9062318747180512, "grad_norm": 0.0001549777960638785, "learning_rate": 1.0490511994271394e-06, "loss": 0.0, "step": 14062 }, { "epoch": 0.9062963201649803, "grad_norm": 0.008584136048833318, "learning_rate": 1.0483351235230935e-06, "loss": 0.0001, "step": 14063 }, { "epoch": 0.9063607656119095, "grad_norm": 0.05943516934136947, "learning_rate": 1.0476190476190478e-06, "loss": 0.0001, "step": 14064 }, { "epoch": 0.9064252110588387, "grad_norm": 0.003992473211187499, "learning_rate": 1.0469029717150019e-06, "loss": 0.0, "step": 14065 }, { "epoch": 0.9064896565057678, "grad_norm": 0.0011531323035489564, "learning_rate": 1.0461868958109562e-06, "loss": 0.0, "step": 14066 }, { "epoch": 0.906554101952697, "grad_norm": 0.15580797256069312, "learning_rate": 1.0454708199069102e-06, "loss": 0.0003, "step": 14067 }, { "epoch": 0.9066185473996262, "grad_norm": 0.000671488923623265, "learning_rate": 1.0447547440028643e-06, "loss": 0.0, "step": 14068 }, { "epoch": 0.9066829928465554, "grad_norm": 0.144156905549645, "learning_rate": 1.0440386680988186e-06, "loss": 0.0005, "step": 14069 }, { "epoch": 0.9067474382934846, "grad_norm": 0.018324477936150364, "learning_rate": 1.0433225921947727e-06, "loss": 0.0001, "step": 14070 }, { "epoch": 0.9068118837404138, "grad_norm": 0.004034000423565145, "learning_rate": 1.0426065162907268e-06, "loss": 0.0, "step": 14071 }, { "epoch": 0.906876329187343, "grad_norm": 0.0016369275846041295, "learning_rate": 1.041890440386681e-06, "loss": 0.0, "step": 14072 }, { "epoch": 0.9069407746342721, "grad_norm": 0.02112292707345774, "learning_rate": 1.0411743644826352e-06, "loss": 0.0001, "step": 14073 }, { "epoch": 0.9070052200812012, "grad_norm": 0.0006955568561556969, "learning_rate": 1.0404582885785893e-06, "loss": 0.0, "step": 14074 }, { "epoch": 0.9070696655281304, "grad_norm": 0.4678265376205017, "learning_rate": 1.0397422126745436e-06, "loss": 0.0008, "step": 14075 }, { "epoch": 0.9071341109750596, "grad_norm": 1.7344620141548626, "learning_rate": 1.0390261367704977e-06, "loss": 0.0136, "step": 14076 }, { "epoch": 0.9071985564219888, "grad_norm": 0.020784053798210578, "learning_rate": 1.038310060866452e-06, "loss": 0.0002, "step": 14077 }, { "epoch": 0.907263001868918, "grad_norm": 0.3764581147815863, "learning_rate": 1.037593984962406e-06, "loss": 0.0031, "step": 14078 }, { "epoch": 0.9073274473158471, "grad_norm": 0.015363784519826553, "learning_rate": 1.0368779090583603e-06, "loss": 0.0, "step": 14079 }, { "epoch": 0.9073918927627763, "grad_norm": 9.048272690266486e-05, "learning_rate": 1.0361618331543144e-06, "loss": 0.0, "step": 14080 }, { "epoch": 0.9074563382097055, "grad_norm": 0.4679583858813428, "learning_rate": 1.0354457572502687e-06, "loss": 0.0033, "step": 14081 }, { "epoch": 0.9075207836566347, "grad_norm": 0.6642780402944453, "learning_rate": 1.0347296813462228e-06, "loss": 0.0019, "step": 14082 }, { "epoch": 0.9075852291035639, "grad_norm": 0.0001042014709199235, "learning_rate": 1.034013605442177e-06, "loss": 0.0, "step": 14083 }, { "epoch": 0.9076496745504931, "grad_norm": 0.001710960297502053, "learning_rate": 1.0332975295381312e-06, "loss": 0.0, "step": 14084 }, { "epoch": 0.9077141199974221, "grad_norm": 0.008412103343040999, "learning_rate": 1.0325814536340853e-06, "loss": 0.0, "step": 14085 }, { "epoch": 0.9077785654443513, "grad_norm": 0.0004960663945197895, "learning_rate": 1.0318653777300394e-06, "loss": 0.0, "step": 14086 }, { "epoch": 0.9078430108912805, "grad_norm": 0.0005570506348725557, "learning_rate": 1.0311493018259937e-06, "loss": 0.0, "step": 14087 }, { "epoch": 0.9079074563382097, "grad_norm": 6.0842231279498473e-05, "learning_rate": 1.0304332259219478e-06, "loss": 0.0, "step": 14088 }, { "epoch": 0.9079719017851389, "grad_norm": 0.005005108687200161, "learning_rate": 1.0297171500179019e-06, "loss": 0.0, "step": 14089 }, { "epoch": 0.908036347232068, "grad_norm": 0.006442428525242369, "learning_rate": 1.0290010741138562e-06, "loss": 0.0, "step": 14090 }, { "epoch": 0.9081007926789972, "grad_norm": 0.1994985317478551, "learning_rate": 1.0282849982098102e-06, "loss": 0.0003, "step": 14091 }, { "epoch": 0.9081652381259264, "grad_norm": 0.000303360637107461, "learning_rate": 1.0275689223057645e-06, "loss": 0.0, "step": 14092 }, { "epoch": 0.9082296835728556, "grad_norm": 0.08736249878582208, "learning_rate": 1.0268528464017186e-06, "loss": 0.0003, "step": 14093 }, { "epoch": 0.9082941290197848, "grad_norm": 0.0004485341202632126, "learning_rate": 1.026136770497673e-06, "loss": 0.0, "step": 14094 }, { "epoch": 0.908358574466714, "grad_norm": 0.008703583972100429, "learning_rate": 1.025420694593627e-06, "loss": 0.0, "step": 14095 }, { "epoch": 0.908423019913643, "grad_norm": 0.0042350023156911215, "learning_rate": 1.0247046186895813e-06, "loss": 0.0, "step": 14096 }, { "epoch": 0.9084874653605722, "grad_norm": 0.0005834744678831924, "learning_rate": 1.0239885427855354e-06, "loss": 0.0, "step": 14097 }, { "epoch": 0.9085519108075014, "grad_norm": 0.00024838361865375634, "learning_rate": 1.0232724668814895e-06, "loss": 0.0, "step": 14098 }, { "epoch": 0.9086163562544306, "grad_norm": 0.0010090785760577745, "learning_rate": 1.0225563909774438e-06, "loss": 0.0, "step": 14099 }, { "epoch": 0.9086808017013598, "grad_norm": 0.00846511578975414, "learning_rate": 1.0218403150733979e-06, "loss": 0.0, "step": 14100 }, { "epoch": 0.908745247148289, "grad_norm": 0.003092826092203375, "learning_rate": 1.021124239169352e-06, "loss": 0.0, "step": 14101 }, { "epoch": 0.9088096925952182, "grad_norm": 0.004059035298200513, "learning_rate": 1.0204081632653063e-06, "loss": 0.0, "step": 14102 }, { "epoch": 0.9088741380421473, "grad_norm": 0.031057954187540666, "learning_rate": 1.0196920873612603e-06, "loss": 0.0002, "step": 14103 }, { "epoch": 0.9089385834890765, "grad_norm": 0.007398061002103076, "learning_rate": 1.0189760114572146e-06, "loss": 0.0, "step": 14104 }, { "epoch": 0.9090030289360057, "grad_norm": 0.0037488336929485094, "learning_rate": 1.0182599355531687e-06, "loss": 0.0, "step": 14105 }, { "epoch": 0.9090674743829349, "grad_norm": 0.0013617477461019352, "learning_rate": 1.0175438596491228e-06, "loss": 0.0, "step": 14106 }, { "epoch": 0.909131919829864, "grad_norm": 0.0011576876094899014, "learning_rate": 1.0168277837450771e-06, "loss": 0.0, "step": 14107 }, { "epoch": 0.9091963652767932, "grad_norm": 0.012976751798772565, "learning_rate": 1.0161117078410312e-06, "loss": 0.0, "step": 14108 }, { "epoch": 0.9092608107237223, "grad_norm": 0.0016032709037401551, "learning_rate": 1.0153956319369853e-06, "loss": 0.0, "step": 14109 }, { "epoch": 0.9093252561706515, "grad_norm": 0.0405984077396907, "learning_rate": 1.0146795560329396e-06, "loss": 0.0, "step": 14110 }, { "epoch": 0.9093897016175807, "grad_norm": 9.815901939114581e-05, "learning_rate": 1.0139634801288937e-06, "loss": 0.0, "step": 14111 }, { "epoch": 0.9094541470645099, "grad_norm": 0.0007421945859217288, "learning_rate": 1.013247404224848e-06, "loss": 0.0, "step": 14112 }, { "epoch": 0.9095185925114391, "grad_norm": 0.0044882259964135664, "learning_rate": 1.012531328320802e-06, "loss": 0.0, "step": 14113 }, { "epoch": 0.9095830379583683, "grad_norm": 0.37177406992972634, "learning_rate": 1.0118152524167564e-06, "loss": 0.0012, "step": 14114 }, { "epoch": 0.9096474834052974, "grad_norm": 0.0072924378929102186, "learning_rate": 1.0110991765127104e-06, "loss": 0.0, "step": 14115 }, { "epoch": 0.9097119288522266, "grad_norm": 0.0006270749704708487, "learning_rate": 1.0103831006086647e-06, "loss": 0.0, "step": 14116 }, { "epoch": 0.9097763742991558, "grad_norm": 0.006042327240725415, "learning_rate": 1.0096670247046188e-06, "loss": 0.0, "step": 14117 }, { "epoch": 0.909840819746085, "grad_norm": 0.00024216543431723143, "learning_rate": 1.008950948800573e-06, "loss": 0.0, "step": 14118 }, { "epoch": 0.9099052651930141, "grad_norm": 0.11918004743883662, "learning_rate": 1.0082348728965272e-06, "loss": 0.0003, "step": 14119 }, { "epoch": 0.9099697106399433, "grad_norm": 0.007700771472075251, "learning_rate": 1.0075187969924813e-06, "loss": 0.0, "step": 14120 }, { "epoch": 0.9100341560868724, "grad_norm": 0.001777663954785876, "learning_rate": 1.0068027210884354e-06, "loss": 0.0, "step": 14121 }, { "epoch": 0.9100986015338016, "grad_norm": 0.004718985009600268, "learning_rate": 1.0060866451843897e-06, "loss": 0.0, "step": 14122 }, { "epoch": 0.9101630469807308, "grad_norm": 0.006394187944082488, "learning_rate": 1.0053705692803438e-06, "loss": 0.0, "step": 14123 }, { "epoch": 0.91022749242766, "grad_norm": 0.04192140346511436, "learning_rate": 1.0046544933762979e-06, "loss": 0.0001, "step": 14124 }, { "epoch": 0.9102919378745892, "grad_norm": 0.020396504587853657, "learning_rate": 1.0039384174722522e-06, "loss": 0.0001, "step": 14125 }, { "epoch": 0.9103563833215184, "grad_norm": 9.335500306033411e-05, "learning_rate": 1.0032223415682062e-06, "loss": 0.0, "step": 14126 }, { "epoch": 0.9104208287684475, "grad_norm": 0.001052229329982989, "learning_rate": 1.0025062656641603e-06, "loss": 0.0, "step": 14127 }, { "epoch": 0.9104852742153767, "grad_norm": 0.00044013625982259485, "learning_rate": 1.0017901897601146e-06, "loss": 0.0, "step": 14128 }, { "epoch": 0.9105497196623059, "grad_norm": 0.07613235756810091, "learning_rate": 1.0010741138560687e-06, "loss": 0.0008, "step": 14129 }, { "epoch": 0.910614165109235, "grad_norm": 0.0015174921341569256, "learning_rate": 1.000358037952023e-06, "loss": 0.0, "step": 14130 }, { "epoch": 0.9106786105561642, "grad_norm": 8.869528142380125e-05, "learning_rate": 9.99641962047977e-07, "loss": 0.0, "step": 14131 }, { "epoch": 0.9107430560030934, "grad_norm": 0.0018528149454641407, "learning_rate": 9.989258861439314e-07, "loss": 0.0, "step": 14132 }, { "epoch": 0.9108075014500225, "grad_norm": 0.0013100437281134071, "learning_rate": 9.982098102398855e-07, "loss": 0.0, "step": 14133 }, { "epoch": 0.9108719468969517, "grad_norm": 0.0038834947242926132, "learning_rate": 9.974937343358398e-07, "loss": 0.0, "step": 14134 }, { "epoch": 0.9109363923438809, "grad_norm": 0.003056712747435566, "learning_rate": 9.967776584317939e-07, "loss": 0.0, "step": 14135 }, { "epoch": 0.9110008377908101, "grad_norm": 0.022727158280140002, "learning_rate": 9.96061582527748e-07, "loss": 0.0001, "step": 14136 }, { "epoch": 0.9110652832377393, "grad_norm": 0.000252373752242693, "learning_rate": 9.953455066237023e-07, "loss": 0.0, "step": 14137 }, { "epoch": 0.9111297286846685, "grad_norm": 0.005239760950976765, "learning_rate": 9.946294307196563e-07, "loss": 0.0, "step": 14138 }, { "epoch": 0.9111941741315976, "grad_norm": 0.07205721082201468, "learning_rate": 9.939133548156106e-07, "loss": 0.0001, "step": 14139 }, { "epoch": 0.9112586195785268, "grad_norm": 0.001012468350364244, "learning_rate": 9.931972789115647e-07, "loss": 0.0, "step": 14140 }, { "epoch": 0.9113230650254559, "grad_norm": 0.020371513483527344, "learning_rate": 9.924812030075188e-07, "loss": 0.0001, "step": 14141 }, { "epoch": 0.9113875104723851, "grad_norm": 0.5710255593894316, "learning_rate": 9.917651271034731e-07, "loss": 0.0038, "step": 14142 }, { "epoch": 0.9114519559193143, "grad_norm": 0.003678925592998401, "learning_rate": 9.910490511994272e-07, "loss": 0.0, "step": 14143 }, { "epoch": 0.9115164013662435, "grad_norm": 0.0010209924279294914, "learning_rate": 9.903329752953813e-07, "loss": 0.0, "step": 14144 }, { "epoch": 0.9115808468131726, "grad_norm": 0.002210604517047804, "learning_rate": 9.896168993913356e-07, "loss": 0.0, "step": 14145 }, { "epoch": 0.9116452922601018, "grad_norm": 0.004980856990466322, "learning_rate": 9.889008234872897e-07, "loss": 0.0, "step": 14146 }, { "epoch": 0.911709737707031, "grad_norm": 0.005197732728780156, "learning_rate": 9.881847475832438e-07, "loss": 0.0, "step": 14147 }, { "epoch": 0.9117741831539602, "grad_norm": 0.0006288855310315486, "learning_rate": 9.87468671679198e-07, "loss": 0.0, "step": 14148 }, { "epoch": 0.9118386286008894, "grad_norm": 0.2870331747440102, "learning_rate": 9.867525957751522e-07, "loss": 0.0006, "step": 14149 }, { "epoch": 0.9119030740478186, "grad_norm": 0.009226923949112864, "learning_rate": 9.860365198711065e-07, "loss": 0.0, "step": 14150 }, { "epoch": 0.9119675194947477, "grad_norm": 0.00021664613314001927, "learning_rate": 9.853204439670605e-07, "loss": 0.0, "step": 14151 }, { "epoch": 0.9120319649416768, "grad_norm": 0.07574482256106105, "learning_rate": 9.846043680630148e-07, "loss": 0.0001, "step": 14152 }, { "epoch": 0.912096410388606, "grad_norm": 0.0005642558123564372, "learning_rate": 9.83888292158969e-07, "loss": 0.0, "step": 14153 }, { "epoch": 0.9121608558355352, "grad_norm": 0.00046067530354494283, "learning_rate": 9.831722162549232e-07, "loss": 0.0, "step": 14154 }, { "epoch": 0.9122253012824644, "grad_norm": 0.0019799729249998076, "learning_rate": 9.824561403508773e-07, "loss": 0.0, "step": 14155 }, { "epoch": 0.9122897467293936, "grad_norm": 0.0037062705460312938, "learning_rate": 9.817400644468314e-07, "loss": 0.0, "step": 14156 }, { "epoch": 0.9123541921763227, "grad_norm": 0.21694178506613537, "learning_rate": 9.810239885427857e-07, "loss": 0.0012, "step": 14157 }, { "epoch": 0.9124186376232519, "grad_norm": 2.7329962563106694e-05, "learning_rate": 9.803079126387398e-07, "loss": 0.0, "step": 14158 }, { "epoch": 0.9124830830701811, "grad_norm": 0.017096042360411064, "learning_rate": 9.795918367346939e-07, "loss": 0.0001, "step": 14159 }, { "epoch": 0.9125475285171103, "grad_norm": 0.0022227801850019, "learning_rate": 9.788757608306482e-07, "loss": 0.0, "step": 14160 }, { "epoch": 0.9126119739640395, "grad_norm": 0.00022310574290482324, "learning_rate": 9.781596849266023e-07, "loss": 0.0, "step": 14161 }, { "epoch": 0.9126764194109687, "grad_norm": 0.0001928504205817919, "learning_rate": 9.774436090225563e-07, "loss": 0.0, "step": 14162 }, { "epoch": 0.9127408648578977, "grad_norm": 0.0003620854373529105, "learning_rate": 9.767275331185106e-07, "loss": 0.0, "step": 14163 }, { "epoch": 0.9128053103048269, "grad_norm": 0.0003246124005665155, "learning_rate": 9.760114572144647e-07, "loss": 0.0, "step": 14164 }, { "epoch": 0.9128697557517561, "grad_norm": 0.00037746760821441995, "learning_rate": 9.75295381310419e-07, "loss": 0.0, "step": 14165 }, { "epoch": 0.9129342011986853, "grad_norm": 0.0021465206942596557, "learning_rate": 9.745793054063731e-07, "loss": 0.0, "step": 14166 }, { "epoch": 0.9129986466456145, "grad_norm": 0.009832061962162885, "learning_rate": 9.738632295023274e-07, "loss": 0.0, "step": 14167 }, { "epoch": 0.9130630920925437, "grad_norm": 0.011101602739312185, "learning_rate": 9.731471535982815e-07, "loss": 0.0001, "step": 14168 }, { "epoch": 0.9131275375394728, "grad_norm": 0.002187157704356501, "learning_rate": 9.724310776942358e-07, "loss": 0.0, "step": 14169 }, { "epoch": 0.913191982986402, "grad_norm": 0.01536422494149394, "learning_rate": 9.717150017901899e-07, "loss": 0.0001, "step": 14170 }, { "epoch": 0.9132564284333312, "grad_norm": 0.14876547142920768, "learning_rate": 9.70998925886144e-07, "loss": 0.0003, "step": 14171 }, { "epoch": 0.9133208738802604, "grad_norm": 0.0063150968108142535, "learning_rate": 9.702828499820983e-07, "loss": 0.0001, "step": 14172 }, { "epoch": 0.9133853193271896, "grad_norm": 0.0011484692990875677, "learning_rate": 9.695667740780524e-07, "loss": 0.0, "step": 14173 }, { "epoch": 0.9134497647741187, "grad_norm": 0.00021408494086622607, "learning_rate": 9.688506981740067e-07, "loss": 0.0, "step": 14174 }, { "epoch": 0.9135142102210478, "grad_norm": 0.005474376006443933, "learning_rate": 9.681346222699607e-07, "loss": 0.0, "step": 14175 }, { "epoch": 0.913578655667977, "grad_norm": 0.0009916894050175195, "learning_rate": 9.674185463659148e-07, "loss": 0.0, "step": 14176 }, { "epoch": 0.9136431011149062, "grad_norm": 0.004285597191006188, "learning_rate": 9.667024704618691e-07, "loss": 0.0, "step": 14177 }, { "epoch": 0.9137075465618354, "grad_norm": 6.016757867062615e-05, "learning_rate": 9.659863945578232e-07, "loss": 0.0, "step": 14178 }, { "epoch": 0.9137719920087646, "grad_norm": 0.00013869091499201224, "learning_rate": 9.652703186537773e-07, "loss": 0.0, "step": 14179 }, { "epoch": 0.9138364374556938, "grad_norm": 0.008156751396045343, "learning_rate": 9.645542427497316e-07, "loss": 0.0001, "step": 14180 }, { "epoch": 0.913900882902623, "grad_norm": 0.0010459882012239957, "learning_rate": 9.638381668456857e-07, "loss": 0.0, "step": 14181 }, { "epoch": 0.9139653283495521, "grad_norm": 0.05334048614408453, "learning_rate": 9.631220909416398e-07, "loss": 0.0001, "step": 14182 }, { "epoch": 0.9140297737964813, "grad_norm": 0.0015840045943365014, "learning_rate": 9.62406015037594e-07, "loss": 0.0, "step": 14183 }, { "epoch": 0.9140942192434105, "grad_norm": 0.7855814830833372, "learning_rate": 9.616899391335482e-07, "loss": 0.0021, "step": 14184 }, { "epoch": 0.9141586646903396, "grad_norm": 0.00020580164141785254, "learning_rate": 9.609738632295025e-07, "loss": 0.0, "step": 14185 }, { "epoch": 0.9142231101372688, "grad_norm": 0.0011450973071098205, "learning_rate": 9.602577873254565e-07, "loss": 0.0, "step": 14186 }, { "epoch": 0.9142875555841979, "grad_norm": 0.008593922428411569, "learning_rate": 9.595417114214108e-07, "loss": 0.0, "step": 14187 }, { "epoch": 0.9143520010311271, "grad_norm": 0.0016867673517670938, "learning_rate": 9.58825635517365e-07, "loss": 0.0, "step": 14188 }, { "epoch": 0.9144164464780563, "grad_norm": 0.0015443399622426112, "learning_rate": 9.581095596133192e-07, "loss": 0.0, "step": 14189 }, { "epoch": 0.9144808919249855, "grad_norm": 0.02063164477752474, "learning_rate": 9.573934837092733e-07, "loss": 0.0001, "step": 14190 }, { "epoch": 0.9145453373719147, "grad_norm": 0.0024049854891011677, "learning_rate": 9.566774078052274e-07, "loss": 0.0, "step": 14191 }, { "epoch": 0.9146097828188439, "grad_norm": 0.01716273295228107, "learning_rate": 9.559613319011817e-07, "loss": 0.0002, "step": 14192 }, { "epoch": 0.914674228265773, "grad_norm": 0.12328758990372396, "learning_rate": 9.552452559971358e-07, "loss": 0.0012, "step": 14193 }, { "epoch": 0.9147386737127022, "grad_norm": 0.04222494037999026, "learning_rate": 9.545291800930899e-07, "loss": 0.0, "step": 14194 }, { "epoch": 0.9148031191596314, "grad_norm": 0.0006967093892120433, "learning_rate": 9.538131041890442e-07, "loss": 0.0, "step": 14195 }, { "epoch": 0.9148675646065606, "grad_norm": 0.10183633970320935, "learning_rate": 9.530970282849983e-07, "loss": 0.0014, "step": 14196 }, { "epoch": 0.9149320100534897, "grad_norm": 0.04194055060462203, "learning_rate": 9.523809523809525e-07, "loss": 0.0017, "step": 14197 }, { "epoch": 0.9149964555004189, "grad_norm": 0.2690406148430233, "learning_rate": 9.516648764769066e-07, "loss": 0.0005, "step": 14198 }, { "epoch": 0.915060900947348, "grad_norm": 0.00021367249271819308, "learning_rate": 9.509488005728608e-07, "loss": 0.0, "step": 14199 }, { "epoch": 0.9151253463942772, "grad_norm": 0.0014708756146450935, "learning_rate": 9.502327246688149e-07, "loss": 0.0, "step": 14200 }, { "epoch": 0.9151897918412064, "grad_norm": 0.0002496282942443766, "learning_rate": 9.495166487647692e-07, "loss": 0.0, "step": 14201 }, { "epoch": 0.9152542372881356, "grad_norm": 0.016152459797736374, "learning_rate": 9.488005728607233e-07, "loss": 0.0, "step": 14202 }, { "epoch": 0.9153186827350648, "grad_norm": 0.010979395241874716, "learning_rate": 9.480844969566774e-07, "loss": 0.0, "step": 14203 }, { "epoch": 0.915383128181994, "grad_norm": 0.0009375031145412428, "learning_rate": 9.473684210526317e-07, "loss": 0.0, "step": 14204 }, { "epoch": 0.9154475736289232, "grad_norm": 0.01797304484362447, "learning_rate": 9.466523451485858e-07, "loss": 0.0001, "step": 14205 }, { "epoch": 0.9155120190758523, "grad_norm": 0.0002960165775091309, "learning_rate": 9.4593626924454e-07, "loss": 0.0, "step": 14206 }, { "epoch": 0.9155764645227815, "grad_norm": 0.00013216728878975067, "learning_rate": 9.452201933404942e-07, "loss": 0.0, "step": 14207 }, { "epoch": 0.9156409099697106, "grad_norm": 0.0016359036208272025, "learning_rate": 9.445041174364484e-07, "loss": 0.0, "step": 14208 }, { "epoch": 0.9157053554166398, "grad_norm": 0.002744799936497066, "learning_rate": 9.437880415324025e-07, "loss": 0.0, "step": 14209 }, { "epoch": 0.915769800863569, "grad_norm": 0.00023160867892980746, "learning_rate": 9.430719656283567e-07, "loss": 0.0, "step": 14210 }, { "epoch": 0.9158342463104981, "grad_norm": 0.009411741817506578, "learning_rate": 9.423558897243108e-07, "loss": 0.0, "step": 14211 }, { "epoch": 0.9158986917574273, "grad_norm": 0.001378941707229188, "learning_rate": 9.416398138202651e-07, "loss": 0.0, "step": 14212 }, { "epoch": 0.9159631372043565, "grad_norm": 0.025205632894264283, "learning_rate": 9.409237379162192e-07, "loss": 0.0001, "step": 14213 }, { "epoch": 0.9160275826512857, "grad_norm": 0.39096405573360754, "learning_rate": 9.402076620121733e-07, "loss": 0.001, "step": 14214 }, { "epoch": 0.9160920280982149, "grad_norm": 0.00014380302746555912, "learning_rate": 9.394915861081276e-07, "loss": 0.0, "step": 14215 }, { "epoch": 0.9161564735451441, "grad_norm": 0.002670300688032658, "learning_rate": 9.387755102040817e-07, "loss": 0.0, "step": 14216 }, { "epoch": 0.9162209189920733, "grad_norm": 0.11628580086246942, "learning_rate": 9.380594343000359e-07, "loss": 0.0004, "step": 14217 }, { "epoch": 0.9162853644390024, "grad_norm": 0.0002583038235821294, "learning_rate": 9.373433583959901e-07, "loss": 0.0, "step": 14218 }, { "epoch": 0.9163498098859315, "grad_norm": 0.022131033830474392, "learning_rate": 9.366272824919443e-07, "loss": 0.0, "step": 14219 }, { "epoch": 0.9164142553328607, "grad_norm": 0.0015146300558742055, "learning_rate": 9.359112065878984e-07, "loss": 0.0, "step": 14220 }, { "epoch": 0.9164787007797899, "grad_norm": 4.8640373473175795e-05, "learning_rate": 9.351951306838527e-07, "loss": 0.0, "step": 14221 }, { "epoch": 0.9165431462267191, "grad_norm": 0.4697828516869718, "learning_rate": 9.344790547798067e-07, "loss": 0.003, "step": 14222 }, { "epoch": 0.9166075916736482, "grad_norm": 0.057128720813026404, "learning_rate": 9.337629788757608e-07, "loss": 0.0016, "step": 14223 }, { "epoch": 0.9166720371205774, "grad_norm": 0.03367085797727787, "learning_rate": 9.330469029717151e-07, "loss": 0.0001, "step": 14224 }, { "epoch": 0.9167364825675066, "grad_norm": 0.0030701623211639023, "learning_rate": 9.323308270676692e-07, "loss": 0.0, "step": 14225 }, { "epoch": 0.9168009280144358, "grad_norm": 0.0019137412471503506, "learning_rate": 9.316147511636234e-07, "loss": 0.0, "step": 14226 }, { "epoch": 0.916865373461365, "grad_norm": 0.0006687030619872437, "learning_rate": 9.308986752595776e-07, "loss": 0.0, "step": 14227 }, { "epoch": 0.9169298189082942, "grad_norm": 1.7368992487315262, "learning_rate": 9.301825993555318e-07, "loss": 0.0119, "step": 14228 }, { "epoch": 0.9169942643552234, "grad_norm": 0.0003233422494308578, "learning_rate": 9.294665234514859e-07, "loss": 0.0, "step": 14229 }, { "epoch": 0.9170587098021524, "grad_norm": 0.0069097439182549385, "learning_rate": 9.287504475474402e-07, "loss": 0.0, "step": 14230 }, { "epoch": 0.9171231552490816, "grad_norm": 0.0029392214582604566, "learning_rate": 9.280343716433943e-07, "loss": 0.0, "step": 14231 }, { "epoch": 0.9171876006960108, "grad_norm": 0.0067482743978787415, "learning_rate": 9.273182957393484e-07, "loss": 0.0, "step": 14232 }, { "epoch": 0.91725204614294, "grad_norm": 0.0020425040118576547, "learning_rate": 9.266022198353027e-07, "loss": 0.0, "step": 14233 }, { "epoch": 0.9173164915898692, "grad_norm": 0.043513331215841396, "learning_rate": 9.258861439312567e-07, "loss": 0.0001, "step": 14234 }, { "epoch": 0.9173809370367983, "grad_norm": 0.0005674751602963122, "learning_rate": 9.251700680272109e-07, "loss": 0.0, "step": 14235 }, { "epoch": 0.9174453824837275, "grad_norm": 0.00017620281295303855, "learning_rate": 9.244539921231651e-07, "loss": 0.0, "step": 14236 }, { "epoch": 0.9175098279306567, "grad_norm": 0.001052507181908841, "learning_rate": 9.237379162191193e-07, "loss": 0.0, "step": 14237 }, { "epoch": 0.9175742733775859, "grad_norm": 0.14251254807613536, "learning_rate": 9.230218403150734e-07, "loss": 0.0019, "step": 14238 }, { "epoch": 0.9176387188245151, "grad_norm": 0.0016286015558987282, "learning_rate": 9.223057644110277e-07, "loss": 0.0, "step": 14239 }, { "epoch": 0.9177031642714443, "grad_norm": 0.00010581374894291006, "learning_rate": 9.215896885069818e-07, "loss": 0.0, "step": 14240 }, { "epoch": 0.9177676097183733, "grad_norm": 0.0015797488393140531, "learning_rate": 9.208736126029359e-07, "loss": 0.0, "step": 14241 }, { "epoch": 0.9178320551653025, "grad_norm": 0.0008123762794186068, "learning_rate": 9.201575366988902e-07, "loss": 0.0, "step": 14242 }, { "epoch": 0.9178965006122317, "grad_norm": 0.008421636058342962, "learning_rate": 9.194414607948443e-07, "loss": 0.0, "step": 14243 }, { "epoch": 0.9179609460591609, "grad_norm": 0.000672241911371534, "learning_rate": 9.187253848907985e-07, "loss": 0.0, "step": 14244 }, { "epoch": 0.9180253915060901, "grad_norm": 0.0005130752034765722, "learning_rate": 9.180093089867527e-07, "loss": 0.0, "step": 14245 }, { "epoch": 0.9180898369530193, "grad_norm": 6.957164338496054e-05, "learning_rate": 9.172932330827068e-07, "loss": 0.0, "step": 14246 }, { "epoch": 0.9181542823999485, "grad_norm": 0.004817899785118754, "learning_rate": 9.16577157178661e-07, "loss": 0.0, "step": 14247 }, { "epoch": 0.9182187278468776, "grad_norm": 0.0004698135349077357, "learning_rate": 9.158610812746152e-07, "loss": 0.0, "step": 14248 }, { "epoch": 0.9182831732938068, "grad_norm": 0.022730519540926336, "learning_rate": 9.151450053705693e-07, "loss": 0.0002, "step": 14249 }, { "epoch": 0.918347618740736, "grad_norm": 0.0039667025926992784, "learning_rate": 9.144289294665236e-07, "loss": 0.0, "step": 14250 }, { "epoch": 0.9184120641876652, "grad_norm": 0.002694283624502539, "learning_rate": 9.137128535624777e-07, "loss": 0.0, "step": 14251 }, { "epoch": 0.9184765096345943, "grad_norm": 0.012668603875843228, "learning_rate": 9.129967776584318e-07, "loss": 0.0001, "step": 14252 }, { "epoch": 0.9185409550815234, "grad_norm": 0.00016267248176073663, "learning_rate": 9.122807017543861e-07, "loss": 0.0, "step": 14253 }, { "epoch": 0.9186054005284526, "grad_norm": 0.16982769822718005, "learning_rate": 9.115646258503402e-07, "loss": 0.0005, "step": 14254 }, { "epoch": 0.9186698459753818, "grad_norm": 0.0004709426465504047, "learning_rate": 9.108485499462944e-07, "loss": 0.0, "step": 14255 }, { "epoch": 0.918734291422311, "grad_norm": 0.0028317174475590155, "learning_rate": 9.101324740422486e-07, "loss": 0.0, "step": 14256 }, { "epoch": 0.9187987368692402, "grad_norm": 0.00039515144590099115, "learning_rate": 9.094163981382028e-07, "loss": 0.0, "step": 14257 }, { "epoch": 0.9188631823161694, "grad_norm": 0.034502345692150146, "learning_rate": 9.087003222341568e-07, "loss": 0.0, "step": 14258 }, { "epoch": 0.9189276277630986, "grad_norm": 0.013624407437962447, "learning_rate": 9.079842463301111e-07, "loss": 0.0001, "step": 14259 }, { "epoch": 0.9189920732100277, "grad_norm": 0.31261428888863013, "learning_rate": 9.072681704260652e-07, "loss": 0.0011, "step": 14260 }, { "epoch": 0.9190565186569569, "grad_norm": 0.27199733765961076, "learning_rate": 9.065520945220194e-07, "loss": 0.0005, "step": 14261 }, { "epoch": 0.9191209641038861, "grad_norm": 0.04721813969246109, "learning_rate": 9.058360186179736e-07, "loss": 0.0016, "step": 14262 }, { "epoch": 0.9191854095508152, "grad_norm": 0.0005272377698462465, "learning_rate": 9.051199427139278e-07, "loss": 0.0, "step": 14263 }, { "epoch": 0.9192498549977444, "grad_norm": 0.003632628093650156, "learning_rate": 9.044038668098819e-07, "loss": 0.0, "step": 14264 }, { "epoch": 0.9193143004446735, "grad_norm": 0.08249185312389225, "learning_rate": 9.036877909058362e-07, "loss": 0.0001, "step": 14265 }, { "epoch": 0.9193787458916027, "grad_norm": 0.01769209298130543, "learning_rate": 9.029717150017903e-07, "loss": 0.0001, "step": 14266 }, { "epoch": 0.9194431913385319, "grad_norm": 0.007572932169365242, "learning_rate": 9.022556390977444e-07, "loss": 0.0, "step": 14267 }, { "epoch": 0.9195076367854611, "grad_norm": 0.0006129075108329983, "learning_rate": 9.015395631936987e-07, "loss": 0.0, "step": 14268 }, { "epoch": 0.9195720822323903, "grad_norm": 0.15367907565903965, "learning_rate": 9.008234872896527e-07, "loss": 0.0019, "step": 14269 }, { "epoch": 0.9196365276793195, "grad_norm": 0.00012896780691753785, "learning_rate": 9.001074113856069e-07, "loss": 0.0, "step": 14270 }, { "epoch": 0.9197009731262487, "grad_norm": 0.04012686418183218, "learning_rate": 8.993913354815611e-07, "loss": 0.0001, "step": 14271 }, { "epoch": 0.9197654185731778, "grad_norm": 0.009376539846461099, "learning_rate": 8.986752595775153e-07, "loss": 0.0, "step": 14272 }, { "epoch": 0.919829864020107, "grad_norm": 0.10376941411654185, "learning_rate": 8.979591836734694e-07, "loss": 0.0001, "step": 14273 }, { "epoch": 0.9198943094670362, "grad_norm": 0.004417011550164127, "learning_rate": 8.972431077694237e-07, "loss": 0.0, "step": 14274 }, { "epoch": 0.9199587549139653, "grad_norm": 0.0010578230684077303, "learning_rate": 8.965270318653778e-07, "loss": 0.0, "step": 14275 }, { "epoch": 0.9200232003608945, "grad_norm": 0.005991646645697451, "learning_rate": 8.958109559613319e-07, "loss": 0.0, "step": 14276 }, { "epoch": 0.9200876458078237, "grad_norm": 0.0013921188764913148, "learning_rate": 8.950948800572862e-07, "loss": 0.0, "step": 14277 }, { "epoch": 0.9201520912547528, "grad_norm": 0.003431294612354495, "learning_rate": 8.943788041532403e-07, "loss": 0.0, "step": 14278 }, { "epoch": 0.920216536701682, "grad_norm": 0.0009395657251499541, "learning_rate": 8.936627282491945e-07, "loss": 0.0, "step": 14279 }, { "epoch": 0.9202809821486112, "grad_norm": 0.21383604058028396, "learning_rate": 8.929466523451487e-07, "loss": 0.0007, "step": 14280 }, { "epoch": 0.9203454275955404, "grad_norm": 0.07478550921576256, "learning_rate": 8.922305764411029e-07, "loss": 0.0002, "step": 14281 }, { "epoch": 0.9204098730424696, "grad_norm": 0.0018806258634827169, "learning_rate": 8.915145005370569e-07, "loss": 0.0, "step": 14282 }, { "epoch": 0.9204743184893988, "grad_norm": 0.0006642417777200569, "learning_rate": 8.907984246330112e-07, "loss": 0.0, "step": 14283 }, { "epoch": 0.9205387639363279, "grad_norm": 0.033905118569600774, "learning_rate": 8.900823487289653e-07, "loss": 0.0001, "step": 14284 }, { "epoch": 0.9206032093832571, "grad_norm": 0.00029521834576314433, "learning_rate": 8.893662728249196e-07, "loss": 0.0, "step": 14285 }, { "epoch": 0.9206676548301862, "grad_norm": 0.0026862544176196293, "learning_rate": 8.886501969208737e-07, "loss": 0.0, "step": 14286 }, { "epoch": 0.9207321002771154, "grad_norm": 0.029072616489818775, "learning_rate": 8.879341210168278e-07, "loss": 0.0015, "step": 14287 }, { "epoch": 0.9207965457240446, "grad_norm": 0.005206056469858584, "learning_rate": 8.872180451127821e-07, "loss": 0.0, "step": 14288 }, { "epoch": 0.9208609911709738, "grad_norm": 0.004492865880830228, "learning_rate": 8.865019692087362e-07, "loss": 0.0, "step": 14289 }, { "epoch": 0.9209254366179029, "grad_norm": 0.04546572249122386, "learning_rate": 8.857858933046904e-07, "loss": 0.0, "step": 14290 }, { "epoch": 0.9209898820648321, "grad_norm": 0.005094889517667441, "learning_rate": 8.850698174006446e-07, "loss": 0.0001, "step": 14291 }, { "epoch": 0.9210543275117613, "grad_norm": 0.6241890175700587, "learning_rate": 8.843537414965988e-07, "loss": 0.0136, "step": 14292 }, { "epoch": 0.9211187729586905, "grad_norm": 0.489125871665835, "learning_rate": 8.836376655925528e-07, "loss": 0.0015, "step": 14293 }, { "epoch": 0.9211832184056197, "grad_norm": 0.09846547066195413, "learning_rate": 8.829215896885071e-07, "loss": 0.0002, "step": 14294 }, { "epoch": 0.9212476638525489, "grad_norm": 0.0005087791308881729, "learning_rate": 8.822055137844612e-07, "loss": 0.0, "step": 14295 }, { "epoch": 0.921312109299478, "grad_norm": 0.0017957632932818469, "learning_rate": 8.814894378804153e-07, "loss": 0.0, "step": 14296 }, { "epoch": 0.9213765547464071, "grad_norm": 0.04921608342410101, "learning_rate": 8.807733619763696e-07, "loss": 0.0004, "step": 14297 }, { "epoch": 0.9214410001933363, "grad_norm": 0.0911446236348484, "learning_rate": 8.800572860723237e-07, "loss": 0.0016, "step": 14298 }, { "epoch": 0.9215054456402655, "grad_norm": 0.0002452915588548585, "learning_rate": 8.793412101682779e-07, "loss": 0.0, "step": 14299 }, { "epoch": 0.9215698910871947, "grad_norm": 0.0007380774980053137, "learning_rate": 8.786251342642321e-07, "loss": 0.0, "step": 14300 }, { "epoch": 0.9216343365341239, "grad_norm": 0.000311741694295826, "learning_rate": 8.779090583601863e-07, "loss": 0.0, "step": 14301 }, { "epoch": 0.921698781981053, "grad_norm": 0.0879874067442999, "learning_rate": 8.771929824561404e-07, "loss": 0.001, "step": 14302 }, { "epoch": 0.9217632274279822, "grad_norm": 0.005910103099318323, "learning_rate": 8.764769065520947e-07, "loss": 0.0, "step": 14303 }, { "epoch": 0.9218276728749114, "grad_norm": 0.00033256577001777654, "learning_rate": 8.757608306480488e-07, "loss": 0.0, "step": 14304 }, { "epoch": 0.9218921183218406, "grad_norm": 0.009101280903348382, "learning_rate": 8.750447547440028e-07, "loss": 0.0, "step": 14305 }, { "epoch": 0.9219565637687698, "grad_norm": 0.0008341859623354918, "learning_rate": 8.743286788399571e-07, "loss": 0.0, "step": 14306 }, { "epoch": 0.922021009215699, "grad_norm": 0.0005060169378157814, "learning_rate": 8.736126029359112e-07, "loss": 0.0, "step": 14307 }, { "epoch": 0.922085454662628, "grad_norm": 0.2939196223695395, "learning_rate": 8.728965270318654e-07, "loss": 0.001, "step": 14308 }, { "epoch": 0.9221499001095572, "grad_norm": 0.00019054155313982495, "learning_rate": 8.721804511278196e-07, "loss": 0.0, "step": 14309 }, { "epoch": 0.9222143455564864, "grad_norm": 0.011363912084689533, "learning_rate": 8.714643752237738e-07, "loss": 0.0001, "step": 14310 }, { "epoch": 0.9222787910034156, "grad_norm": 0.0007240625442157176, "learning_rate": 8.707482993197279e-07, "loss": 0.0, "step": 14311 }, { "epoch": 0.9223432364503448, "grad_norm": 0.22208887725504373, "learning_rate": 8.700322234156822e-07, "loss": 0.0003, "step": 14312 }, { "epoch": 0.922407681897274, "grad_norm": 0.013504279937124145, "learning_rate": 8.693161475116363e-07, "loss": 0.0, "step": 14313 }, { "epoch": 0.9224721273442031, "grad_norm": 0.0020501837505711128, "learning_rate": 8.686000716075904e-07, "loss": 0.0, "step": 14314 }, { "epoch": 0.9225365727911323, "grad_norm": 0.28737357044486633, "learning_rate": 8.678839957035447e-07, "loss": 0.0003, "step": 14315 }, { "epoch": 0.9226010182380615, "grad_norm": 0.0010953903883233731, "learning_rate": 8.671679197994988e-07, "loss": 0.0, "step": 14316 }, { "epoch": 0.9226654636849907, "grad_norm": 0.009895230589149193, "learning_rate": 8.664518438954529e-07, "loss": 0.0001, "step": 14317 }, { "epoch": 0.9227299091319199, "grad_norm": 0.04075174082050103, "learning_rate": 8.657357679914071e-07, "loss": 0.0, "step": 14318 }, { "epoch": 0.922794354578849, "grad_norm": 0.0002692212550240471, "learning_rate": 8.650196920873613e-07, "loss": 0.0, "step": 14319 }, { "epoch": 0.9228588000257781, "grad_norm": 0.00048252134601037153, "learning_rate": 8.643036161833155e-07, "loss": 0.0, "step": 14320 }, { "epoch": 0.9229232454727073, "grad_norm": 3.825583155027952e-05, "learning_rate": 8.635875402792697e-07, "loss": 0.0, "step": 14321 }, { "epoch": 0.9229876909196365, "grad_norm": 0.0018811696074475584, "learning_rate": 8.628714643752238e-07, "loss": 0.0, "step": 14322 }, { "epoch": 0.9230521363665657, "grad_norm": 0.07775464787976358, "learning_rate": 8.621553884711781e-07, "loss": 0.0051, "step": 14323 }, { "epoch": 0.9231165818134949, "grad_norm": 0.058226357454030764, "learning_rate": 8.614393125671322e-07, "loss": 0.0001, "step": 14324 }, { "epoch": 0.9231810272604241, "grad_norm": 0.012174789048439801, "learning_rate": 8.607232366630863e-07, "loss": 0.0, "step": 14325 }, { "epoch": 0.9232454727073532, "grad_norm": 0.0017584853190584743, "learning_rate": 8.600071607590406e-07, "loss": 0.0, "step": 14326 }, { "epoch": 0.9233099181542824, "grad_norm": 0.000461639247452941, "learning_rate": 8.592910848549947e-07, "loss": 0.0, "step": 14327 }, { "epoch": 0.9233743636012116, "grad_norm": 0.07013433453961926, "learning_rate": 8.585750089509489e-07, "loss": 0.0001, "step": 14328 }, { "epoch": 0.9234388090481408, "grad_norm": 0.00018168184062102873, "learning_rate": 8.57858933046903e-07, "loss": 0.0, "step": 14329 }, { "epoch": 0.9235032544950699, "grad_norm": 0.009711667844548244, "learning_rate": 8.571428571428572e-07, "loss": 0.0, "step": 14330 }, { "epoch": 0.923567699941999, "grad_norm": 0.0006406341603395895, "learning_rate": 8.564267812388113e-07, "loss": 0.0, "step": 14331 }, { "epoch": 0.9236321453889282, "grad_norm": 8.876367947043951e-05, "learning_rate": 8.557107053347656e-07, "loss": 0.0, "step": 14332 }, { "epoch": 0.9236965908358574, "grad_norm": 0.6359557512368327, "learning_rate": 8.549946294307197e-07, "loss": 0.0067, "step": 14333 }, { "epoch": 0.9237610362827866, "grad_norm": 0.00493237461485779, "learning_rate": 8.542785535266739e-07, "loss": 0.0, "step": 14334 }, { "epoch": 0.9238254817297158, "grad_norm": 0.0052610262877635415, "learning_rate": 8.535624776226281e-07, "loss": 0.0, "step": 14335 }, { "epoch": 0.923889927176645, "grad_norm": 0.00011238967086745099, "learning_rate": 8.528464017185823e-07, "loss": 0.0, "step": 14336 }, { "epoch": 0.9239543726235742, "grad_norm": 2.3785792728021552e-05, "learning_rate": 8.521303258145364e-07, "loss": 0.0, "step": 14337 }, { "epoch": 0.9240188180705033, "grad_norm": 0.000555459237647623, "learning_rate": 8.514142499104907e-07, "loss": 0.0, "step": 14338 }, { "epoch": 0.9240832635174325, "grad_norm": 0.010765654901809371, "learning_rate": 8.506981740064448e-07, "loss": 0.0, "step": 14339 }, { "epoch": 0.9241477089643617, "grad_norm": 0.004778807092981911, "learning_rate": 8.499820981023989e-07, "loss": 0.0, "step": 14340 }, { "epoch": 0.9242121544112908, "grad_norm": 0.005520193338135902, "learning_rate": 8.492660221983531e-07, "loss": 0.0, "step": 14341 }, { "epoch": 0.92427659985822, "grad_norm": 0.3407595779824848, "learning_rate": 8.485499462943072e-07, "loss": 0.0006, "step": 14342 }, { "epoch": 0.9243410453051492, "grad_norm": 4.109653675093666e-05, "learning_rate": 8.478338703902614e-07, "loss": 0.0, "step": 14343 }, { "epoch": 0.9244054907520783, "grad_norm": 0.05114493215860188, "learning_rate": 8.471177944862156e-07, "loss": 0.0001, "step": 14344 }, { "epoch": 0.9244699361990075, "grad_norm": 0.004551564714074149, "learning_rate": 8.464017185821698e-07, "loss": 0.0, "step": 14345 }, { "epoch": 0.9245343816459367, "grad_norm": 0.0021971180571567555, "learning_rate": 8.456856426781239e-07, "loss": 0.0, "step": 14346 }, { "epoch": 0.9245988270928659, "grad_norm": 0.0005614679583030613, "learning_rate": 8.449695667740782e-07, "loss": 0.0, "step": 14347 }, { "epoch": 0.9246632725397951, "grad_norm": 0.3467760408850226, "learning_rate": 8.442534908700323e-07, "loss": 0.0017, "step": 14348 }, { "epoch": 0.9247277179867243, "grad_norm": 0.08151521610210832, "learning_rate": 8.435374149659864e-07, "loss": 0.0008, "step": 14349 }, { "epoch": 0.9247921634336534, "grad_norm": 0.004019518333800433, "learning_rate": 8.428213390619407e-07, "loss": 0.0, "step": 14350 }, { "epoch": 0.9248566088805826, "grad_norm": 0.07447157949814238, "learning_rate": 8.421052631578948e-07, "loss": 0.0001, "step": 14351 }, { "epoch": 0.9249210543275118, "grad_norm": 0.014641115226128458, "learning_rate": 8.41389187253849e-07, "loss": 0.0, "step": 14352 }, { "epoch": 0.9249854997744409, "grad_norm": 0.3754926862321877, "learning_rate": 8.406731113498031e-07, "loss": 0.0031, "step": 14353 }, { "epoch": 0.9250499452213701, "grad_norm": 0.0006381091658297029, "learning_rate": 8.399570354457573e-07, "loss": 0.0, "step": 14354 }, { "epoch": 0.9251143906682993, "grad_norm": 0.0007651448783783858, "learning_rate": 8.392409595417115e-07, "loss": 0.0, "step": 14355 }, { "epoch": 0.9251788361152284, "grad_norm": 0.010261725668583554, "learning_rate": 8.385248836376657e-07, "loss": 0.0, "step": 14356 }, { "epoch": 0.9252432815621576, "grad_norm": 0.004215161724131631, "learning_rate": 8.378088077336198e-07, "loss": 0.0, "step": 14357 }, { "epoch": 0.9253077270090868, "grad_norm": 0.010846641072357954, "learning_rate": 8.370927318295741e-07, "loss": 0.0, "step": 14358 }, { "epoch": 0.925372172456016, "grad_norm": 0.0036661152239055466, "learning_rate": 8.363766559255282e-07, "loss": 0.0, "step": 14359 }, { "epoch": 0.9254366179029452, "grad_norm": 0.2835533081727429, "learning_rate": 8.356605800214823e-07, "loss": 0.0023, "step": 14360 }, { "epoch": 0.9255010633498744, "grad_norm": 0.0005097153361995352, "learning_rate": 8.349445041174366e-07, "loss": 0.0, "step": 14361 }, { "epoch": 0.9255655087968035, "grad_norm": 0.0035745185335236363, "learning_rate": 8.342284282133907e-07, "loss": 0.0, "step": 14362 }, { "epoch": 0.9256299542437327, "grad_norm": 0.0022912225414757144, "learning_rate": 8.335123523093449e-07, "loss": 0.0, "step": 14363 }, { "epoch": 0.9256943996906618, "grad_norm": 0.006479848061025811, "learning_rate": 8.327962764052991e-07, "loss": 0.0, "step": 14364 }, { "epoch": 0.925758845137591, "grad_norm": 0.04159580182992835, "learning_rate": 8.320802005012532e-07, "loss": 0.0001, "step": 14365 }, { "epoch": 0.9258232905845202, "grad_norm": 0.003863711422317911, "learning_rate": 8.313641245972073e-07, "loss": 0.0, "step": 14366 }, { "epoch": 0.9258877360314494, "grad_norm": 0.00010527621767379967, "learning_rate": 8.306480486931616e-07, "loss": 0.0, "step": 14367 }, { "epoch": 0.9259521814783785, "grad_norm": 0.0008107817368530498, "learning_rate": 8.299319727891157e-07, "loss": 0.0, "step": 14368 }, { "epoch": 0.9260166269253077, "grad_norm": 0.39974754607215024, "learning_rate": 8.292158968850698e-07, "loss": 0.0026, "step": 14369 }, { "epoch": 0.9260810723722369, "grad_norm": 0.0003916170677837437, "learning_rate": 8.284998209810241e-07, "loss": 0.0, "step": 14370 }, { "epoch": 0.9261455178191661, "grad_norm": 0.00032176926961688867, "learning_rate": 8.277837450769782e-07, "loss": 0.0, "step": 14371 }, { "epoch": 0.9262099632660953, "grad_norm": 0.004748618602019491, "learning_rate": 8.270676691729324e-07, "loss": 0.0, "step": 14372 }, { "epoch": 0.9262744087130245, "grad_norm": 0.0005727854899369785, "learning_rate": 8.263515932688866e-07, "loss": 0.0, "step": 14373 }, { "epoch": 0.9263388541599537, "grad_norm": 9.207616447138975e-05, "learning_rate": 8.256355173648408e-07, "loss": 0.0, "step": 14374 }, { "epoch": 0.9264032996068827, "grad_norm": 0.00040550139835061086, "learning_rate": 8.249194414607949e-07, "loss": 0.0, "step": 14375 }, { "epoch": 0.9264677450538119, "grad_norm": 0.0011182196861263627, "learning_rate": 8.242033655567492e-07, "loss": 0.0, "step": 14376 }, { "epoch": 0.9265321905007411, "grad_norm": 0.002358272684080987, "learning_rate": 8.234872896527032e-07, "loss": 0.0, "step": 14377 }, { "epoch": 0.9265966359476703, "grad_norm": 0.12104582607336277, "learning_rate": 8.227712137486573e-07, "loss": 0.0001, "step": 14378 }, { "epoch": 0.9266610813945995, "grad_norm": 0.007726797377316758, "learning_rate": 8.220551378446116e-07, "loss": 0.0, "step": 14379 }, { "epoch": 0.9267255268415286, "grad_norm": 0.008248464069637097, "learning_rate": 8.213390619405657e-07, "loss": 0.0, "step": 14380 }, { "epoch": 0.9267899722884578, "grad_norm": 0.0001702704616484478, "learning_rate": 8.206229860365199e-07, "loss": 0.0, "step": 14381 }, { "epoch": 0.926854417735387, "grad_norm": 0.000260795053944009, "learning_rate": 8.199069101324741e-07, "loss": 0.0, "step": 14382 }, { "epoch": 0.9269188631823162, "grad_norm": 0.0007138246285888987, "learning_rate": 8.191908342284283e-07, "loss": 0.0, "step": 14383 }, { "epoch": 0.9269833086292454, "grad_norm": 0.00015103320664588784, "learning_rate": 8.184747583243824e-07, "loss": 0.0, "step": 14384 }, { "epoch": 0.9270477540761746, "grad_norm": 0.6980205264079195, "learning_rate": 8.177586824203367e-07, "loss": 0.0055, "step": 14385 }, { "epoch": 0.9271121995231036, "grad_norm": 0.06006724016241198, "learning_rate": 8.170426065162908e-07, "loss": 0.0001, "step": 14386 }, { "epoch": 0.9271766449700328, "grad_norm": 0.1601703742242303, "learning_rate": 8.163265306122449e-07, "loss": 0.0037, "step": 14387 }, { "epoch": 0.927241090416962, "grad_norm": 0.0006902069417974614, "learning_rate": 8.156104547081992e-07, "loss": 0.0, "step": 14388 }, { "epoch": 0.9273055358638912, "grad_norm": 0.001034304148302098, "learning_rate": 8.148943788041532e-07, "loss": 0.0, "step": 14389 }, { "epoch": 0.9273699813108204, "grad_norm": 0.0006792645530657716, "learning_rate": 8.141783029001074e-07, "loss": 0.0, "step": 14390 }, { "epoch": 0.9274344267577496, "grad_norm": 0.006263223817885295, "learning_rate": 8.134622269960616e-07, "loss": 0.0, "step": 14391 }, { "epoch": 0.9274988722046787, "grad_norm": 0.0018722316210985034, "learning_rate": 8.127461510920158e-07, "loss": 0.0, "step": 14392 }, { "epoch": 0.9275633176516079, "grad_norm": 0.000974320736434331, "learning_rate": 8.1203007518797e-07, "loss": 0.0, "step": 14393 }, { "epoch": 0.9276277630985371, "grad_norm": 0.0003239978115912819, "learning_rate": 8.113139992839242e-07, "loss": 0.0, "step": 14394 }, { "epoch": 0.9276922085454663, "grad_norm": 0.0013150495822469358, "learning_rate": 8.105979233798783e-07, "loss": 0.0, "step": 14395 }, { "epoch": 0.9277566539923955, "grad_norm": 0.015568682946537292, "learning_rate": 8.098818474758326e-07, "loss": 0.0, "step": 14396 }, { "epoch": 0.9278210994393246, "grad_norm": 0.059350822962951, "learning_rate": 8.091657715717867e-07, "loss": 0.0, "step": 14397 }, { "epoch": 0.9278855448862537, "grad_norm": 0.019161179937591782, "learning_rate": 8.084496956677408e-07, "loss": 0.0, "step": 14398 }, { "epoch": 0.9279499903331829, "grad_norm": 0.0008154856476937402, "learning_rate": 8.077336197636951e-07, "loss": 0.0, "step": 14399 }, { "epoch": 0.9280144357801121, "grad_norm": 0.0017092046185970635, "learning_rate": 8.070175438596491e-07, "loss": 0.0, "step": 14400 }, { "epoch": 0.9280788812270413, "grad_norm": 0.0223845070364754, "learning_rate": 8.063014679556033e-07, "loss": 0.0001, "step": 14401 }, { "epoch": 0.9281433266739705, "grad_norm": 0.08140399362746532, "learning_rate": 8.055853920515575e-07, "loss": 0.0007, "step": 14402 }, { "epoch": 0.9282077721208997, "grad_norm": 0.007940254101373572, "learning_rate": 8.048693161475117e-07, "loss": 0.0, "step": 14403 }, { "epoch": 0.9282722175678289, "grad_norm": 0.03842277464554137, "learning_rate": 8.041532402434658e-07, "loss": 0.0, "step": 14404 }, { "epoch": 0.928336663014758, "grad_norm": 0.0015349968048687594, "learning_rate": 8.034371643394201e-07, "loss": 0.0, "step": 14405 }, { "epoch": 0.9284011084616872, "grad_norm": 0.7215145680998309, "learning_rate": 8.027210884353742e-07, "loss": 0.0047, "step": 14406 }, { "epoch": 0.9284655539086164, "grad_norm": 0.14605131986601827, "learning_rate": 8.020050125313284e-07, "loss": 0.0, "step": 14407 }, { "epoch": 0.9285299993555455, "grad_norm": 0.045908688354199964, "learning_rate": 8.012889366272826e-07, "loss": 0.0001, "step": 14408 }, { "epoch": 0.9285944448024747, "grad_norm": 0.002024083815106119, "learning_rate": 8.005728607232368e-07, "loss": 0.0, "step": 14409 }, { "epoch": 0.9286588902494038, "grad_norm": 0.002112488711979832, "learning_rate": 7.998567848191909e-07, "loss": 0.0, "step": 14410 }, { "epoch": 0.928723335696333, "grad_norm": 0.008628272653972625, "learning_rate": 7.991407089151452e-07, "loss": 0.0, "step": 14411 }, { "epoch": 0.9287877811432622, "grad_norm": 0.09060448977613139, "learning_rate": 7.984246330110993e-07, "loss": 0.0002, "step": 14412 }, { "epoch": 0.9288522265901914, "grad_norm": 0.0005132101275217304, "learning_rate": 7.977085571070533e-07, "loss": 0.0, "step": 14413 }, { "epoch": 0.9289166720371206, "grad_norm": 0.1643300826197386, "learning_rate": 7.969924812030076e-07, "loss": 0.0003, "step": 14414 }, { "epoch": 0.9289811174840498, "grad_norm": 0.0016093713965732953, "learning_rate": 7.962764052989617e-07, "loss": 0.0015, "step": 14415 }, { "epoch": 0.929045562930979, "grad_norm": 0.21922135659460637, "learning_rate": 7.955603293949159e-07, "loss": 0.0008, "step": 14416 }, { "epoch": 0.9291100083779081, "grad_norm": 0.0008585809892495993, "learning_rate": 7.948442534908701e-07, "loss": 0.0, "step": 14417 }, { "epoch": 0.9291744538248373, "grad_norm": 0.009192170655711953, "learning_rate": 7.941281775868243e-07, "loss": 0.0, "step": 14418 }, { "epoch": 0.9292388992717664, "grad_norm": 0.25046316098969706, "learning_rate": 7.934121016827784e-07, "loss": 0.0013, "step": 14419 }, { "epoch": 0.9293033447186956, "grad_norm": 0.0051350095650668295, "learning_rate": 7.926960257787327e-07, "loss": 0.0, "step": 14420 }, { "epoch": 0.9293677901656248, "grad_norm": 0.0026750066483972193, "learning_rate": 7.919799498746868e-07, "loss": 0.0, "step": 14421 }, { "epoch": 0.929432235612554, "grad_norm": 0.004276538716577033, "learning_rate": 7.912638739706409e-07, "loss": 0.0, "step": 14422 }, { "epoch": 0.9294966810594831, "grad_norm": 0.3635859288520741, "learning_rate": 7.905477980665952e-07, "loss": 0.0028, "step": 14423 }, { "epoch": 0.9295611265064123, "grad_norm": 0.01937078856667039, "learning_rate": 7.898317221625492e-07, "loss": 0.0, "step": 14424 }, { "epoch": 0.9296255719533415, "grad_norm": 0.001685193181962873, "learning_rate": 7.891156462585034e-07, "loss": 0.0, "step": 14425 }, { "epoch": 0.9296900174002707, "grad_norm": 0.0005174008097750863, "learning_rate": 7.883995703544576e-07, "loss": 0.0, "step": 14426 }, { "epoch": 0.9297544628471999, "grad_norm": 3.9323004013979845e-05, "learning_rate": 7.876834944504118e-07, "loss": 0.0, "step": 14427 }, { "epoch": 0.929818908294129, "grad_norm": 0.23817010538682193, "learning_rate": 7.86967418546366e-07, "loss": 0.0003, "step": 14428 }, { "epoch": 0.9298833537410582, "grad_norm": 0.014149205828395605, "learning_rate": 7.862513426423202e-07, "loss": 0.0, "step": 14429 }, { "epoch": 0.9299477991879874, "grad_norm": 0.0005149379627849445, "learning_rate": 7.855352667382743e-07, "loss": 0.0, "step": 14430 }, { "epoch": 0.9300122446349165, "grad_norm": 0.31164079805275147, "learning_rate": 7.848191908342286e-07, "loss": 0.001, "step": 14431 }, { "epoch": 0.9300766900818457, "grad_norm": 0.027360878841657936, "learning_rate": 7.841031149301827e-07, "loss": 0.0, "step": 14432 }, { "epoch": 0.9301411355287749, "grad_norm": 0.0019418651000683484, "learning_rate": 7.833870390261368e-07, "loss": 0.0, "step": 14433 }, { "epoch": 0.930205580975704, "grad_norm": 0.00013151979960604563, "learning_rate": 7.826709631220911e-07, "loss": 0.0, "step": 14434 }, { "epoch": 0.9302700264226332, "grad_norm": 0.0005116016903126348, "learning_rate": 7.819548872180452e-07, "loss": 0.0, "step": 14435 }, { "epoch": 0.9303344718695624, "grad_norm": 0.003360318785380491, "learning_rate": 7.812388113139993e-07, "loss": 0.0, "step": 14436 }, { "epoch": 0.9303989173164916, "grad_norm": 0.005443713674886183, "learning_rate": 7.805227354099535e-07, "loss": 0.0, "step": 14437 }, { "epoch": 0.9304633627634208, "grad_norm": 0.006804981826999122, "learning_rate": 7.798066595059077e-07, "loss": 0.0, "step": 14438 }, { "epoch": 0.93052780821035, "grad_norm": 0.004543146095431098, "learning_rate": 7.790905836018618e-07, "loss": 0.0, "step": 14439 }, { "epoch": 0.9305922536572792, "grad_norm": 0.004907974032332147, "learning_rate": 7.783745076978161e-07, "loss": 0.0, "step": 14440 }, { "epoch": 0.9306566991042083, "grad_norm": 0.007826499670387036, "learning_rate": 7.776584317937702e-07, "loss": 0.0, "step": 14441 }, { "epoch": 0.9307211445511374, "grad_norm": 0.0004948724082326526, "learning_rate": 7.769423558897243e-07, "loss": 0.0, "step": 14442 }, { "epoch": 0.9307855899980666, "grad_norm": 0.008689859857442889, "learning_rate": 7.762262799856786e-07, "loss": 0.0, "step": 14443 }, { "epoch": 0.9308500354449958, "grad_norm": 0.003751375962738023, "learning_rate": 7.755102040816327e-07, "loss": 0.0, "step": 14444 }, { "epoch": 0.930914480891925, "grad_norm": 0.00013363970816859745, "learning_rate": 7.747941281775869e-07, "loss": 0.0, "step": 14445 }, { "epoch": 0.9309789263388542, "grad_norm": 0.0018097887452774059, "learning_rate": 7.740780522735411e-07, "loss": 0.0, "step": 14446 }, { "epoch": 0.9310433717857833, "grad_norm": 0.0018097887452774059, "learning_rate": 7.740780522735411e-07, "loss": 0.0306, "step": 14447 }, { "epoch": 0.9311078172327125, "grad_norm": 0.743557799372613, "learning_rate": 7.733619763694953e-07, "loss": 0.0037, "step": 14448 }, { "epoch": 0.9311722626796417, "grad_norm": 0.003228825487809219, "learning_rate": 7.726459004654493e-07, "loss": 0.0, "step": 14449 }, { "epoch": 0.9312367081265709, "grad_norm": 0.0332350349660026, "learning_rate": 7.719298245614036e-07, "loss": 0.0001, "step": 14450 }, { "epoch": 0.9313011535735001, "grad_norm": 0.004896196076213735, "learning_rate": 7.712137486573577e-07, "loss": 0.0, "step": 14451 }, { "epoch": 0.9313655990204293, "grad_norm": 6.021259437749865e-05, "learning_rate": 7.704976727533118e-07, "loss": 0.0, "step": 14452 }, { "epoch": 0.9314300444673583, "grad_norm": 0.003628188229278082, "learning_rate": 7.697815968492661e-07, "loss": 0.0, "step": 14453 }, { "epoch": 0.9314944899142875, "grad_norm": 0.0017509759768460378, "learning_rate": 7.690655209452202e-07, "loss": 0.0, "step": 14454 }, { "epoch": 0.9315589353612167, "grad_norm": 0.6015600476896165, "learning_rate": 7.683494450411744e-07, "loss": 0.0004, "step": 14455 }, { "epoch": 0.9316233808081459, "grad_norm": 0.0020188174805083393, "learning_rate": 7.676333691371286e-07, "loss": 0.0, "step": 14456 }, { "epoch": 0.9316878262550751, "grad_norm": 0.014562413827033343, "learning_rate": 7.669172932330828e-07, "loss": 0.0, "step": 14457 }, { "epoch": 0.9317522717020043, "grad_norm": 0.0032687377442210427, "learning_rate": 7.662012173290369e-07, "loss": 0.0, "step": 14458 }, { "epoch": 0.9318167171489334, "grad_norm": 3.5950292092738695e-05, "learning_rate": 7.654851414249912e-07, "loss": 0.0, "step": 14459 }, { "epoch": 0.9318811625958626, "grad_norm": 0.007673081489013223, "learning_rate": 7.647690655209453e-07, "loss": 0.0, "step": 14460 }, { "epoch": 0.9319456080427918, "grad_norm": 0.2343050216474507, "learning_rate": 7.640529896168993e-07, "loss": 0.0024, "step": 14461 }, { "epoch": 0.932010053489721, "grad_norm": 0.0435925417565156, "learning_rate": 7.633369137128536e-07, "loss": 0.0005, "step": 14462 }, { "epoch": 0.9320744989366502, "grad_norm": 0.20216371414031317, "learning_rate": 7.626208378088077e-07, "loss": 0.0003, "step": 14463 }, { "epoch": 0.9321389443835792, "grad_norm": 0.0002489122222914264, "learning_rate": 7.61904761904762e-07, "loss": 0.0, "step": 14464 }, { "epoch": 0.9322033898305084, "grad_norm": 0.00018979941206031793, "learning_rate": 7.611886860007161e-07, "loss": 0.0, "step": 14465 }, { "epoch": 0.9322678352774376, "grad_norm": 0.0036174353999676827, "learning_rate": 7.604726100966703e-07, "loss": 0.0, "step": 14466 }, { "epoch": 0.9323322807243668, "grad_norm": 0.00010448488517973264, "learning_rate": 7.597565341926245e-07, "loss": 0.0, "step": 14467 }, { "epoch": 0.932396726171296, "grad_norm": 0.06316045030658364, "learning_rate": 7.590404582885787e-07, "loss": 0.0004, "step": 14468 }, { "epoch": 0.9324611716182252, "grad_norm": 0.5535720570292619, "learning_rate": 7.583243823845328e-07, "loss": 0.0008, "step": 14469 }, { "epoch": 0.9325256170651544, "grad_norm": 0.009089972948693839, "learning_rate": 7.576083064804871e-07, "loss": 0.0, "step": 14470 }, { "epoch": 0.9325900625120835, "grad_norm": 0.009528753366977356, "learning_rate": 7.568922305764412e-07, "loss": 0.0, "step": 14471 }, { "epoch": 0.9326545079590127, "grad_norm": 0.08273048072921844, "learning_rate": 7.561761546723953e-07, "loss": 0.0001, "step": 14472 }, { "epoch": 0.9327189534059419, "grad_norm": 0.0024652483846048165, "learning_rate": 7.554600787683496e-07, "loss": 0.0, "step": 14473 }, { "epoch": 0.9327833988528711, "grad_norm": 0.04121828109839269, "learning_rate": 7.547440028643036e-07, "loss": 0.0, "step": 14474 }, { "epoch": 0.9328478442998002, "grad_norm": 0.14005019012037867, "learning_rate": 7.540279269602578e-07, "loss": 0.0002, "step": 14475 }, { "epoch": 0.9329122897467294, "grad_norm": 0.020674897633846747, "learning_rate": 7.53311851056212e-07, "loss": 0.0, "step": 14476 }, { "epoch": 0.9329767351936585, "grad_norm": 0.39202941200795505, "learning_rate": 7.525957751521662e-07, "loss": 0.0017, "step": 14477 }, { "epoch": 0.9330411806405877, "grad_norm": 0.0019212015864760914, "learning_rate": 7.518796992481203e-07, "loss": 0.0, "step": 14478 }, { "epoch": 0.9331056260875169, "grad_norm": 0.058421093156673494, "learning_rate": 7.511636233440746e-07, "loss": 0.0001, "step": 14479 }, { "epoch": 0.9331700715344461, "grad_norm": 2.9589337738041364, "learning_rate": 7.504475474400287e-07, "loss": 0.019, "step": 14480 }, { "epoch": 0.9332345169813753, "grad_norm": 0.0026238409074990647, "learning_rate": 7.497314715359829e-07, "loss": 0.0, "step": 14481 }, { "epoch": 0.9332989624283045, "grad_norm": 0.00039909083865957056, "learning_rate": 7.490153956319371e-07, "loss": 0.0, "step": 14482 }, { "epoch": 0.9333634078752336, "grad_norm": 0.061526487668434125, "learning_rate": 7.482993197278913e-07, "loss": 0.0002, "step": 14483 }, { "epoch": 0.9334278533221628, "grad_norm": 0.0009454930574369729, "learning_rate": 7.475832438238454e-07, "loss": 0.0, "step": 14484 }, { "epoch": 0.933492298769092, "grad_norm": 0.008536048096211434, "learning_rate": 7.468671679197997e-07, "loss": 0.0001, "step": 14485 }, { "epoch": 0.9335567442160211, "grad_norm": 0.01352383029196929, "learning_rate": 7.461510920157537e-07, "loss": 0.0, "step": 14486 }, { "epoch": 0.9336211896629503, "grad_norm": 5.9316654650585815e-05, "learning_rate": 7.454350161117078e-07, "loss": 0.0, "step": 14487 }, { "epoch": 0.9336856351098795, "grad_norm": 0.00803302243530583, "learning_rate": 7.447189402076621e-07, "loss": 0.0, "step": 14488 }, { "epoch": 0.9337500805568086, "grad_norm": 0.0003945896452044587, "learning_rate": 7.440028643036162e-07, "loss": 0.0, "step": 14489 }, { "epoch": 0.9338145260037378, "grad_norm": 0.030083509488064174, "learning_rate": 7.432867883995704e-07, "loss": 0.0, "step": 14490 }, { "epoch": 0.933878971450667, "grad_norm": 0.00690686336390923, "learning_rate": 7.425707124955246e-07, "loss": 0.0, "step": 14491 }, { "epoch": 0.9339434168975962, "grad_norm": 0.005939841642474873, "learning_rate": 7.418546365914788e-07, "loss": 0.0, "step": 14492 }, { "epoch": 0.9340078623445254, "grad_norm": 0.007773213962894371, "learning_rate": 7.411385606874329e-07, "loss": 0.0, "step": 14493 }, { "epoch": 0.9340723077914546, "grad_norm": 0.015595200460516845, "learning_rate": 7.404224847833872e-07, "loss": 0.0, "step": 14494 }, { "epoch": 0.9341367532383837, "grad_norm": 0.0009196707970691353, "learning_rate": 7.397064088793413e-07, "loss": 0.0, "step": 14495 }, { "epoch": 0.9342011986853129, "grad_norm": 0.25542377927708176, "learning_rate": 7.389903329752953e-07, "loss": 0.0011, "step": 14496 }, { "epoch": 0.9342656441322421, "grad_norm": 0.006353587293436875, "learning_rate": 7.382742570712496e-07, "loss": 0.0, "step": 14497 }, { "epoch": 0.9343300895791712, "grad_norm": 0.13289643888858305, "learning_rate": 7.375581811672037e-07, "loss": 0.0002, "step": 14498 }, { "epoch": 0.9343945350261004, "grad_norm": 0.002705438309291519, "learning_rate": 7.368421052631579e-07, "loss": 0.0, "step": 14499 }, { "epoch": 0.9344589804730296, "grad_norm": 0.009522138093246023, "learning_rate": 7.361260293591121e-07, "loss": 0.0, "step": 14500 }, { "epoch": 0.9345234259199587, "grad_norm": 9.089710282497509e-05, "learning_rate": 7.354099534550663e-07, "loss": 0.0, "step": 14501 }, { "epoch": 0.9345878713668879, "grad_norm": 0.0021077175463139405, "learning_rate": 7.346938775510205e-07, "loss": 0.0, "step": 14502 }, { "epoch": 0.9346523168138171, "grad_norm": 0.0025995208788616095, "learning_rate": 7.339778016469747e-07, "loss": 0.0, "step": 14503 }, { "epoch": 0.9347167622607463, "grad_norm": 0.004934568402119453, "learning_rate": 7.332617257429288e-07, "loss": 0.0001, "step": 14504 }, { "epoch": 0.9347812077076755, "grad_norm": 0.0359308569018603, "learning_rate": 7.325456498388831e-07, "loss": 0.0001, "step": 14505 }, { "epoch": 0.9348456531546047, "grad_norm": 0.00332544182917945, "learning_rate": 7.318295739348372e-07, "loss": 0.0, "step": 14506 }, { "epoch": 0.9349100986015338, "grad_norm": 0.02133744018917271, "learning_rate": 7.311134980307913e-07, "loss": 0.0002, "step": 14507 }, { "epoch": 0.934974544048463, "grad_norm": 0.0002502341843614646, "learning_rate": 7.303974221267456e-07, "loss": 0.0, "step": 14508 }, { "epoch": 0.9350389894953921, "grad_norm": 0.14950297708202429, "learning_rate": 7.296813462226996e-07, "loss": 0.0018, "step": 14509 }, { "epoch": 0.9351034349423213, "grad_norm": 0.009830572551912385, "learning_rate": 7.289652703186538e-07, "loss": 0.0, "step": 14510 }, { "epoch": 0.9351678803892505, "grad_norm": 0.006458090662713216, "learning_rate": 7.28249194414608e-07, "loss": 0.0, "step": 14511 }, { "epoch": 0.9352323258361797, "grad_norm": 0.03246659776362317, "learning_rate": 7.275331185105622e-07, "loss": 0.0, "step": 14512 }, { "epoch": 0.9352967712831088, "grad_norm": 0.0006368826106888853, "learning_rate": 7.268170426065163e-07, "loss": 0.0, "step": 14513 }, { "epoch": 0.935361216730038, "grad_norm": 0.19962360346299066, "learning_rate": 7.261009667024706e-07, "loss": 0.0023, "step": 14514 }, { "epoch": 0.9354256621769672, "grad_norm": 0.0033006617114257246, "learning_rate": 7.253848907984247e-07, "loss": 0.0, "step": 14515 }, { "epoch": 0.9354901076238964, "grad_norm": 1.7566595243507924e-05, "learning_rate": 7.246688148943788e-07, "loss": 0.0, "step": 14516 }, { "epoch": 0.9355545530708256, "grad_norm": 0.00010102185496019302, "learning_rate": 7.239527389903331e-07, "loss": 0.0, "step": 14517 }, { "epoch": 0.9356189985177548, "grad_norm": 0.012268373618383493, "learning_rate": 7.232366630862872e-07, "loss": 0.0, "step": 14518 }, { "epoch": 0.935683443964684, "grad_norm": 0.005793788340712795, "learning_rate": 7.225205871822414e-07, "loss": 0.0, "step": 14519 }, { "epoch": 0.935747889411613, "grad_norm": 0.0018801939476720873, "learning_rate": 7.218045112781956e-07, "loss": 0.0, "step": 14520 }, { "epoch": 0.9358123348585422, "grad_norm": 0.0018678091905745316, "learning_rate": 7.210884353741497e-07, "loss": 0.0, "step": 14521 }, { "epoch": 0.9358767803054714, "grad_norm": 0.03396311333269752, "learning_rate": 7.203723594701038e-07, "loss": 0.0, "step": 14522 }, { "epoch": 0.9359412257524006, "grad_norm": 0.003039178851842719, "learning_rate": 7.196562835660581e-07, "loss": 0.0, "step": 14523 }, { "epoch": 0.9360056711993298, "grad_norm": 0.0005585103175504581, "learning_rate": 7.189402076620122e-07, "loss": 0.0, "step": 14524 }, { "epoch": 0.9360701166462589, "grad_norm": 0.001817373108679384, "learning_rate": 7.182241317579663e-07, "loss": 0.0, "step": 14525 }, { "epoch": 0.9361345620931881, "grad_norm": 0.0021193474744381205, "learning_rate": 7.175080558539206e-07, "loss": 0.0, "step": 14526 }, { "epoch": 0.9361990075401173, "grad_norm": 0.002530882383435495, "learning_rate": 7.167919799498747e-07, "loss": 0.0, "step": 14527 }, { "epoch": 0.9362634529870465, "grad_norm": 0.000818128099790117, "learning_rate": 7.160759040458289e-07, "loss": 0.0, "step": 14528 }, { "epoch": 0.9363278984339757, "grad_norm": 0.009652635970240627, "learning_rate": 7.153598281417831e-07, "loss": 0.0, "step": 14529 }, { "epoch": 0.9363923438809049, "grad_norm": 3.3612020853590016e-05, "learning_rate": 7.146437522377373e-07, "loss": 0.0, "step": 14530 }, { "epoch": 0.9364567893278339, "grad_norm": 0.0013493935115245926, "learning_rate": 7.139276763336914e-07, "loss": 0.0, "step": 14531 }, { "epoch": 0.9365212347747631, "grad_norm": 0.0024142204573161376, "learning_rate": 7.132116004296457e-07, "loss": 0.0, "step": 14532 }, { "epoch": 0.9365856802216923, "grad_norm": 0.2993254916879797, "learning_rate": 7.124955245255997e-07, "loss": 0.001, "step": 14533 }, { "epoch": 0.9366501256686215, "grad_norm": 0.009092237905382923, "learning_rate": 7.117794486215538e-07, "loss": 0.0, "step": 14534 }, { "epoch": 0.9367145711155507, "grad_norm": 0.006500430507406041, "learning_rate": 7.110633727175081e-07, "loss": 0.0, "step": 14535 }, { "epoch": 0.9367790165624799, "grad_norm": 0.0011474112037967268, "learning_rate": 7.103472968134622e-07, "loss": 0.0, "step": 14536 }, { "epoch": 0.936843462009409, "grad_norm": 0.0018082425170928157, "learning_rate": 7.096312209094165e-07, "loss": 0.0, "step": 14537 }, { "epoch": 0.9369079074563382, "grad_norm": 0.04529145066339704, "learning_rate": 7.089151450053706e-07, "loss": 0.0001, "step": 14538 }, { "epoch": 0.9369723529032674, "grad_norm": 0.00024786370618296017, "learning_rate": 7.081990691013248e-07, "loss": 0.0, "step": 14539 }, { "epoch": 0.9370367983501966, "grad_norm": 0.0014020936697642379, "learning_rate": 7.07482993197279e-07, "loss": 0.0, "step": 14540 }, { "epoch": 0.9371012437971258, "grad_norm": 0.0005209691458706825, "learning_rate": 7.067669172932332e-07, "loss": 0.0, "step": 14541 }, { "epoch": 0.9371656892440549, "grad_norm": 0.0019354748003275726, "learning_rate": 7.060508413891873e-07, "loss": 0.0, "step": 14542 }, { "epoch": 0.937230134690984, "grad_norm": 0.0009135498107519807, "learning_rate": 7.053347654851416e-07, "loss": 0.0, "step": 14543 }, { "epoch": 0.9372945801379132, "grad_norm": 0.005087708128836168, "learning_rate": 7.046186895810957e-07, "loss": 0.0, "step": 14544 }, { "epoch": 0.9373590255848424, "grad_norm": 0.002904169890841453, "learning_rate": 7.039026136770497e-07, "loss": 0.0, "step": 14545 }, { "epoch": 0.9374234710317716, "grad_norm": 0.0005245864079457482, "learning_rate": 7.03186537773004e-07, "loss": 0.0, "step": 14546 }, { "epoch": 0.9374879164787008, "grad_norm": 0.00027773400644070097, "learning_rate": 7.024704618689581e-07, "loss": 0.0, "step": 14547 }, { "epoch": 0.93755236192563, "grad_norm": 0.01755280790804899, "learning_rate": 7.017543859649123e-07, "loss": 0.0, "step": 14548 }, { "epoch": 0.9376168073725591, "grad_norm": 0.00025611269007827297, "learning_rate": 7.010383100608665e-07, "loss": 0.0, "step": 14549 }, { "epoch": 0.9376812528194883, "grad_norm": 0.6315640272738314, "learning_rate": 7.003222341568207e-07, "loss": 0.0037, "step": 14550 }, { "epoch": 0.9377456982664175, "grad_norm": 0.021922899573825504, "learning_rate": 6.996061582527748e-07, "loss": 0.0002, "step": 14551 }, { "epoch": 0.9378101437133467, "grad_norm": 0.09865185582099384, "learning_rate": 6.988900823487291e-07, "loss": 0.0001, "step": 14552 }, { "epoch": 0.9378745891602758, "grad_norm": 0.012986664406985956, "learning_rate": 6.981740064446832e-07, "loss": 0.0, "step": 14553 }, { "epoch": 0.937939034607205, "grad_norm": 0.011523680815714164, "learning_rate": 6.974579305406374e-07, "loss": 0.0, "step": 14554 }, { "epoch": 0.9380034800541341, "grad_norm": 0.0031393640358416347, "learning_rate": 6.967418546365916e-07, "loss": 0.0, "step": 14555 }, { "epoch": 0.9380679255010633, "grad_norm": 0.019008252551270352, "learning_rate": 6.960257787325458e-07, "loss": 0.0001, "step": 14556 }, { "epoch": 0.9381323709479925, "grad_norm": 0.0023275455744178716, "learning_rate": 6.953097028284998e-07, "loss": 0.0, "step": 14557 }, { "epoch": 0.9381968163949217, "grad_norm": 0.005539228100833343, "learning_rate": 6.945936269244541e-07, "loss": 0.0, "step": 14558 }, { "epoch": 0.9382612618418509, "grad_norm": 0.03702531724548152, "learning_rate": 6.938775510204082e-07, "loss": 0.0001, "step": 14559 }, { "epoch": 0.9383257072887801, "grad_norm": 0.0015204518635009605, "learning_rate": 6.931614751163623e-07, "loss": 0.0, "step": 14560 }, { "epoch": 0.9383901527357092, "grad_norm": 0.02489419878730623, "learning_rate": 6.924453992123166e-07, "loss": 0.0001, "step": 14561 }, { "epoch": 0.9384545981826384, "grad_norm": 0.001340772269739388, "learning_rate": 6.917293233082707e-07, "loss": 0.0, "step": 14562 }, { "epoch": 0.9385190436295676, "grad_norm": 0.00010016378777309323, "learning_rate": 6.910132474042249e-07, "loss": 0.0, "step": 14563 }, { "epoch": 0.9385834890764967, "grad_norm": 0.0013275941617353507, "learning_rate": 6.902971715001791e-07, "loss": 0.0, "step": 14564 }, { "epoch": 0.9386479345234259, "grad_norm": 0.22654219240400406, "learning_rate": 6.895810955961333e-07, "loss": 0.001, "step": 14565 }, { "epoch": 0.9387123799703551, "grad_norm": 0.0022122313042215727, "learning_rate": 6.888650196920874e-07, "loss": 0.0, "step": 14566 }, { "epoch": 0.9387768254172842, "grad_norm": 0.004470017713398727, "learning_rate": 6.881489437880417e-07, "loss": 0.0, "step": 14567 }, { "epoch": 0.9388412708642134, "grad_norm": 0.0007068921499948373, "learning_rate": 6.874328678839958e-07, "loss": 0.0, "step": 14568 }, { "epoch": 0.9389057163111426, "grad_norm": 0.12361918294424723, "learning_rate": 6.867167919799498e-07, "loss": 0.0, "step": 14569 }, { "epoch": 0.9389701617580718, "grad_norm": 0.0011432207910996807, "learning_rate": 6.860007160759041e-07, "loss": 0.0, "step": 14570 }, { "epoch": 0.939034607205001, "grad_norm": 9.127357167783924e-05, "learning_rate": 6.852846401718582e-07, "loss": 0.0, "step": 14571 }, { "epoch": 0.9390990526519302, "grad_norm": 0.02928841687336939, "learning_rate": 6.845685642678125e-07, "loss": 0.0001, "step": 14572 }, { "epoch": 0.9391634980988594, "grad_norm": 0.002151597050746372, "learning_rate": 6.838524883637666e-07, "loss": 0.0, "step": 14573 }, { "epoch": 0.9392279435457885, "grad_norm": 0.0027989000811481177, "learning_rate": 6.831364124597208e-07, "loss": 0.0, "step": 14574 }, { "epoch": 0.9392923889927177, "grad_norm": 0.000564584986700068, "learning_rate": 6.82420336555675e-07, "loss": 0.0, "step": 14575 }, { "epoch": 0.9393568344396468, "grad_norm": 0.001180073223414386, "learning_rate": 6.817042606516292e-07, "loss": 0.0, "step": 14576 }, { "epoch": 0.939421279886576, "grad_norm": 0.005913909066126716, "learning_rate": 6.809881847475833e-07, "loss": 0.0, "step": 14577 }, { "epoch": 0.9394857253335052, "grad_norm": 0.0006904970453639949, "learning_rate": 6.802721088435376e-07, "loss": 0.0, "step": 14578 }, { "epoch": 0.9395501707804343, "grad_norm": 0.0004785256442391764, "learning_rate": 6.795560329394917e-07, "loss": 0.0, "step": 14579 }, { "epoch": 0.9396146162273635, "grad_norm": 0.0065965709683497, "learning_rate": 6.788399570354457e-07, "loss": 0.0, "step": 14580 }, { "epoch": 0.9396790616742927, "grad_norm": 0.03483474602814386, "learning_rate": 6.781238811314e-07, "loss": 0.0001, "step": 14581 }, { "epoch": 0.9397435071212219, "grad_norm": 0.0013426242906063755, "learning_rate": 6.774078052273541e-07, "loss": 0.0, "step": 14582 }, { "epoch": 0.9398079525681511, "grad_norm": 0.11016212508584611, "learning_rate": 6.766917293233083e-07, "loss": 0.0003, "step": 14583 }, { "epoch": 0.9398723980150803, "grad_norm": 0.0025457227936089575, "learning_rate": 6.759756534192625e-07, "loss": 0.0, "step": 14584 }, { "epoch": 0.9399368434620095, "grad_norm": 0.0024229772528888424, "learning_rate": 6.752595775152167e-07, "loss": 0.0, "step": 14585 }, { "epoch": 0.9400012889089386, "grad_norm": 0.030600970769373386, "learning_rate": 6.745435016111708e-07, "loss": 0.0, "step": 14586 }, { "epoch": 0.9400657343558677, "grad_norm": 0.0005166095727222362, "learning_rate": 6.738274257071251e-07, "loss": 0.0, "step": 14587 }, { "epoch": 0.9401301798027969, "grad_norm": 0.003762972511217016, "learning_rate": 6.731113498030792e-07, "loss": 0.0, "step": 14588 }, { "epoch": 0.9401946252497261, "grad_norm": 0.09675340539245941, "learning_rate": 6.723952738990333e-07, "loss": 0.0001, "step": 14589 }, { "epoch": 0.9402590706966553, "grad_norm": 0.4073272873244558, "learning_rate": 6.716791979949876e-07, "loss": 0.0027, "step": 14590 }, { "epoch": 0.9403235161435844, "grad_norm": 0.00020267804633695348, "learning_rate": 6.709631220909417e-07, "loss": 0.0, "step": 14591 }, { "epoch": 0.9403879615905136, "grad_norm": 0.004281621504404462, "learning_rate": 6.702470461868958e-07, "loss": 0.0, "step": 14592 }, { "epoch": 0.9404524070374428, "grad_norm": 0.0028956385487686137, "learning_rate": 6.6953097028285e-07, "loss": 0.0, "step": 14593 }, { "epoch": 0.940516852484372, "grad_norm": 0.2428985202888916, "learning_rate": 6.688148943788042e-07, "loss": 0.0008, "step": 14594 }, { "epoch": 0.9405812979313012, "grad_norm": 0.008918584360090404, "learning_rate": 6.680988184747583e-07, "loss": 0.0, "step": 14595 }, { "epoch": 0.9406457433782304, "grad_norm": 0.0010786200917876955, "learning_rate": 6.673827425707126e-07, "loss": 0.0, "step": 14596 }, { "epoch": 0.9407101888251596, "grad_norm": 0.001239924009437483, "learning_rate": 6.666666666666667e-07, "loss": 0.0, "step": 14597 }, { "epoch": 0.9407746342720886, "grad_norm": 0.17303675628424575, "learning_rate": 6.659505907626208e-07, "loss": 0.0006, "step": 14598 }, { "epoch": 0.9408390797190178, "grad_norm": 0.0022172739171691714, "learning_rate": 6.652345148585751e-07, "loss": 0.0, "step": 14599 }, { "epoch": 0.940903525165947, "grad_norm": 0.0009180732993219411, "learning_rate": 6.645184389545292e-07, "loss": 0.0, "step": 14600 }, { "epoch": 0.9409679706128762, "grad_norm": 0.218649577182511, "learning_rate": 6.638023630504834e-07, "loss": 0.0006, "step": 14601 }, { "epoch": 0.9410324160598054, "grad_norm": 0.001048798207710388, "learning_rate": 6.630862871464376e-07, "loss": 0.0, "step": 14602 }, { "epoch": 0.9410968615067345, "grad_norm": 0.0015205476430884322, "learning_rate": 6.623702112423918e-07, "loss": 0.0, "step": 14603 }, { "epoch": 0.9411613069536637, "grad_norm": 0.022077528704297078, "learning_rate": 6.616541353383458e-07, "loss": 0.0002, "step": 14604 }, { "epoch": 0.9412257524005929, "grad_norm": 0.0004027494800950605, "learning_rate": 6.609380594343001e-07, "loss": 0.0, "step": 14605 }, { "epoch": 0.9412901978475221, "grad_norm": 0.2511364171702047, "learning_rate": 6.602219835302542e-07, "loss": 0.0009, "step": 14606 }, { "epoch": 0.9413546432944513, "grad_norm": 0.7542887295599734, "learning_rate": 6.595059076262083e-07, "loss": 0.0064, "step": 14607 }, { "epoch": 0.9414190887413805, "grad_norm": 0.004123371457966273, "learning_rate": 6.587898317221626e-07, "loss": 0.0, "step": 14608 }, { "epoch": 0.9414835341883095, "grad_norm": 0.002666736267158426, "learning_rate": 6.580737558181167e-07, "loss": 0.0, "step": 14609 }, { "epoch": 0.9415479796352387, "grad_norm": 0.3278604758169207, "learning_rate": 6.57357679914071e-07, "loss": 0.0023, "step": 14610 }, { "epoch": 0.9416124250821679, "grad_norm": 0.0035760959578561863, "learning_rate": 6.566416040100251e-07, "loss": 0.0, "step": 14611 }, { "epoch": 0.9416768705290971, "grad_norm": 0.11170839315038376, "learning_rate": 6.559255281059793e-07, "loss": 0.0017, "step": 14612 }, { "epoch": 0.9417413159760263, "grad_norm": 0.15507088651507142, "learning_rate": 6.552094522019335e-07, "loss": 0.0003, "step": 14613 }, { "epoch": 0.9418057614229555, "grad_norm": 0.016109021664184563, "learning_rate": 6.544933762978877e-07, "loss": 0.0001, "step": 14614 }, { "epoch": 0.9418702068698847, "grad_norm": 0.027611918240110323, "learning_rate": 6.537773003938418e-07, "loss": 0.0002, "step": 14615 }, { "epoch": 0.9419346523168138, "grad_norm": 0.047448985239203184, "learning_rate": 6.530612244897961e-07, "loss": 0.0005, "step": 14616 }, { "epoch": 0.941999097763743, "grad_norm": 0.0013488067911044182, "learning_rate": 6.523451485857501e-07, "loss": 0.0, "step": 14617 }, { "epoch": 0.9420635432106722, "grad_norm": 0.011817455589975698, "learning_rate": 6.516290726817042e-07, "loss": 0.0, "step": 14618 }, { "epoch": 0.9421279886576014, "grad_norm": 0.0021414563002524058, "learning_rate": 6.509129967776585e-07, "loss": 0.0, "step": 14619 }, { "epoch": 0.9421924341045305, "grad_norm": 0.021960025706933405, "learning_rate": 6.501969208736126e-07, "loss": 0.0, "step": 14620 }, { "epoch": 0.9422568795514596, "grad_norm": 0.03452706750454334, "learning_rate": 6.494808449695668e-07, "loss": 0.0001, "step": 14621 }, { "epoch": 0.9423213249983888, "grad_norm": 0.012501871096559888, "learning_rate": 6.48764769065521e-07, "loss": 0.0, "step": 14622 }, { "epoch": 0.942385770445318, "grad_norm": 0.0062451286698885266, "learning_rate": 6.480486931614752e-07, "loss": 0.0, "step": 14623 }, { "epoch": 0.9424502158922472, "grad_norm": 0.052520878296810417, "learning_rate": 6.473326172574293e-07, "loss": 0.0001, "step": 14624 }, { "epoch": 0.9425146613391764, "grad_norm": 0.002352564833617126, "learning_rate": 6.466165413533836e-07, "loss": 0.0, "step": 14625 }, { "epoch": 0.9425791067861056, "grad_norm": 0.0006536874626899899, "learning_rate": 6.459004654493377e-07, "loss": 0.0, "step": 14626 }, { "epoch": 0.9426435522330348, "grad_norm": 0.0061842458013730735, "learning_rate": 6.451843895452919e-07, "loss": 0.0, "step": 14627 }, { "epoch": 0.9427079976799639, "grad_norm": 0.16732236779005036, "learning_rate": 6.44468313641246e-07, "loss": 0.0007, "step": 14628 }, { "epoch": 0.9427724431268931, "grad_norm": 0.0007157079141550446, "learning_rate": 6.437522377372002e-07, "loss": 0.0, "step": 14629 }, { "epoch": 0.9428368885738223, "grad_norm": 0.0006728257843449776, "learning_rate": 6.430361618331543e-07, "loss": 0.0, "step": 14630 }, { "epoch": 0.9429013340207514, "grad_norm": 0.0019405160432858116, "learning_rate": 6.423200859291086e-07, "loss": 0.0, "step": 14631 }, { "epoch": 0.9429657794676806, "grad_norm": 0.016156358466427594, "learning_rate": 6.416040100250627e-07, "loss": 0.0001, "step": 14632 }, { "epoch": 0.9430302249146097, "grad_norm": 0.0034454949082682366, "learning_rate": 6.408879341210168e-07, "loss": 0.0, "step": 14633 }, { "epoch": 0.9430946703615389, "grad_norm": 0.0012120870879107008, "learning_rate": 6.401718582169711e-07, "loss": 0.0, "step": 14634 }, { "epoch": 0.9431591158084681, "grad_norm": 0.0002194363515000686, "learning_rate": 6.394557823129252e-07, "loss": 0.0, "step": 14635 }, { "epoch": 0.9432235612553973, "grad_norm": 0.29232680880760875, "learning_rate": 6.387397064088794e-07, "loss": 0.0026, "step": 14636 }, { "epoch": 0.9432880067023265, "grad_norm": 0.0003396572970585941, "learning_rate": 6.380236305048336e-07, "loss": 0.0, "step": 14637 }, { "epoch": 0.9433524521492557, "grad_norm": 1.5725421922903065, "learning_rate": 6.373075546007878e-07, "loss": 0.0148, "step": 14638 }, { "epoch": 0.9434168975961849, "grad_norm": 0.006618061930661491, "learning_rate": 6.365914786967419e-07, "loss": 0.0, "step": 14639 }, { "epoch": 0.943481343043114, "grad_norm": 1.0823258091984502, "learning_rate": 6.358754027926962e-07, "loss": 0.0037, "step": 14640 }, { "epoch": 0.9435457884900432, "grad_norm": 0.037367184014392985, "learning_rate": 6.351593268886502e-07, "loss": 0.0003, "step": 14641 }, { "epoch": 0.9436102339369723, "grad_norm": 0.00023618925437843773, "learning_rate": 6.344432509846043e-07, "loss": 0.0, "step": 14642 }, { "epoch": 0.9436746793839015, "grad_norm": 0.019535478491815153, "learning_rate": 6.337271750805586e-07, "loss": 0.0001, "step": 14643 }, { "epoch": 0.9437391248308307, "grad_norm": 0.23517341519437462, "learning_rate": 6.330110991765127e-07, "loss": 0.0003, "step": 14644 }, { "epoch": 0.9438035702777599, "grad_norm": 0.0012437920009850324, "learning_rate": 6.32295023272467e-07, "loss": 0.0, "step": 14645 }, { "epoch": 0.943868015724689, "grad_norm": 0.005780556663777061, "learning_rate": 6.315789473684211e-07, "loss": 0.0, "step": 14646 }, { "epoch": 0.9439324611716182, "grad_norm": 0.027663328196446874, "learning_rate": 6.308628714643753e-07, "loss": 0.0001, "step": 14647 }, { "epoch": 0.9439969066185474, "grad_norm": 0.03173487718373573, "learning_rate": 6.301467955603295e-07, "loss": 0.0003, "step": 14648 }, { "epoch": 0.9440613520654766, "grad_norm": 0.0038316525115960707, "learning_rate": 6.294307196562837e-07, "loss": 0.0, "step": 14649 }, { "epoch": 0.9441257975124058, "grad_norm": 0.007783604901538253, "learning_rate": 6.287146437522378e-07, "loss": 0.0, "step": 14650 }, { "epoch": 0.944190242959335, "grad_norm": 0.2807085202444718, "learning_rate": 6.279985678481921e-07, "loss": 0.0013, "step": 14651 }, { "epoch": 0.9442546884062641, "grad_norm": 3.8764283243383565e-05, "learning_rate": 6.272824919441461e-07, "loss": 0.0, "step": 14652 }, { "epoch": 0.9443191338531933, "grad_norm": 0.33581027127862345, "learning_rate": 6.265664160401002e-07, "loss": 0.0022, "step": 14653 }, { "epoch": 0.9443835793001224, "grad_norm": 0.011455286908056026, "learning_rate": 6.258503401360545e-07, "loss": 0.0, "step": 14654 }, { "epoch": 0.9444480247470516, "grad_norm": 0.01142539022155588, "learning_rate": 6.251342642320086e-07, "loss": 0.0, "step": 14655 }, { "epoch": 0.9445124701939808, "grad_norm": 0.004852478793097276, "learning_rate": 6.244181883279628e-07, "loss": 0.0, "step": 14656 }, { "epoch": 0.94457691564091, "grad_norm": 0.0019989295768203233, "learning_rate": 6.23702112423917e-07, "loss": 0.0, "step": 14657 }, { "epoch": 0.9446413610878391, "grad_norm": 0.000900061966794239, "learning_rate": 6.229860365198712e-07, "loss": 0.0, "step": 14658 }, { "epoch": 0.9447058065347683, "grad_norm": 0.0002101536116015678, "learning_rate": 6.222699606158254e-07, "loss": 0.0, "step": 14659 }, { "epoch": 0.9447702519816975, "grad_norm": 0.008588863535994664, "learning_rate": 6.215538847117795e-07, "loss": 0.0, "step": 14660 }, { "epoch": 0.9448346974286267, "grad_norm": 0.04302587423745892, "learning_rate": 6.208378088077337e-07, "loss": 0.0001, "step": 14661 }, { "epoch": 0.9448991428755559, "grad_norm": 0.005371981311745458, "learning_rate": 6.201217329036879e-07, "loss": 0.0001, "step": 14662 }, { "epoch": 0.9449635883224851, "grad_norm": 0.018151983512062528, "learning_rate": 6.19405656999642e-07, "loss": 0.0, "step": 14663 }, { "epoch": 0.9450280337694142, "grad_norm": 0.0012138588332065569, "learning_rate": 6.186895810955961e-07, "loss": 0.0, "step": 14664 }, { "epoch": 0.9450924792163433, "grad_norm": 0.001995413386031814, "learning_rate": 6.179735051915503e-07, "loss": 0.0, "step": 14665 }, { "epoch": 0.9451569246632725, "grad_norm": 0.0007131550059482363, "learning_rate": 6.172574292875045e-07, "loss": 0.0, "step": 14666 }, { "epoch": 0.9452213701102017, "grad_norm": 0.44967737421083515, "learning_rate": 6.165413533834587e-07, "loss": 0.0004, "step": 14667 }, { "epoch": 0.9452858155571309, "grad_norm": 0.007368867849144508, "learning_rate": 6.158252774794129e-07, "loss": 0.0, "step": 14668 }, { "epoch": 0.94535026100406, "grad_norm": 0.0074857147827597816, "learning_rate": 6.15109201575367e-07, "loss": 0.0, "step": 14669 }, { "epoch": 0.9454147064509892, "grad_norm": 0.00023285440752826664, "learning_rate": 6.143931256713212e-07, "loss": 0.0, "step": 14670 }, { "epoch": 0.9454791518979184, "grad_norm": 0.000904988311691927, "learning_rate": 6.136770497672754e-07, "loss": 0.0, "step": 14671 }, { "epoch": 0.9455435973448476, "grad_norm": 0.12923123113742113, "learning_rate": 6.129609738632296e-07, "loss": 0.0003, "step": 14672 }, { "epoch": 0.9456080427917768, "grad_norm": 0.0025449127476049856, "learning_rate": 6.122448979591837e-07, "loss": 0.0, "step": 14673 }, { "epoch": 0.945672488238706, "grad_norm": 0.0006296391375895605, "learning_rate": 6.115288220551379e-07, "loss": 0.0, "step": 14674 }, { "epoch": 0.9457369336856352, "grad_norm": 0.1292198729783402, "learning_rate": 6.108127461510921e-07, "loss": 0.0004, "step": 14675 }, { "epoch": 0.9458013791325642, "grad_norm": 0.001215412270731588, "learning_rate": 6.100966702470462e-07, "loss": 0.0, "step": 14676 }, { "epoch": 0.9458658245794934, "grad_norm": 0.001039974299197542, "learning_rate": 6.093805943430004e-07, "loss": 0.0, "step": 14677 }, { "epoch": 0.9459302700264226, "grad_norm": 0.6429765156199316, "learning_rate": 6.086645184389546e-07, "loss": 0.0003, "step": 14678 }, { "epoch": 0.9459947154733518, "grad_norm": 0.0008230507368414166, "learning_rate": 6.079484425349087e-07, "loss": 0.0, "step": 14679 }, { "epoch": 0.946059160920281, "grad_norm": 0.00024397614750560574, "learning_rate": 6.072323666308629e-07, "loss": 0.0, "step": 14680 }, { "epoch": 0.9461236063672102, "grad_norm": 0.007197154311858535, "learning_rate": 6.065162907268171e-07, "loss": 0.0, "step": 14681 }, { "epoch": 0.9461880518141393, "grad_norm": 0.1378045088749925, "learning_rate": 6.058002148227712e-07, "loss": 0.0002, "step": 14682 }, { "epoch": 0.9462524972610685, "grad_norm": 0.0013014266775007563, "learning_rate": 6.050841389187254e-07, "loss": 0.0, "step": 14683 }, { "epoch": 0.9463169427079977, "grad_norm": 0.000253774893202994, "learning_rate": 6.043680630146796e-07, "loss": 0.0, "step": 14684 }, { "epoch": 0.9463813881549269, "grad_norm": 0.0038829678671018237, "learning_rate": 6.036519871106338e-07, "loss": 0.0, "step": 14685 }, { "epoch": 0.9464458336018561, "grad_norm": 0.00037332670641363337, "learning_rate": 6.02935911206588e-07, "loss": 0.0, "step": 14686 }, { "epoch": 0.9465102790487852, "grad_norm": 0.00011434537095249858, "learning_rate": 6.022198353025422e-07, "loss": 0.0, "step": 14687 }, { "epoch": 0.9465747244957143, "grad_norm": 0.022522121263546153, "learning_rate": 6.015037593984962e-07, "loss": 0.0001, "step": 14688 }, { "epoch": 0.9466391699426435, "grad_norm": 0.08534634801181562, "learning_rate": 6.007876834944504e-07, "loss": 0.0002, "step": 14689 }, { "epoch": 0.9467036153895727, "grad_norm": 0.008271676467209783, "learning_rate": 6.000716075904046e-07, "loss": 0.0, "step": 14690 }, { "epoch": 0.9467680608365019, "grad_norm": 0.01073189738361365, "learning_rate": 5.993555316863588e-07, "loss": 0.0, "step": 14691 }, { "epoch": 0.9468325062834311, "grad_norm": 0.004863051197559781, "learning_rate": 5.986394557823129e-07, "loss": 0.0, "step": 14692 }, { "epoch": 0.9468969517303603, "grad_norm": 0.010054763393330567, "learning_rate": 5.979233798782671e-07, "loss": 0.0, "step": 14693 }, { "epoch": 0.9469613971772894, "grad_norm": 0.002437897019058872, "learning_rate": 5.972073039742213e-07, "loss": 0.0, "step": 14694 }, { "epoch": 0.9470258426242186, "grad_norm": 0.013852156443718828, "learning_rate": 5.964912280701755e-07, "loss": 0.0001, "step": 14695 }, { "epoch": 0.9470902880711478, "grad_norm": 0.0003029941248519201, "learning_rate": 5.957751521661297e-07, "loss": 0.0, "step": 14696 }, { "epoch": 0.947154733518077, "grad_norm": 0.002594353409038197, "learning_rate": 5.950590762620839e-07, "loss": 0.0, "step": 14697 }, { "epoch": 0.9472191789650061, "grad_norm": 0.2996835992179366, "learning_rate": 5.94343000358038e-07, "loss": 0.0019, "step": 14698 }, { "epoch": 0.9472836244119353, "grad_norm": 0.011269491880908408, "learning_rate": 5.936269244539922e-07, "loss": 0.0, "step": 14699 }, { "epoch": 0.9473480698588644, "grad_norm": 0.0009420079780451361, "learning_rate": 5.929108485499463e-07, "loss": 0.0, "step": 14700 }, { "epoch": 0.9474125153057936, "grad_norm": 0.0017608986153713367, "learning_rate": 5.921947726459005e-07, "loss": 0.0, "step": 14701 }, { "epoch": 0.9474769607527228, "grad_norm": 0.0013094455840437949, "learning_rate": 5.914786967418547e-07, "loss": 0.0, "step": 14702 }, { "epoch": 0.947541406199652, "grad_norm": 0.002251141911154188, "learning_rate": 5.907626208378089e-07, "loss": 0.0, "step": 14703 }, { "epoch": 0.9476058516465812, "grad_norm": 0.011790262267177658, "learning_rate": 5.90046544933763e-07, "loss": 0.0, "step": 14704 }, { "epoch": 0.9476702970935104, "grad_norm": 0.002285259579575094, "learning_rate": 5.893304690297172e-07, "loss": 0.0, "step": 14705 }, { "epoch": 0.9477347425404395, "grad_norm": 0.0006253211658536153, "learning_rate": 5.886143931256714e-07, "loss": 0.0, "step": 14706 }, { "epoch": 0.9477991879873687, "grad_norm": 0.005398306974137369, "learning_rate": 5.878983172216255e-07, "loss": 0.0, "step": 14707 }, { "epoch": 0.9478636334342979, "grad_norm": 0.0002294010027379783, "learning_rate": 5.871822413175797e-07, "loss": 0.0, "step": 14708 }, { "epoch": 0.947928078881227, "grad_norm": 0.00025866017801789347, "learning_rate": 5.864661654135339e-07, "loss": 0.0, "step": 14709 }, { "epoch": 0.9479925243281562, "grad_norm": 0.0005619206255567925, "learning_rate": 5.857500895094881e-07, "loss": 0.0, "step": 14710 }, { "epoch": 0.9480569697750854, "grad_norm": 0.19647921181904326, "learning_rate": 5.850340136054423e-07, "loss": 0.0021, "step": 14711 }, { "epoch": 0.9481214152220145, "grad_norm": 0.08082751242960239, "learning_rate": 5.843179377013964e-07, "loss": 0.0001, "step": 14712 }, { "epoch": 0.9481858606689437, "grad_norm": 0.0014319050676462684, "learning_rate": 5.836018617973506e-07, "loss": 0.0, "step": 14713 }, { "epoch": 0.9482503061158729, "grad_norm": 0.0021555817053756725, "learning_rate": 5.828857858933047e-07, "loss": 0.0, "step": 14714 }, { "epoch": 0.9483147515628021, "grad_norm": 0.0019083600342925365, "learning_rate": 5.821697099892589e-07, "loss": 0.0, "step": 14715 }, { "epoch": 0.9483791970097313, "grad_norm": 0.41414290673102844, "learning_rate": 5.814536340852131e-07, "loss": 0.003, "step": 14716 }, { "epoch": 0.9484436424566605, "grad_norm": 0.008211531723705947, "learning_rate": 5.807375581811672e-07, "loss": 0.0, "step": 14717 }, { "epoch": 0.9485080879035896, "grad_norm": 0.00016811693723598264, "learning_rate": 5.800214822771214e-07, "loss": 0.0, "step": 14718 }, { "epoch": 0.9485725333505188, "grad_norm": 0.0008967912893646619, "learning_rate": 5.793054063730756e-07, "loss": 0.0, "step": 14719 }, { "epoch": 0.9486369787974479, "grad_norm": 0.006830654593558417, "learning_rate": 5.785893304690298e-07, "loss": 0.0, "step": 14720 }, { "epoch": 0.9487014242443771, "grad_norm": 0.02299987371644241, "learning_rate": 5.77873254564984e-07, "loss": 0.0001, "step": 14721 }, { "epoch": 0.9487658696913063, "grad_norm": 0.023379648106420153, "learning_rate": 5.771571786609382e-07, "loss": 0.0002, "step": 14722 }, { "epoch": 0.9488303151382355, "grad_norm": 0.011996984555252728, "learning_rate": 5.764411027568922e-07, "loss": 0.0001, "step": 14723 }, { "epoch": 0.9488947605851646, "grad_norm": 0.0010316172728567184, "learning_rate": 5.757250268528464e-07, "loss": 0.0, "step": 14724 }, { "epoch": 0.9489592060320938, "grad_norm": 0.0006875478425085163, "learning_rate": 5.750089509488006e-07, "loss": 0.0, "step": 14725 }, { "epoch": 0.949023651479023, "grad_norm": 0.0007850438860483339, "learning_rate": 5.742928750447548e-07, "loss": 0.0, "step": 14726 }, { "epoch": 0.9490880969259522, "grad_norm": 0.1185001488226447, "learning_rate": 5.735767991407089e-07, "loss": 0.0001, "step": 14727 }, { "epoch": 0.9491525423728814, "grad_norm": 0.00042052385974219097, "learning_rate": 5.728607232366631e-07, "loss": 0.0, "step": 14728 }, { "epoch": 0.9492169878198106, "grad_norm": 0.023423001652901322, "learning_rate": 5.721446473326173e-07, "loss": 0.0002, "step": 14729 }, { "epoch": 0.9492814332667397, "grad_norm": 0.00019013561235615476, "learning_rate": 5.714285714285715e-07, "loss": 0.0, "step": 14730 }, { "epoch": 0.9493458787136689, "grad_norm": 0.008218830548370379, "learning_rate": 5.707124955245257e-07, "loss": 0.0, "step": 14731 }, { "epoch": 0.949410324160598, "grad_norm": 0.006414075984919605, "learning_rate": 5.699964196204799e-07, "loss": 0.0, "step": 14732 }, { "epoch": 0.9494747696075272, "grad_norm": 0.0007078383412863962, "learning_rate": 5.69280343716434e-07, "loss": 0.0, "step": 14733 }, { "epoch": 0.9495392150544564, "grad_norm": 0.030856944468306394, "learning_rate": 5.685642678123882e-07, "loss": 0.0, "step": 14734 }, { "epoch": 0.9496036605013856, "grad_norm": 0.0069689116245061295, "learning_rate": 5.678481919083424e-07, "loss": 0.0, "step": 14735 }, { "epoch": 0.9496681059483147, "grad_norm": 0.005919522472079522, "learning_rate": 5.671321160042964e-07, "loss": 0.0, "step": 14736 }, { "epoch": 0.9497325513952439, "grad_norm": 0.0028393379796273445, "learning_rate": 5.664160401002506e-07, "loss": 0.0, "step": 14737 }, { "epoch": 0.9497969968421731, "grad_norm": 0.036288034329051345, "learning_rate": 5.656999641962048e-07, "loss": 0.0001, "step": 14738 }, { "epoch": 0.9498614422891023, "grad_norm": 0.0012907005085677104, "learning_rate": 5.64983888292159e-07, "loss": 0.0, "step": 14739 }, { "epoch": 0.9499258877360315, "grad_norm": 0.01761313683527187, "learning_rate": 5.642678123881132e-07, "loss": 0.0001, "step": 14740 }, { "epoch": 0.9499903331829607, "grad_norm": 0.009967518907129018, "learning_rate": 5.635517364840674e-07, "loss": 0.0, "step": 14741 }, { "epoch": 0.9500547786298899, "grad_norm": 0.0018596338611846243, "learning_rate": 5.628356605800215e-07, "loss": 0.0, "step": 14742 }, { "epoch": 0.9501192240768189, "grad_norm": 0.004828228626011528, "learning_rate": 5.621195846759757e-07, "loss": 0.0, "step": 14743 }, { "epoch": 0.9501836695237481, "grad_norm": 0.006116660387896934, "learning_rate": 5.614035087719299e-07, "loss": 0.0, "step": 14744 }, { "epoch": 0.9502481149706773, "grad_norm": 0.00060880213351492, "learning_rate": 5.606874328678841e-07, "loss": 0.0, "step": 14745 }, { "epoch": 0.9503125604176065, "grad_norm": 0.14036646230097022, "learning_rate": 5.599713569638382e-07, "loss": 0.0002, "step": 14746 }, { "epoch": 0.9503770058645357, "grad_norm": 0.3058433494413237, "learning_rate": 5.592552810597923e-07, "loss": 0.0008, "step": 14747 }, { "epoch": 0.9504414513114648, "grad_norm": 0.024473252072277532, "learning_rate": 5.585392051557465e-07, "loss": 0.0001, "step": 14748 }, { "epoch": 0.950505896758394, "grad_norm": 0.0001964074795710888, "learning_rate": 5.578231292517007e-07, "loss": 0.0, "step": 14749 }, { "epoch": 0.9505703422053232, "grad_norm": 0.0008308660348116496, "learning_rate": 5.571070533476549e-07, "loss": 0.0, "step": 14750 }, { "epoch": 0.9506347876522524, "grad_norm": 0.0006085072018791508, "learning_rate": 5.563909774436091e-07, "loss": 0.0, "step": 14751 }, { "epoch": 0.9506992330991816, "grad_norm": 0.0014048571143467556, "learning_rate": 5.556749015395632e-07, "loss": 0.0, "step": 14752 }, { "epoch": 0.9507636785461108, "grad_norm": 0.000805794097092772, "learning_rate": 5.549588256355174e-07, "loss": 0.0, "step": 14753 }, { "epoch": 0.9508281239930398, "grad_norm": 0.00016851595053399371, "learning_rate": 5.542427497314716e-07, "loss": 0.0, "step": 14754 }, { "epoch": 0.950892569439969, "grad_norm": 0.30476000730045005, "learning_rate": 5.535266738274257e-07, "loss": 0.001, "step": 14755 }, { "epoch": 0.9509570148868982, "grad_norm": 0.017287191174224937, "learning_rate": 5.528105979233799e-07, "loss": 0.0, "step": 14756 }, { "epoch": 0.9510214603338274, "grad_norm": 0.0008960561301208798, "learning_rate": 5.520945220193341e-07, "loss": 0.0, "step": 14757 }, { "epoch": 0.9510859057807566, "grad_norm": 0.022833389816133105, "learning_rate": 5.513784461152883e-07, "loss": 0.0001, "step": 14758 }, { "epoch": 0.9511503512276858, "grad_norm": 0.0007505371049380001, "learning_rate": 5.506623702112424e-07, "loss": 0.0, "step": 14759 }, { "epoch": 0.951214796674615, "grad_norm": 0.004165736964287361, "learning_rate": 5.499462943071966e-07, "loss": 0.0, "step": 14760 }, { "epoch": 0.9512792421215441, "grad_norm": 0.01478945535523023, "learning_rate": 5.492302184031507e-07, "loss": 0.0, "step": 14761 }, { "epoch": 0.9513436875684733, "grad_norm": 0.0018664110528120968, "learning_rate": 5.485141424991049e-07, "loss": 0.0, "step": 14762 }, { "epoch": 0.9514081330154025, "grad_norm": 0.5059164891348179, "learning_rate": 5.477980665950591e-07, "loss": 0.0014, "step": 14763 }, { "epoch": 0.9514725784623317, "grad_norm": 0.0008552185764550522, "learning_rate": 5.470819906910133e-07, "loss": 0.0, "step": 14764 }, { "epoch": 0.9515370239092608, "grad_norm": 0.003477670911935671, "learning_rate": 5.463659147869674e-07, "loss": 0.0, "step": 14765 }, { "epoch": 0.9516014693561899, "grad_norm": 0.5282372710846787, "learning_rate": 5.456498388829216e-07, "loss": 0.0007, "step": 14766 }, { "epoch": 0.9516659148031191, "grad_norm": 0.0669501936225196, "learning_rate": 5.449337629788758e-07, "loss": 0.0001, "step": 14767 }, { "epoch": 0.9517303602500483, "grad_norm": 0.001082596470456628, "learning_rate": 5.4421768707483e-07, "loss": 0.0, "step": 14768 }, { "epoch": 0.9517948056969775, "grad_norm": 0.013068741403885711, "learning_rate": 5.435016111707842e-07, "loss": 0.0, "step": 14769 }, { "epoch": 0.9518592511439067, "grad_norm": 0.0013497502223646179, "learning_rate": 5.427855352667384e-07, "loss": 0.0, "step": 14770 }, { "epoch": 0.9519236965908359, "grad_norm": 0.06839671137127833, "learning_rate": 5.420694593626924e-07, "loss": 0.0001, "step": 14771 }, { "epoch": 0.951988142037765, "grad_norm": 0.00470835857793575, "learning_rate": 5.413533834586466e-07, "loss": 0.0, "step": 14772 }, { "epoch": 0.9520525874846942, "grad_norm": 0.008123316991707919, "learning_rate": 5.406373075546008e-07, "loss": 0.0, "step": 14773 }, { "epoch": 0.9521170329316234, "grad_norm": 0.1347828798719565, "learning_rate": 5.39921231650555e-07, "loss": 0.0002, "step": 14774 }, { "epoch": 0.9521814783785526, "grad_norm": 0.0013437443289455824, "learning_rate": 5.392051557465092e-07, "loss": 0.0, "step": 14775 }, { "epoch": 0.9522459238254817, "grad_norm": 0.03295035955820533, "learning_rate": 5.384890798424634e-07, "loss": 0.0, "step": 14776 }, { "epoch": 0.9523103692724109, "grad_norm": 0.002494679766104263, "learning_rate": 5.377730039384175e-07, "loss": 0.0, "step": 14777 }, { "epoch": 0.95237481471934, "grad_norm": 0.0004908145695510208, "learning_rate": 5.370569280343717e-07, "loss": 0.0, "step": 14778 }, { "epoch": 0.9524392601662692, "grad_norm": 0.07464586197099465, "learning_rate": 5.363408521303259e-07, "loss": 0.0001, "step": 14779 }, { "epoch": 0.9525037056131984, "grad_norm": 9.668897209007286e-05, "learning_rate": 5.356247762262801e-07, "loss": 0.0, "step": 14780 }, { "epoch": 0.9525681510601276, "grad_norm": 0.0045286485902072664, "learning_rate": 5.349087003222342e-07, "loss": 0.0, "step": 14781 }, { "epoch": 0.9526325965070568, "grad_norm": 0.0027660902273609613, "learning_rate": 5.341926244181884e-07, "loss": 0.0, "step": 14782 }, { "epoch": 0.952697041953986, "grad_norm": 0.005766246153932424, "learning_rate": 5.334765485141425e-07, "loss": 0.0, "step": 14783 }, { "epoch": 0.9527614874009152, "grad_norm": 0.001195186415794037, "learning_rate": 5.327604726100967e-07, "loss": 0.0, "step": 14784 }, { "epoch": 0.9528259328478443, "grad_norm": 0.0010986515422721234, "learning_rate": 5.320443967060509e-07, "loss": 0.0, "step": 14785 }, { "epoch": 0.9528903782947735, "grad_norm": 0.02101568137284521, "learning_rate": 5.313283208020051e-07, "loss": 0.0, "step": 14786 }, { "epoch": 0.9529548237417026, "grad_norm": 0.004531570145919255, "learning_rate": 5.306122448979592e-07, "loss": 0.0, "step": 14787 }, { "epoch": 0.9530192691886318, "grad_norm": 0.00046395947667538586, "learning_rate": 5.298961689939134e-07, "loss": 0.0, "step": 14788 }, { "epoch": 0.953083714635561, "grad_norm": 0.07514254155018177, "learning_rate": 5.291800930898676e-07, "loss": 0.0017, "step": 14789 }, { "epoch": 0.9531481600824901, "grad_norm": 0.00022843025780618325, "learning_rate": 5.284640171858217e-07, "loss": 0.0, "step": 14790 }, { "epoch": 0.9532126055294193, "grad_norm": 0.004491149951619666, "learning_rate": 5.277479412817759e-07, "loss": 0.0015, "step": 14791 }, { "epoch": 0.9532770509763485, "grad_norm": 0.0005985131719744827, "learning_rate": 5.270318653777301e-07, "loss": 0.0, "step": 14792 }, { "epoch": 0.9533414964232777, "grad_norm": 0.0005078170219091677, "learning_rate": 5.263157894736843e-07, "loss": 0.0, "step": 14793 }, { "epoch": 0.9534059418702069, "grad_norm": 0.002439488648787511, "learning_rate": 5.255997135696385e-07, "loss": 0.0015, "step": 14794 }, { "epoch": 0.9534703873171361, "grad_norm": 0.001817523298432707, "learning_rate": 5.248836376655927e-07, "loss": 0.0, "step": 14795 }, { "epoch": 0.9535348327640653, "grad_norm": 0.004825207351911492, "learning_rate": 5.241675617615467e-07, "loss": 0.0, "step": 14796 }, { "epoch": 0.9535992782109944, "grad_norm": 0.0004920440792501377, "learning_rate": 5.234514858575009e-07, "loss": 0.0, "step": 14797 }, { "epoch": 0.9536637236579236, "grad_norm": 0.003268970344869564, "learning_rate": 5.227354099534551e-07, "loss": 0.0, "step": 14798 }, { "epoch": 0.9537281691048527, "grad_norm": 6.151258179910864e-05, "learning_rate": 5.220193340494093e-07, "loss": 0.0, "step": 14799 }, { "epoch": 0.9537926145517819, "grad_norm": 0.0668628749985501, "learning_rate": 5.213032581453634e-07, "loss": 0.0002, "step": 14800 }, { "epoch": 0.9538570599987111, "grad_norm": 0.000984831570237332, "learning_rate": 5.205871822413176e-07, "loss": 0.0, "step": 14801 }, { "epoch": 0.9539215054456402, "grad_norm": 0.060623120937525234, "learning_rate": 5.198711063372718e-07, "loss": 0.0001, "step": 14802 }, { "epoch": 0.9539859508925694, "grad_norm": 0.0007038617180894285, "learning_rate": 5.19155030433226e-07, "loss": 0.0, "step": 14803 }, { "epoch": 0.9540503963394986, "grad_norm": 0.0028088622921580444, "learning_rate": 5.184389545291802e-07, "loss": 0.0, "step": 14804 }, { "epoch": 0.9541148417864278, "grad_norm": 0.010322705780409896, "learning_rate": 5.177228786251344e-07, "loss": 0.0, "step": 14805 }, { "epoch": 0.954179287233357, "grad_norm": 0.004806938039548968, "learning_rate": 5.170068027210885e-07, "loss": 0.0, "step": 14806 }, { "epoch": 0.9542437326802862, "grad_norm": 0.00021441404646725413, "learning_rate": 5.162907268170426e-07, "loss": 0.0, "step": 14807 }, { "epoch": 0.9543081781272154, "grad_norm": 0.04337507862455308, "learning_rate": 5.155746509129968e-07, "loss": 0.0001, "step": 14808 }, { "epoch": 0.9543726235741445, "grad_norm": 0.16570335639962636, "learning_rate": 5.148585750089509e-07, "loss": 0.0002, "step": 14809 }, { "epoch": 0.9544370690210736, "grad_norm": 0.006217551071894887, "learning_rate": 5.141424991049051e-07, "loss": 0.0, "step": 14810 }, { "epoch": 0.9545015144680028, "grad_norm": 0.0012870383012455599, "learning_rate": 5.134264232008593e-07, "loss": 0.0, "step": 14811 }, { "epoch": 0.954565959914932, "grad_norm": 0.016881525437275658, "learning_rate": 5.127103472968135e-07, "loss": 0.0, "step": 14812 }, { "epoch": 0.9546304053618612, "grad_norm": 0.001555288436966095, "learning_rate": 5.119942713927677e-07, "loss": 0.0, "step": 14813 }, { "epoch": 0.9546948508087904, "grad_norm": 0.008399473534627302, "learning_rate": 5.112781954887219e-07, "loss": 0.0, "step": 14814 }, { "epoch": 0.9547592962557195, "grad_norm": 0.0026212772132492594, "learning_rate": 5.10562119584676e-07, "loss": 0.0, "step": 14815 }, { "epoch": 0.9548237417026487, "grad_norm": 0.005733009960411951, "learning_rate": 5.098460436806302e-07, "loss": 0.0, "step": 14816 }, { "epoch": 0.9548881871495779, "grad_norm": 0.030370706949806717, "learning_rate": 5.091299677765844e-07, "loss": 0.0001, "step": 14817 }, { "epoch": 0.9549526325965071, "grad_norm": 0.0005157755746085372, "learning_rate": 5.084138918725386e-07, "loss": 0.0, "step": 14818 }, { "epoch": 0.9550170780434363, "grad_norm": 2.917667148725611e-05, "learning_rate": 5.076978159684926e-07, "loss": 0.0, "step": 14819 }, { "epoch": 0.9550815234903655, "grad_norm": 0.09262644516760862, "learning_rate": 5.069817400644468e-07, "loss": 0.0008, "step": 14820 }, { "epoch": 0.9551459689372945, "grad_norm": 0.003610028407824278, "learning_rate": 5.06265664160401e-07, "loss": 0.0, "step": 14821 }, { "epoch": 0.9552104143842237, "grad_norm": 0.0028029187425091767, "learning_rate": 5.055495882563552e-07, "loss": 0.0, "step": 14822 }, { "epoch": 0.9552748598311529, "grad_norm": 0.00040773365753766214, "learning_rate": 5.048335123523094e-07, "loss": 0.0, "step": 14823 }, { "epoch": 0.9553393052780821, "grad_norm": 0.00031257184924321485, "learning_rate": 5.041174364482636e-07, "loss": 0.0, "step": 14824 }, { "epoch": 0.9554037507250113, "grad_norm": 0.00040114875345770496, "learning_rate": 5.034013605442177e-07, "loss": 0.0, "step": 14825 }, { "epoch": 0.9554681961719405, "grad_norm": 0.008520543711840797, "learning_rate": 5.026852846401719e-07, "loss": 0.0, "step": 14826 }, { "epoch": 0.9555326416188696, "grad_norm": 0.00017712328294400463, "learning_rate": 5.019692087361261e-07, "loss": 0.0, "step": 14827 }, { "epoch": 0.9555970870657988, "grad_norm": 0.00448170861329047, "learning_rate": 5.012531328320802e-07, "loss": 0.0, "step": 14828 }, { "epoch": 0.955661532512728, "grad_norm": 0.002184216611653189, "learning_rate": 5.005370569280344e-07, "loss": 0.0, "step": 14829 }, { "epoch": 0.9557259779596572, "grad_norm": 0.2183928895592596, "learning_rate": 4.998209810239886e-07, "loss": 0.001, "step": 14830 }, { "epoch": 0.9557904234065864, "grad_norm": 0.012881565613289223, "learning_rate": 4.991049051199427e-07, "loss": 0.0001, "step": 14831 }, { "epoch": 0.9558548688535154, "grad_norm": 0.09858547151000312, "learning_rate": 4.983888292158969e-07, "loss": 0.0007, "step": 14832 }, { "epoch": 0.9559193143004446, "grad_norm": 0.001604564122585394, "learning_rate": 4.976727533118511e-07, "loss": 0.0, "step": 14833 }, { "epoch": 0.9559837597473738, "grad_norm": 0.002502730599271993, "learning_rate": 4.969566774078053e-07, "loss": 0.0, "step": 14834 }, { "epoch": 0.956048205194303, "grad_norm": 0.039794989158713906, "learning_rate": 4.962406015037594e-07, "loss": 0.0, "step": 14835 }, { "epoch": 0.9561126506412322, "grad_norm": 0.05801081561332805, "learning_rate": 4.955245255997136e-07, "loss": 0.0004, "step": 14836 }, { "epoch": 0.9561770960881614, "grad_norm": 0.5109591488232742, "learning_rate": 4.948084496956678e-07, "loss": 0.0038, "step": 14837 }, { "epoch": 0.9562415415350906, "grad_norm": 0.0008859893281252492, "learning_rate": 4.940923737916219e-07, "loss": 0.0, "step": 14838 }, { "epoch": 0.9563059869820197, "grad_norm": 0.0004626148219854151, "learning_rate": 4.933762978875761e-07, "loss": 0.0, "step": 14839 }, { "epoch": 0.9563704324289489, "grad_norm": 0.13976812235699776, "learning_rate": 4.926602219835303e-07, "loss": 0.0006, "step": 14840 }, { "epoch": 0.9564348778758781, "grad_norm": 0.0003939018396175427, "learning_rate": 4.919441460794845e-07, "loss": 0.0, "step": 14841 }, { "epoch": 0.9564993233228073, "grad_norm": 0.004048120224935769, "learning_rate": 4.912280701754387e-07, "loss": 0.0, "step": 14842 }, { "epoch": 0.9565637687697364, "grad_norm": 0.2624320373657076, "learning_rate": 4.905119942713928e-07, "loss": 0.0012, "step": 14843 }, { "epoch": 0.9566282142166656, "grad_norm": 0.014094358110339713, "learning_rate": 4.897959183673469e-07, "loss": 0.0, "step": 14844 }, { "epoch": 0.9566926596635947, "grad_norm": 0.0006931803598242944, "learning_rate": 4.890798424633011e-07, "loss": 0.0, "step": 14845 }, { "epoch": 0.9567571051105239, "grad_norm": 0.026155114545255122, "learning_rate": 4.883637665592553e-07, "loss": 0.0001, "step": 14846 }, { "epoch": 0.9568215505574531, "grad_norm": 0.0004005979354409368, "learning_rate": 4.876476906552095e-07, "loss": 0.0, "step": 14847 }, { "epoch": 0.9568859960043823, "grad_norm": 0.05163703402803826, "learning_rate": 4.869316147511637e-07, "loss": 0.0002, "step": 14848 }, { "epoch": 0.9569504414513115, "grad_norm": 0.00026671949687704445, "learning_rate": 4.862155388471179e-07, "loss": 0.0, "step": 14849 }, { "epoch": 0.9570148868982407, "grad_norm": 0.17085980114842259, "learning_rate": 4.85499462943072e-07, "loss": 0.0001, "step": 14850 }, { "epoch": 0.9570793323451698, "grad_norm": 0.023131071071852638, "learning_rate": 4.847833870390262e-07, "loss": 0.0, "step": 14851 }, { "epoch": 0.957143777792099, "grad_norm": 0.0025030747163234074, "learning_rate": 4.840673111349804e-07, "loss": 0.0, "step": 14852 }, { "epoch": 0.9572082232390282, "grad_norm": 0.0014296599078724075, "learning_rate": 4.833512352309346e-07, "loss": 0.0, "step": 14853 }, { "epoch": 0.9572726686859573, "grad_norm": 0.00019132692676034923, "learning_rate": 4.826351593268887e-07, "loss": 0.0, "step": 14854 }, { "epoch": 0.9573371141328865, "grad_norm": 0.000852706506334407, "learning_rate": 4.819190834228428e-07, "loss": 0.0, "step": 14855 }, { "epoch": 0.9574015595798157, "grad_norm": 0.002841819849834939, "learning_rate": 4.81203007518797e-07, "loss": 0.0, "step": 14856 }, { "epoch": 0.9574660050267448, "grad_norm": 0.2582747621177174, "learning_rate": 4.804869316147512e-07, "loss": 0.0062, "step": 14857 }, { "epoch": 0.957530450473674, "grad_norm": 0.0001664016880520021, "learning_rate": 4.797708557107054e-07, "loss": 0.0, "step": 14858 }, { "epoch": 0.9575948959206032, "grad_norm": 0.03976250129414592, "learning_rate": 4.790547798066596e-07, "loss": 0.0, "step": 14859 }, { "epoch": 0.9576593413675324, "grad_norm": 0.007408347848849625, "learning_rate": 4.783387039026137e-07, "loss": 0.0, "step": 14860 }, { "epoch": 0.9577237868144616, "grad_norm": 0.0006574807862907933, "learning_rate": 4.776226279985679e-07, "loss": 0.0, "step": 14861 }, { "epoch": 0.9577882322613908, "grad_norm": 0.0028955546851931594, "learning_rate": 4.769065520945221e-07, "loss": 0.0, "step": 14862 }, { "epoch": 0.9578526777083199, "grad_norm": 0.007108678630948101, "learning_rate": 4.7619047619047623e-07, "loss": 0.0001, "step": 14863 }, { "epoch": 0.9579171231552491, "grad_norm": 0.27302020792699677, "learning_rate": 4.754744002864304e-07, "loss": 0.002, "step": 14864 }, { "epoch": 0.9579815686021782, "grad_norm": 0.00322635562029426, "learning_rate": 4.747583243823846e-07, "loss": 0.0, "step": 14865 }, { "epoch": 0.9580460140491074, "grad_norm": 0.001582561611280091, "learning_rate": 4.740422484783387e-07, "loss": 0.0, "step": 14866 }, { "epoch": 0.9581104594960366, "grad_norm": 0.03473567228204857, "learning_rate": 4.733261725742929e-07, "loss": 0.0001, "step": 14867 }, { "epoch": 0.9581749049429658, "grad_norm": 0.004032275081067416, "learning_rate": 4.726100966702471e-07, "loss": 0.0, "step": 14868 }, { "epoch": 0.9582393503898949, "grad_norm": 0.005672604264508217, "learning_rate": 4.718940207662012e-07, "loss": 0.0, "step": 14869 }, { "epoch": 0.9583037958368241, "grad_norm": 0.0024886796876244506, "learning_rate": 4.711779448621554e-07, "loss": 0.0, "step": 14870 }, { "epoch": 0.9583682412837533, "grad_norm": 0.38062694010373216, "learning_rate": 4.704618689581096e-07, "loss": 0.0015, "step": 14871 }, { "epoch": 0.9584326867306825, "grad_norm": 0.0013407293218750376, "learning_rate": 4.697457930540638e-07, "loss": 0.0, "step": 14872 }, { "epoch": 0.9584971321776117, "grad_norm": 0.12025046514185207, "learning_rate": 4.6902971715001794e-07, "loss": 0.0003, "step": 14873 }, { "epoch": 0.9585615776245409, "grad_norm": 0.013953009693871022, "learning_rate": 4.6831364124597214e-07, "loss": 0.0, "step": 14874 }, { "epoch": 0.95862602307147, "grad_norm": 0.000177093544672877, "learning_rate": 4.6759756534192633e-07, "loss": 0.0, "step": 14875 }, { "epoch": 0.9586904685183992, "grad_norm": 0.011330444616391867, "learning_rate": 4.668814894378804e-07, "loss": 0.0, "step": 14876 }, { "epoch": 0.9587549139653283, "grad_norm": 0.0004049873983944743, "learning_rate": 4.661654135338346e-07, "loss": 0.0, "step": 14877 }, { "epoch": 0.9588193594122575, "grad_norm": 0.0850805202870303, "learning_rate": 4.654493376297888e-07, "loss": 0.0016, "step": 14878 }, { "epoch": 0.9588838048591867, "grad_norm": 0.0004410744841034204, "learning_rate": 4.6473326172574294e-07, "loss": 0.0, "step": 14879 }, { "epoch": 0.9589482503061159, "grad_norm": 0.011639213360409064, "learning_rate": 4.6401718582169713e-07, "loss": 0.0, "step": 14880 }, { "epoch": 0.959012695753045, "grad_norm": 0.008735612177839278, "learning_rate": 4.6330110991765133e-07, "loss": 0.0, "step": 14881 }, { "epoch": 0.9590771411999742, "grad_norm": 0.020911397296875626, "learning_rate": 4.6258503401360547e-07, "loss": 0.0, "step": 14882 }, { "epoch": 0.9591415866469034, "grad_norm": 0.001564329687710305, "learning_rate": 4.6186895810955966e-07, "loss": 0.0, "step": 14883 }, { "epoch": 0.9592060320938326, "grad_norm": 0.0016710818587629625, "learning_rate": 4.6115288220551385e-07, "loss": 0.0, "step": 14884 }, { "epoch": 0.9592704775407618, "grad_norm": 0.0039500020524210425, "learning_rate": 4.6043680630146794e-07, "loss": 0.0, "step": 14885 }, { "epoch": 0.959334922987691, "grad_norm": 0.00036652025819384343, "learning_rate": 4.5972073039742213e-07, "loss": 0.0, "step": 14886 }, { "epoch": 0.9593993684346201, "grad_norm": 0.06222436145498517, "learning_rate": 4.590046544933763e-07, "loss": 0.0001, "step": 14887 }, { "epoch": 0.9594638138815492, "grad_norm": 0.0022996133312288224, "learning_rate": 4.582885785893305e-07, "loss": 0.0, "step": 14888 }, { "epoch": 0.9595282593284784, "grad_norm": 0.018717318945592407, "learning_rate": 4.5757250268528466e-07, "loss": 0.0001, "step": 14889 }, { "epoch": 0.9595927047754076, "grad_norm": 0.0005461212142547301, "learning_rate": 4.5685642678123885e-07, "loss": 0.0, "step": 14890 }, { "epoch": 0.9596571502223368, "grad_norm": 0.0004114048163026691, "learning_rate": 4.5614035087719304e-07, "loss": 0.0, "step": 14891 }, { "epoch": 0.959721595669266, "grad_norm": 0.0038923848919269173, "learning_rate": 4.554242749731472e-07, "loss": 0.0, "step": 14892 }, { "epoch": 0.9597860411161951, "grad_norm": 0.005338129096024907, "learning_rate": 4.547081990691014e-07, "loss": 0.0, "step": 14893 }, { "epoch": 0.9598504865631243, "grad_norm": 0.04265827342489956, "learning_rate": 4.5399212316505557e-07, "loss": 0.0, "step": 14894 }, { "epoch": 0.9599149320100535, "grad_norm": 0.00013671547659063613, "learning_rate": 4.532760472610097e-07, "loss": 0.0, "step": 14895 }, { "epoch": 0.9599793774569827, "grad_norm": 0.00960232425371068, "learning_rate": 4.525599713569639e-07, "loss": 0.0, "step": 14896 }, { "epoch": 0.9600438229039119, "grad_norm": 0.00965865226960607, "learning_rate": 4.518438954529181e-07, "loss": 0.0, "step": 14897 }, { "epoch": 0.9601082683508411, "grad_norm": 9.192379356709644e-05, "learning_rate": 4.511278195488722e-07, "loss": 0.0, "step": 14898 }, { "epoch": 0.9601727137977701, "grad_norm": 0.00015305449739958055, "learning_rate": 4.504117436448264e-07, "loss": 0.0, "step": 14899 }, { "epoch": 0.9602371592446993, "grad_norm": 0.14304743566147973, "learning_rate": 4.4969566774078057e-07, "loss": 0.0003, "step": 14900 }, { "epoch": 0.9603016046916285, "grad_norm": 0.009295935886398593, "learning_rate": 4.489795918367347e-07, "loss": 0.0, "step": 14901 }, { "epoch": 0.9603660501385577, "grad_norm": 0.0002162139983364177, "learning_rate": 4.482635159326889e-07, "loss": 0.0, "step": 14902 }, { "epoch": 0.9604304955854869, "grad_norm": 0.03242333980491538, "learning_rate": 4.475474400286431e-07, "loss": 0.0001, "step": 14903 }, { "epoch": 0.9604949410324161, "grad_norm": 0.002406562227087017, "learning_rate": 4.4683136412459723e-07, "loss": 0.0, "step": 14904 }, { "epoch": 0.9605593864793452, "grad_norm": 0.0056715677897925445, "learning_rate": 4.461152882205514e-07, "loss": 0.0, "step": 14905 }, { "epoch": 0.9606238319262744, "grad_norm": 0.00045505831911909236, "learning_rate": 4.453992123165056e-07, "loss": 0.0, "step": 14906 }, { "epoch": 0.9606882773732036, "grad_norm": 0.0007268449933700111, "learning_rate": 4.446831364124598e-07, "loss": 0.0, "step": 14907 }, { "epoch": 0.9607527228201328, "grad_norm": 0.00022912322387547547, "learning_rate": 4.439670605084139e-07, "loss": 0.0, "step": 14908 }, { "epoch": 0.960817168267062, "grad_norm": 0.0022316612740019702, "learning_rate": 4.432509846043681e-07, "loss": 0.0, "step": 14909 }, { "epoch": 0.9608816137139911, "grad_norm": 0.004242630686752434, "learning_rate": 4.425349087003223e-07, "loss": 0.0, "step": 14910 }, { "epoch": 0.9609460591609202, "grad_norm": 0.04213162492549318, "learning_rate": 4.418188327962764e-07, "loss": 0.0001, "step": 14911 }, { "epoch": 0.9610105046078494, "grad_norm": 0.0008370413518882776, "learning_rate": 4.411027568922306e-07, "loss": 0.0, "step": 14912 }, { "epoch": 0.9610749500547786, "grad_norm": 0.0009282456515276538, "learning_rate": 4.403866809881848e-07, "loss": 0.0, "step": 14913 }, { "epoch": 0.9611393955017078, "grad_norm": 0.0004216024907836894, "learning_rate": 4.3967060508413895e-07, "loss": 0.0, "step": 14914 }, { "epoch": 0.961203840948637, "grad_norm": 0.004501349239021442, "learning_rate": 4.3895452918009314e-07, "loss": 0.0, "step": 14915 }, { "epoch": 0.9612682863955662, "grad_norm": 0.007865435385491804, "learning_rate": 4.3823845327604733e-07, "loss": 0.0001, "step": 14916 }, { "epoch": 0.9613327318424953, "grad_norm": 0.0001713778914629011, "learning_rate": 4.375223773720014e-07, "loss": 0.0, "step": 14917 }, { "epoch": 0.9613971772894245, "grad_norm": 0.1708792157943735, "learning_rate": 4.368063014679556e-07, "loss": 0.0007, "step": 14918 }, { "epoch": 0.9614616227363537, "grad_norm": 0.000479988725056364, "learning_rate": 4.360902255639098e-07, "loss": 0.0, "step": 14919 }, { "epoch": 0.9615260681832829, "grad_norm": 0.006377414893212848, "learning_rate": 4.3537414965986395e-07, "loss": 0.0, "step": 14920 }, { "epoch": 0.961590513630212, "grad_norm": 0.0036946523398398512, "learning_rate": 4.3465807375581814e-07, "loss": 0.0, "step": 14921 }, { "epoch": 0.9616549590771412, "grad_norm": 0.014419668187101007, "learning_rate": 4.3394199785177233e-07, "loss": 0.0, "step": 14922 }, { "epoch": 0.9617194045240703, "grad_norm": 0.011962387624208598, "learning_rate": 4.3322592194772647e-07, "loss": 0.0001, "step": 14923 }, { "epoch": 0.9617838499709995, "grad_norm": 0.012241861601313967, "learning_rate": 4.3250984604368067e-07, "loss": 0.0, "step": 14924 }, { "epoch": 0.9618482954179287, "grad_norm": 0.0005817396474641864, "learning_rate": 4.3179377013963486e-07, "loss": 0.0, "step": 14925 }, { "epoch": 0.9619127408648579, "grad_norm": 0.0004082905114981401, "learning_rate": 4.3107769423558905e-07, "loss": 0.0, "step": 14926 }, { "epoch": 0.9619771863117871, "grad_norm": 0.0023784330583258473, "learning_rate": 4.3036161833154314e-07, "loss": 0.0, "step": 14927 }, { "epoch": 0.9620416317587163, "grad_norm": 0.002489868312757046, "learning_rate": 4.2964554242749733e-07, "loss": 0.0, "step": 14928 }, { "epoch": 0.9621060772056454, "grad_norm": 0.003762379342939597, "learning_rate": 4.289294665234515e-07, "loss": 0.0, "step": 14929 }, { "epoch": 0.9621705226525746, "grad_norm": 0.0017895349142890476, "learning_rate": 4.2821339061940566e-07, "loss": 0.0, "step": 14930 }, { "epoch": 0.9622349680995038, "grad_norm": 0.0004923169026729873, "learning_rate": 4.2749731471535986e-07, "loss": 0.0, "step": 14931 }, { "epoch": 0.9622994135464329, "grad_norm": 0.00017590920193244695, "learning_rate": 4.2678123881131405e-07, "loss": 0.0, "step": 14932 }, { "epoch": 0.9623638589933621, "grad_norm": 0.00048500241951370985, "learning_rate": 4.260651629072682e-07, "loss": 0.0, "step": 14933 }, { "epoch": 0.9624283044402913, "grad_norm": 0.0010236614349840823, "learning_rate": 4.253490870032224e-07, "loss": 0.0, "step": 14934 }, { "epoch": 0.9624927498872204, "grad_norm": 0.0016053604449928272, "learning_rate": 4.246330110991766e-07, "loss": 0.0, "step": 14935 }, { "epoch": 0.9625571953341496, "grad_norm": 0.0012853831294313406, "learning_rate": 4.239169351951307e-07, "loss": 0.0, "step": 14936 }, { "epoch": 0.9626216407810788, "grad_norm": 0.0011035914182909066, "learning_rate": 4.232008592910849e-07, "loss": 0.0, "step": 14937 }, { "epoch": 0.962686086228008, "grad_norm": 0.07527333398147694, "learning_rate": 4.224847833870391e-07, "loss": 0.0008, "step": 14938 }, { "epoch": 0.9627505316749372, "grad_norm": 0.03563464857298093, "learning_rate": 4.217687074829932e-07, "loss": 0.0001, "step": 14939 }, { "epoch": 0.9628149771218664, "grad_norm": 0.02769180787150801, "learning_rate": 4.210526315789474e-07, "loss": 0.0, "step": 14940 }, { "epoch": 0.9628794225687956, "grad_norm": 0.0017228653770633396, "learning_rate": 4.2033655567490157e-07, "loss": 0.0, "step": 14941 }, { "epoch": 0.9629438680157247, "grad_norm": 0.013080418369412865, "learning_rate": 4.1962047977085577e-07, "loss": 0.0001, "step": 14942 }, { "epoch": 0.9630083134626538, "grad_norm": 0.33033698275444734, "learning_rate": 4.189044038668099e-07, "loss": 0.0008, "step": 14943 }, { "epoch": 0.963072758909583, "grad_norm": 0.004267734442137806, "learning_rate": 4.181883279627641e-07, "loss": 0.0, "step": 14944 }, { "epoch": 0.9631372043565122, "grad_norm": 0.01560012273070935, "learning_rate": 4.174722520587183e-07, "loss": 0.0, "step": 14945 }, { "epoch": 0.9632016498034414, "grad_norm": 0.0043316884787335036, "learning_rate": 4.1675617615467243e-07, "loss": 0.0, "step": 14946 }, { "epoch": 0.9632660952503705, "grad_norm": 0.00013166375289761843, "learning_rate": 4.160401002506266e-07, "loss": 0.0, "step": 14947 }, { "epoch": 0.9633305406972997, "grad_norm": 0.0010617943515079224, "learning_rate": 4.153240243465808e-07, "loss": 0.0, "step": 14948 }, { "epoch": 0.9633949861442289, "grad_norm": 0.06773688748923617, "learning_rate": 4.146079484425349e-07, "loss": 0.0001, "step": 14949 }, { "epoch": 0.9634594315911581, "grad_norm": 0.03567411390503899, "learning_rate": 4.138918725384891e-07, "loss": 0.0004, "step": 14950 }, { "epoch": 0.9635238770380873, "grad_norm": 6.775566679261235e-05, "learning_rate": 4.131757966344433e-07, "loss": 0.0, "step": 14951 }, { "epoch": 0.9635883224850165, "grad_norm": 0.04405645647140258, "learning_rate": 4.1245972073039743e-07, "loss": 0.0001, "step": 14952 }, { "epoch": 0.9636527679319457, "grad_norm": 0.00023435635260378457, "learning_rate": 4.117436448263516e-07, "loss": 0.0, "step": 14953 }, { "epoch": 0.9637172133788748, "grad_norm": 0.1547491531137055, "learning_rate": 4.110275689223058e-07, "loss": 0.0002, "step": 14954 }, { "epoch": 0.9637816588258039, "grad_norm": 0.561563241625908, "learning_rate": 4.1031149301825995e-07, "loss": 0.0035, "step": 14955 }, { "epoch": 0.9638461042727331, "grad_norm": 0.005156861571168343, "learning_rate": 4.0959541711421415e-07, "loss": 0.0001, "step": 14956 }, { "epoch": 0.9639105497196623, "grad_norm": 0.19693480029990224, "learning_rate": 4.0887934121016834e-07, "loss": 0.0006, "step": 14957 }, { "epoch": 0.9639749951665915, "grad_norm": 0.0005532097400886752, "learning_rate": 4.0816326530612243e-07, "loss": 0.0, "step": 14958 }, { "epoch": 0.9640394406135206, "grad_norm": 0.00238901745451577, "learning_rate": 4.074471894020766e-07, "loss": 0.0, "step": 14959 }, { "epoch": 0.9641038860604498, "grad_norm": 0.0026651981924149973, "learning_rate": 4.067311134980308e-07, "loss": 0.0, "step": 14960 }, { "epoch": 0.964168331507379, "grad_norm": 0.003920529480750357, "learning_rate": 4.06015037593985e-07, "loss": 0.0, "step": 14961 }, { "epoch": 0.9642327769543082, "grad_norm": 0.003843617604241777, "learning_rate": 4.0529896168993915e-07, "loss": 0.0, "step": 14962 }, { "epoch": 0.9642972224012374, "grad_norm": 0.0025857098032880323, "learning_rate": 4.0458288578589334e-07, "loss": 0.0, "step": 14963 }, { "epoch": 0.9643616678481666, "grad_norm": 0.018335899444566185, "learning_rate": 4.0386680988184753e-07, "loss": 0.0, "step": 14964 }, { "epoch": 0.9644261132950958, "grad_norm": 0.0009981560033705578, "learning_rate": 4.0315073397780167e-07, "loss": 0.0, "step": 14965 }, { "epoch": 0.9644905587420248, "grad_norm": 0.0009387569199021039, "learning_rate": 4.0243465807375586e-07, "loss": 0.0, "step": 14966 }, { "epoch": 0.964555004188954, "grad_norm": 0.019251289181688305, "learning_rate": 4.0171858216971006e-07, "loss": 0.0001, "step": 14967 }, { "epoch": 0.9646194496358832, "grad_norm": 0.00010738621659829706, "learning_rate": 4.010025062656642e-07, "loss": 0.0, "step": 14968 }, { "epoch": 0.9646838950828124, "grad_norm": 0.03203928420734971, "learning_rate": 4.002864303616184e-07, "loss": 0.0001, "step": 14969 }, { "epoch": 0.9647483405297416, "grad_norm": 0.0003362348993916054, "learning_rate": 3.995703544575726e-07, "loss": 0.0, "step": 14970 }, { "epoch": 0.9648127859766708, "grad_norm": 0.000265084003407136, "learning_rate": 3.9885427855352667e-07, "loss": 0.0, "step": 14971 }, { "epoch": 0.9648772314235999, "grad_norm": 0.004495177856111423, "learning_rate": 3.9813820264948086e-07, "loss": 0.0, "step": 14972 }, { "epoch": 0.9649416768705291, "grad_norm": 0.04050054439674393, "learning_rate": 3.9742212674543505e-07, "loss": 0.0001, "step": 14973 }, { "epoch": 0.9650061223174583, "grad_norm": 6.170983398313063e-05, "learning_rate": 3.967060508413892e-07, "loss": 0.0, "step": 14974 }, { "epoch": 0.9650705677643875, "grad_norm": 0.00017663978023309493, "learning_rate": 3.959899749373434e-07, "loss": 0.0, "step": 14975 }, { "epoch": 0.9651350132113167, "grad_norm": 5.699082884079711e-05, "learning_rate": 3.952738990332976e-07, "loss": 0.0, "step": 14976 }, { "epoch": 0.9651994586582457, "grad_norm": 0.000602340558573276, "learning_rate": 3.945578231292517e-07, "loss": 0.0, "step": 14977 }, { "epoch": 0.9652639041051749, "grad_norm": 0.0006447693375527919, "learning_rate": 3.938417472252059e-07, "loss": 0.0, "step": 14978 }, { "epoch": 0.9653283495521041, "grad_norm": 0.005086575180647127, "learning_rate": 3.931256713211601e-07, "loss": 0.0, "step": 14979 }, { "epoch": 0.9653927949990333, "grad_norm": 0.18404470009542506, "learning_rate": 3.924095954171143e-07, "loss": 0.0021, "step": 14980 }, { "epoch": 0.9654572404459625, "grad_norm": 0.0012513134685006979, "learning_rate": 3.916935195130684e-07, "loss": 0.0, "step": 14981 }, { "epoch": 0.9655216858928917, "grad_norm": 0.0026647520580636358, "learning_rate": 3.909774436090226e-07, "loss": 0.0, "step": 14982 }, { "epoch": 0.9655861313398209, "grad_norm": 0.00042825462339338795, "learning_rate": 3.9026136770497677e-07, "loss": 0.0, "step": 14983 }, { "epoch": 0.96565057678675, "grad_norm": 0.07452899937460482, "learning_rate": 3.895452918009309e-07, "loss": 0.0001, "step": 14984 }, { "epoch": 0.9657150222336792, "grad_norm": 0.0003942303834534447, "learning_rate": 3.888292158968851e-07, "loss": 0.0, "step": 14985 }, { "epoch": 0.9657794676806084, "grad_norm": 0.000612557581035135, "learning_rate": 3.881131399928393e-07, "loss": 0.0, "step": 14986 }, { "epoch": 0.9658439131275376, "grad_norm": 0.015846453551998426, "learning_rate": 3.8739706408879344e-07, "loss": 0.0, "step": 14987 }, { "epoch": 0.9659083585744667, "grad_norm": 0.10003049039179873, "learning_rate": 3.8668098818474763e-07, "loss": 0.0012, "step": 14988 }, { "epoch": 0.9659728040213958, "grad_norm": 0.00026728753770736686, "learning_rate": 3.859649122807018e-07, "loss": 0.0, "step": 14989 }, { "epoch": 0.966037249468325, "grad_norm": 0.0002459140433178353, "learning_rate": 3.852488363766559e-07, "loss": 0.0, "step": 14990 }, { "epoch": 0.9661016949152542, "grad_norm": 0.0005397851917570067, "learning_rate": 3.845327604726101e-07, "loss": 0.0, "step": 14991 }, { "epoch": 0.9661661403621834, "grad_norm": 0.00021344605552674962, "learning_rate": 3.838166845685643e-07, "loss": 0.0, "step": 14992 }, { "epoch": 0.9662305858091126, "grad_norm": 0.0004351620778108871, "learning_rate": 3.8310060866451843e-07, "loss": 0.0, "step": 14993 }, { "epoch": 0.9662950312560418, "grad_norm": 0.000685698942079955, "learning_rate": 3.8238453276047263e-07, "loss": 0.0, "step": 14994 }, { "epoch": 0.966359476702971, "grad_norm": 0.04826283823719522, "learning_rate": 3.816684568564268e-07, "loss": 0.0003, "step": 14995 }, { "epoch": 0.9664239221499001, "grad_norm": 0.0009347700493750752, "learning_rate": 3.80952380952381e-07, "loss": 0.0, "step": 14996 }, { "epoch": 0.9664883675968293, "grad_norm": 6.683883255573541e-05, "learning_rate": 3.8023630504833515e-07, "loss": 0.0, "step": 14997 }, { "epoch": 0.9665528130437585, "grad_norm": 0.014872162732202817, "learning_rate": 3.7952022914428935e-07, "loss": 0.0, "step": 14998 }, { "epoch": 0.9666172584906876, "grad_norm": 0.002071540981744842, "learning_rate": 3.7880415324024354e-07, "loss": 0.0, "step": 14999 }, { "epoch": 0.9666817039376168, "grad_norm": 0.0568428519555771, "learning_rate": 3.780880773361976e-07, "loss": 0.0004, "step": 15000 }, { "epoch": 0.966746149384546, "grad_norm": 0.008794983717003983, "learning_rate": 3.773720014321518e-07, "loss": 0.0, "step": 15001 }, { "epoch": 0.9668105948314751, "grad_norm": 0.00263075135347942, "learning_rate": 3.76655925528106e-07, "loss": 0.0, "step": 15002 }, { "epoch": 0.9668750402784043, "grad_norm": 0.00011607515633291526, "learning_rate": 3.7593984962406015e-07, "loss": 0.0, "step": 15003 }, { "epoch": 0.9669394857253335, "grad_norm": 0.00035570977344336345, "learning_rate": 3.7522377372001434e-07, "loss": 0.0, "step": 15004 }, { "epoch": 0.9670039311722627, "grad_norm": 0.0003867004197191581, "learning_rate": 3.7450769781596854e-07, "loss": 0.0, "step": 15005 }, { "epoch": 0.9670683766191919, "grad_norm": 0.00025640017192041177, "learning_rate": 3.737916219119227e-07, "loss": 0.0, "step": 15006 }, { "epoch": 0.9671328220661211, "grad_norm": 0.001456672819982795, "learning_rate": 3.7307554600787687e-07, "loss": 0.0, "step": 15007 }, { "epoch": 0.9671972675130502, "grad_norm": 0.0014484291111371464, "learning_rate": 3.7235947010383106e-07, "loss": 0.0, "step": 15008 }, { "epoch": 0.9672617129599794, "grad_norm": 0.0015187991078433156, "learning_rate": 3.716433941997852e-07, "loss": 0.0, "step": 15009 }, { "epoch": 0.9673261584069085, "grad_norm": 0.0004086356212154145, "learning_rate": 3.709273182957394e-07, "loss": 0.0, "step": 15010 }, { "epoch": 0.9673906038538377, "grad_norm": 0.0012860003776702935, "learning_rate": 3.702112423916936e-07, "loss": 0.0, "step": 15011 }, { "epoch": 0.9674550493007669, "grad_norm": 0.0003662492167377044, "learning_rate": 3.694951664876477e-07, "loss": 0.0, "step": 15012 }, { "epoch": 0.967519494747696, "grad_norm": 0.0007289835883782952, "learning_rate": 3.6877909058360187e-07, "loss": 0.0, "step": 15013 }, { "epoch": 0.9675839401946252, "grad_norm": 0.312234252945699, "learning_rate": 3.6806301467955606e-07, "loss": 0.0006, "step": 15014 }, { "epoch": 0.9676483856415544, "grad_norm": 0.00048179295287462684, "learning_rate": 3.6734693877551025e-07, "loss": 0.0, "step": 15015 }, { "epoch": 0.9677128310884836, "grad_norm": 0.001208616655439765, "learning_rate": 3.666308628714644e-07, "loss": 0.0, "step": 15016 }, { "epoch": 0.9677772765354128, "grad_norm": 0.3449243621045279, "learning_rate": 3.659147869674186e-07, "loss": 0.0002, "step": 15017 }, { "epoch": 0.967841721982342, "grad_norm": 0.015200695074966152, "learning_rate": 3.651987110633728e-07, "loss": 0.0, "step": 15018 }, { "epoch": 0.9679061674292712, "grad_norm": 0.0014013163488261668, "learning_rate": 3.644826351593269e-07, "loss": 0.0, "step": 15019 }, { "epoch": 0.9679706128762003, "grad_norm": 0.0005596241722062707, "learning_rate": 3.637665592552811e-07, "loss": 0.0, "step": 15020 }, { "epoch": 0.9680350583231294, "grad_norm": 0.0005016758906052871, "learning_rate": 3.630504833512353e-07, "loss": 0.0, "step": 15021 }, { "epoch": 0.9680995037700586, "grad_norm": 0.0002737052037732511, "learning_rate": 3.623344074471894e-07, "loss": 0.0, "step": 15022 }, { "epoch": 0.9681639492169878, "grad_norm": 0.004652262607627751, "learning_rate": 3.616183315431436e-07, "loss": 0.0, "step": 15023 }, { "epoch": 0.968228394663917, "grad_norm": 0.0029658107489418965, "learning_rate": 3.609022556390978e-07, "loss": 0.0, "step": 15024 }, { "epoch": 0.9682928401108462, "grad_norm": 0.0018419730604352967, "learning_rate": 3.601861797350519e-07, "loss": 0.0, "step": 15025 }, { "epoch": 0.9683572855577753, "grad_norm": 0.0028526369216188116, "learning_rate": 3.594701038310061e-07, "loss": 0.0, "step": 15026 }, { "epoch": 0.9684217310047045, "grad_norm": 0.0008088329827487265, "learning_rate": 3.587540279269603e-07, "loss": 0.0, "step": 15027 }, { "epoch": 0.9684861764516337, "grad_norm": 0.10358512542605385, "learning_rate": 3.5803795202291444e-07, "loss": 0.0001, "step": 15028 }, { "epoch": 0.9685506218985629, "grad_norm": 0.0004286461885383949, "learning_rate": 3.5732187611886864e-07, "loss": 0.0, "step": 15029 }, { "epoch": 0.9686150673454921, "grad_norm": 0.0017663911047688466, "learning_rate": 3.5660580021482283e-07, "loss": 0.0, "step": 15030 }, { "epoch": 0.9686795127924213, "grad_norm": 0.0016889736383674534, "learning_rate": 3.558897243107769e-07, "loss": 0.0, "step": 15031 }, { "epoch": 0.9687439582393504, "grad_norm": 0.0320417512712144, "learning_rate": 3.551736484067311e-07, "loss": 0.0001, "step": 15032 }, { "epoch": 0.9688084036862795, "grad_norm": 0.014156902284652106, "learning_rate": 3.544575725026853e-07, "loss": 0.0, "step": 15033 }, { "epoch": 0.9688728491332087, "grad_norm": 0.0018575665054532513, "learning_rate": 3.537414965986395e-07, "loss": 0.0, "step": 15034 }, { "epoch": 0.9689372945801379, "grad_norm": 0.3058661502755169, "learning_rate": 3.5302542069459363e-07, "loss": 0.0011, "step": 15035 }, { "epoch": 0.9690017400270671, "grad_norm": 0.05986802368608069, "learning_rate": 3.523093447905478e-07, "loss": 0.0002, "step": 15036 }, { "epoch": 0.9690661854739963, "grad_norm": 0.0004834545781512201, "learning_rate": 3.51593268886502e-07, "loss": 0.0, "step": 15037 }, { "epoch": 0.9691306309209254, "grad_norm": 0.012819738966230294, "learning_rate": 3.5087719298245616e-07, "loss": 0.0, "step": 15038 }, { "epoch": 0.9691950763678546, "grad_norm": 6.932870897949043e-05, "learning_rate": 3.5016111707841035e-07, "loss": 0.0, "step": 15039 }, { "epoch": 0.9692595218147838, "grad_norm": 0.03340801142225263, "learning_rate": 3.4944504117436454e-07, "loss": 0.0001, "step": 15040 }, { "epoch": 0.969323967261713, "grad_norm": 0.0023576116642467257, "learning_rate": 3.487289652703187e-07, "loss": 0.0, "step": 15041 }, { "epoch": 0.9693884127086422, "grad_norm": 0.001895887451075661, "learning_rate": 3.480128893662729e-07, "loss": 0.0, "step": 15042 }, { "epoch": 0.9694528581555714, "grad_norm": 0.0004433743816133635, "learning_rate": 3.4729681346222707e-07, "loss": 0.0, "step": 15043 }, { "epoch": 0.9695173036025004, "grad_norm": 0.19629095086095616, "learning_rate": 3.4658073755818116e-07, "loss": 0.0004, "step": 15044 }, { "epoch": 0.9695817490494296, "grad_norm": 0.00023726558542802232, "learning_rate": 3.4586466165413535e-07, "loss": 0.0, "step": 15045 }, { "epoch": 0.9696461944963588, "grad_norm": 0.018993789490638787, "learning_rate": 3.4514858575008954e-07, "loss": 0.0, "step": 15046 }, { "epoch": 0.969710639943288, "grad_norm": 0.0002520063939503362, "learning_rate": 3.444325098460437e-07, "loss": 0.0, "step": 15047 }, { "epoch": 0.9697750853902172, "grad_norm": 0.005074428359961007, "learning_rate": 3.437164339419979e-07, "loss": 0.0, "step": 15048 }, { "epoch": 0.9698395308371464, "grad_norm": 0.0015976258024932811, "learning_rate": 3.4300035803795207e-07, "loss": 0.0, "step": 15049 }, { "epoch": 0.9699039762840755, "grad_norm": 0.0011570189577347902, "learning_rate": 3.4228428213390626e-07, "loss": 0.0, "step": 15050 }, { "epoch": 0.9699684217310047, "grad_norm": 0.0467625252062478, "learning_rate": 3.415682062298604e-07, "loss": 0.0, "step": 15051 }, { "epoch": 0.9700328671779339, "grad_norm": 0.10716094057074699, "learning_rate": 3.408521303258146e-07, "loss": 0.0001, "step": 15052 }, { "epoch": 0.9700973126248631, "grad_norm": 0.0010008353049710968, "learning_rate": 3.401360544217688e-07, "loss": 0.0, "step": 15053 }, { "epoch": 0.9701617580717923, "grad_norm": 0.05783591134718608, "learning_rate": 3.3941997851772287e-07, "loss": 0.0, "step": 15054 }, { "epoch": 0.9702262035187214, "grad_norm": 0.01778828143835162, "learning_rate": 3.3870390261367707e-07, "loss": 0.0001, "step": 15055 }, { "epoch": 0.9702906489656505, "grad_norm": 0.011812855447108965, "learning_rate": 3.3798782670963126e-07, "loss": 0.0, "step": 15056 }, { "epoch": 0.9703550944125797, "grad_norm": 0.0007705646428616686, "learning_rate": 3.372717508055854e-07, "loss": 0.0, "step": 15057 }, { "epoch": 0.9704195398595089, "grad_norm": 0.0002786041604228193, "learning_rate": 3.365556749015396e-07, "loss": 0.0, "step": 15058 }, { "epoch": 0.9704839853064381, "grad_norm": 0.03147547745520999, "learning_rate": 3.358395989974938e-07, "loss": 0.0003, "step": 15059 }, { "epoch": 0.9705484307533673, "grad_norm": 0.0025172349651695577, "learning_rate": 3.351235230934479e-07, "loss": 0.0, "step": 15060 }, { "epoch": 0.9706128762002965, "grad_norm": 0.0004264914551491804, "learning_rate": 3.344074471894021e-07, "loss": 0.0, "step": 15061 }, { "epoch": 0.9706773216472256, "grad_norm": 0.37015364172979875, "learning_rate": 3.336913712853563e-07, "loss": 0.001, "step": 15062 }, { "epoch": 0.9707417670941548, "grad_norm": 0.0026231783325515613, "learning_rate": 3.329752953813104e-07, "loss": 0.0, "step": 15063 }, { "epoch": 0.970806212541084, "grad_norm": 2.1080350788028017, "learning_rate": 3.322592194772646e-07, "loss": 0.0038, "step": 15064 }, { "epoch": 0.9708706579880132, "grad_norm": 0.001236758915207383, "learning_rate": 3.315431435732188e-07, "loss": 0.0, "step": 15065 }, { "epoch": 0.9709351034349423, "grad_norm": 0.0008858550028966706, "learning_rate": 3.308270676691729e-07, "loss": 0.0, "step": 15066 }, { "epoch": 0.9709995488818715, "grad_norm": 0.012957684924999985, "learning_rate": 3.301109917651271e-07, "loss": 0.0001, "step": 15067 }, { "epoch": 0.9710639943288006, "grad_norm": 0.0035890522764365973, "learning_rate": 3.293949158610813e-07, "loss": 0.0, "step": 15068 }, { "epoch": 0.9711284397757298, "grad_norm": 0.0075972949912963775, "learning_rate": 3.286788399570355e-07, "loss": 0.0001, "step": 15069 }, { "epoch": 0.971192885222659, "grad_norm": 0.00017337491251541505, "learning_rate": 3.2796276405298964e-07, "loss": 0.0, "step": 15070 }, { "epoch": 0.9712573306695882, "grad_norm": 0.00840977936296432, "learning_rate": 3.2724668814894383e-07, "loss": 0.0, "step": 15071 }, { "epoch": 0.9713217761165174, "grad_norm": 0.02215838745153162, "learning_rate": 3.2653061224489803e-07, "loss": 0.0002, "step": 15072 }, { "epoch": 0.9713862215634466, "grad_norm": 8.793780417752341, "learning_rate": 3.258145363408521e-07, "loss": 0.023, "step": 15073 }, { "epoch": 0.9714506670103757, "grad_norm": 0.006577569953616285, "learning_rate": 3.250984604368063e-07, "loss": 0.0, "step": 15074 }, { "epoch": 0.9715151124573049, "grad_norm": 0.11361466455953624, "learning_rate": 3.243823845327605e-07, "loss": 0.0003, "step": 15075 }, { "epoch": 0.9715795579042341, "grad_norm": 0.0046748844567726456, "learning_rate": 3.2366630862871464e-07, "loss": 0.0, "step": 15076 }, { "epoch": 0.9716440033511632, "grad_norm": 0.015705236972287314, "learning_rate": 3.2295023272466883e-07, "loss": 0.0001, "step": 15077 }, { "epoch": 0.9717084487980924, "grad_norm": 0.27528340031077064, "learning_rate": 3.22234156820623e-07, "loss": 0.0022, "step": 15078 }, { "epoch": 0.9717728942450216, "grad_norm": 0.016288971732021627, "learning_rate": 3.2151808091657716e-07, "loss": 0.0001, "step": 15079 }, { "epoch": 0.9718373396919507, "grad_norm": 0.47387322685274225, "learning_rate": 3.2080200501253136e-07, "loss": 0.0044, "step": 15080 }, { "epoch": 0.9719017851388799, "grad_norm": 0.0011477208401962785, "learning_rate": 3.2008592910848555e-07, "loss": 0.0, "step": 15081 }, { "epoch": 0.9719662305858091, "grad_norm": 0.0034684748119078452, "learning_rate": 3.193698532044397e-07, "loss": 0.0, "step": 15082 }, { "epoch": 0.9720306760327383, "grad_norm": 0.004370211072131715, "learning_rate": 3.186537773003939e-07, "loss": 0.0, "step": 15083 }, { "epoch": 0.9720951214796675, "grad_norm": 0.004952676162059169, "learning_rate": 3.179377013963481e-07, "loss": 0.0, "step": 15084 }, { "epoch": 0.9721595669265967, "grad_norm": 0.0369411296736311, "learning_rate": 3.1722162549230216e-07, "loss": 0.0001, "step": 15085 }, { "epoch": 0.9722240123735258, "grad_norm": 0.0003230311543377619, "learning_rate": 3.1650554958825636e-07, "loss": 0.0, "step": 15086 }, { "epoch": 0.972288457820455, "grad_norm": 0.0008222280461801855, "learning_rate": 3.1578947368421055e-07, "loss": 0.0, "step": 15087 }, { "epoch": 0.9723529032673841, "grad_norm": 0.00021895085382019427, "learning_rate": 3.1507339778016474e-07, "loss": 0.0, "step": 15088 }, { "epoch": 0.9724173487143133, "grad_norm": 0.0011354382390777846, "learning_rate": 3.143573218761189e-07, "loss": 0.0, "step": 15089 }, { "epoch": 0.9724817941612425, "grad_norm": 0.0003584275642686638, "learning_rate": 3.136412459720731e-07, "loss": 0.0, "step": 15090 }, { "epoch": 0.9725462396081717, "grad_norm": 0.00023074791790306088, "learning_rate": 3.1292517006802727e-07, "loss": 0.0, "step": 15091 }, { "epoch": 0.9726106850551008, "grad_norm": 0.02096019492573941, "learning_rate": 3.122090941639814e-07, "loss": 0.0, "step": 15092 }, { "epoch": 0.97267513050203, "grad_norm": 0.0014797492472057203, "learning_rate": 3.114930182599356e-07, "loss": 0.0, "step": 15093 }, { "epoch": 0.9727395759489592, "grad_norm": 0.0010520218654664925, "learning_rate": 3.1077694235588974e-07, "loss": 0.0, "step": 15094 }, { "epoch": 0.9728040213958884, "grad_norm": 0.0008259018489257181, "learning_rate": 3.1006086645184393e-07, "loss": 0.0, "step": 15095 }, { "epoch": 0.9728684668428176, "grad_norm": 0.000960079676407256, "learning_rate": 3.0934479054779807e-07, "loss": 0.0, "step": 15096 }, { "epoch": 0.9729329122897468, "grad_norm": 0.000389977335042752, "learning_rate": 3.0862871464375226e-07, "loss": 0.0, "step": 15097 }, { "epoch": 0.972997357736676, "grad_norm": 0.008095213189473769, "learning_rate": 3.0791263873970646e-07, "loss": 0.0, "step": 15098 }, { "epoch": 0.973061803183605, "grad_norm": 0.0006163576107396637, "learning_rate": 3.071965628356606e-07, "loss": 0.0, "step": 15099 }, { "epoch": 0.9731262486305342, "grad_norm": 0.03904227537872416, "learning_rate": 3.064804869316148e-07, "loss": 0.0002, "step": 15100 }, { "epoch": 0.9731906940774634, "grad_norm": 0.09723703527236527, "learning_rate": 3.0576441102756893e-07, "loss": 0.0002, "step": 15101 }, { "epoch": 0.9732551395243926, "grad_norm": 0.1627526732828032, "learning_rate": 3.050483351235231e-07, "loss": 0.0006, "step": 15102 }, { "epoch": 0.9733195849713218, "grad_norm": 0.055487841104563056, "learning_rate": 3.043322592194773e-07, "loss": 0.0001, "step": 15103 }, { "epoch": 0.973384030418251, "grad_norm": 0.00046731365863853907, "learning_rate": 3.0361618331543146e-07, "loss": 0.0, "step": 15104 }, { "epoch": 0.9734484758651801, "grad_norm": 0.0034985155925200406, "learning_rate": 3.029001074113856e-07, "loss": 0.0, "step": 15105 }, { "epoch": 0.9735129213121093, "grad_norm": 0.2584352197511904, "learning_rate": 3.021840315073398e-07, "loss": 0.0013, "step": 15106 }, { "epoch": 0.9735773667590385, "grad_norm": 0.017777880831103005, "learning_rate": 3.01467955603294e-07, "loss": 0.0001, "step": 15107 }, { "epoch": 0.9736418122059677, "grad_norm": 0.155115485075132, "learning_rate": 3.007518796992481e-07, "loss": 0.0006, "step": 15108 }, { "epoch": 0.9737062576528969, "grad_norm": 0.003783563882959519, "learning_rate": 3.000358037952023e-07, "loss": 0.0, "step": 15109 }, { "epoch": 0.973770703099826, "grad_norm": 0.01880250699719194, "learning_rate": 2.9931972789115645e-07, "loss": 0.0, "step": 15110 }, { "epoch": 0.9738351485467551, "grad_norm": 0.0008284128947969417, "learning_rate": 2.9860365198711065e-07, "loss": 0.0, "step": 15111 }, { "epoch": 0.9738995939936843, "grad_norm": 0.00040061634608030576, "learning_rate": 2.9788757608306484e-07, "loss": 0.0, "step": 15112 }, { "epoch": 0.9739640394406135, "grad_norm": 0.0015559103676428346, "learning_rate": 2.97171500179019e-07, "loss": 0.0, "step": 15113 }, { "epoch": 0.9740284848875427, "grad_norm": 4.241696346927127e-05, "learning_rate": 2.9645542427497317e-07, "loss": 0.0, "step": 15114 }, { "epoch": 0.9740929303344719, "grad_norm": 0.0075211743685994975, "learning_rate": 2.9573934837092736e-07, "loss": 0.0, "step": 15115 }, { "epoch": 0.974157375781401, "grad_norm": 0.0007922172603788567, "learning_rate": 2.950232724668815e-07, "loss": 0.0, "step": 15116 }, { "epoch": 0.9742218212283302, "grad_norm": 8.615947529956862e-05, "learning_rate": 2.943071965628357e-07, "loss": 0.0, "step": 15117 }, { "epoch": 0.9742862666752594, "grad_norm": 0.0011283102264602943, "learning_rate": 2.9359112065878984e-07, "loss": 0.0, "step": 15118 }, { "epoch": 0.9743507121221886, "grad_norm": 0.0003551449614521658, "learning_rate": 2.9287504475474403e-07, "loss": 0.0, "step": 15119 }, { "epoch": 0.9744151575691178, "grad_norm": 0.011122411754986475, "learning_rate": 2.921589688506982e-07, "loss": 0.0, "step": 15120 }, { "epoch": 0.974479603016047, "grad_norm": 0.002033701591427555, "learning_rate": 2.9144289294665236e-07, "loss": 0.0, "step": 15121 }, { "epoch": 0.974544048462976, "grad_norm": 0.0004772722821963182, "learning_rate": 2.9072681704260656e-07, "loss": 0.0, "step": 15122 }, { "epoch": 0.9746084939099052, "grad_norm": 0.014922254632288611, "learning_rate": 2.900107411385607e-07, "loss": 0.0, "step": 15123 }, { "epoch": 0.9746729393568344, "grad_norm": 0.14024032856409033, "learning_rate": 2.892946652345149e-07, "loss": 0.0005, "step": 15124 }, { "epoch": 0.9747373848037636, "grad_norm": 0.005984868862872008, "learning_rate": 2.885785893304691e-07, "loss": 0.0, "step": 15125 }, { "epoch": 0.9748018302506928, "grad_norm": 0.004334931194733932, "learning_rate": 2.878625134264232e-07, "loss": 0.0, "step": 15126 }, { "epoch": 0.974866275697622, "grad_norm": 0.002894429516225133, "learning_rate": 2.871464375223774e-07, "loss": 0.0, "step": 15127 }, { "epoch": 0.9749307211445511, "grad_norm": 0.0003413323669872801, "learning_rate": 2.8643036161833155e-07, "loss": 0.0, "step": 15128 }, { "epoch": 0.9749951665914803, "grad_norm": 0.0010935772408200211, "learning_rate": 2.8571428571428575e-07, "loss": 0.0, "step": 15129 }, { "epoch": 0.9750596120384095, "grad_norm": 0.11490509704268698, "learning_rate": 2.8499820981023994e-07, "loss": 0.0001, "step": 15130 }, { "epoch": 0.9751240574853387, "grad_norm": 0.24854126418726763, "learning_rate": 2.842821339061941e-07, "loss": 0.0005, "step": 15131 }, { "epoch": 0.9751885029322679, "grad_norm": 0.008830847027775436, "learning_rate": 2.835660580021482e-07, "loss": 0.0, "step": 15132 }, { "epoch": 0.975252948379197, "grad_norm": 0.0008734777265217501, "learning_rate": 2.828499820981024e-07, "loss": 0.0, "step": 15133 }, { "epoch": 0.9753173938261261, "grad_norm": 0.36864601786661755, "learning_rate": 2.821339061940566e-07, "loss": 0.0007, "step": 15134 }, { "epoch": 0.9753818392730553, "grad_norm": 0.0022641853155302765, "learning_rate": 2.8141783029001074e-07, "loss": 0.0, "step": 15135 }, { "epoch": 0.9754462847199845, "grad_norm": 0.06988027629866522, "learning_rate": 2.8070175438596494e-07, "loss": 0.0002, "step": 15136 }, { "epoch": 0.9755107301669137, "grad_norm": 0.0006696937469466642, "learning_rate": 2.799856784819191e-07, "loss": 0.0, "step": 15137 }, { "epoch": 0.9755751756138429, "grad_norm": 0.0013796730065024532, "learning_rate": 2.7926960257787327e-07, "loss": 0.0, "step": 15138 }, { "epoch": 0.9756396210607721, "grad_norm": 0.0004004937699149055, "learning_rate": 2.7855352667382746e-07, "loss": 0.0, "step": 15139 }, { "epoch": 0.9757040665077013, "grad_norm": 0.005334612036549248, "learning_rate": 2.778374507697816e-07, "loss": 0.0, "step": 15140 }, { "epoch": 0.9757685119546304, "grad_norm": 0.002374710997573885, "learning_rate": 2.771213748657358e-07, "loss": 0.0, "step": 15141 }, { "epoch": 0.9758329574015596, "grad_norm": 0.0010283480037241186, "learning_rate": 2.7640529896168994e-07, "loss": 0.0, "step": 15142 }, { "epoch": 0.9758974028484888, "grad_norm": 0.0018373973709965015, "learning_rate": 2.7568922305764413e-07, "loss": 0.0, "step": 15143 }, { "epoch": 0.9759618482954179, "grad_norm": 0.00855921588567582, "learning_rate": 2.749731471535983e-07, "loss": 0.0, "step": 15144 }, { "epoch": 0.9760262937423471, "grad_norm": 0.0020050944102561717, "learning_rate": 2.7425707124955246e-07, "loss": 0.0, "step": 15145 }, { "epoch": 0.9760907391892762, "grad_norm": 0.022109847137461454, "learning_rate": 2.7354099534550665e-07, "loss": 0.0, "step": 15146 }, { "epoch": 0.9761551846362054, "grad_norm": 0.0011914119683068853, "learning_rate": 2.728249194414608e-07, "loss": 0.0, "step": 15147 }, { "epoch": 0.9762196300831346, "grad_norm": 0.0011568151027143419, "learning_rate": 2.72108843537415e-07, "loss": 0.0, "step": 15148 }, { "epoch": 0.9762840755300638, "grad_norm": 0.0003769420355886379, "learning_rate": 2.713927676333692e-07, "loss": 0.0, "step": 15149 }, { "epoch": 0.976348520976993, "grad_norm": 0.004740660637754758, "learning_rate": 2.706766917293233e-07, "loss": 0.0, "step": 15150 }, { "epoch": 0.9764129664239222, "grad_norm": 0.01738213764846972, "learning_rate": 2.699606158252775e-07, "loss": 0.0002, "step": 15151 }, { "epoch": 0.9764774118708514, "grad_norm": 0.0019549345364176865, "learning_rate": 2.692445399212317e-07, "loss": 0.0, "step": 15152 }, { "epoch": 0.9765418573177805, "grad_norm": 0.0007493480390459141, "learning_rate": 2.6852846401718585e-07, "loss": 0.0, "step": 15153 }, { "epoch": 0.9766063027647097, "grad_norm": 0.0026944598274480157, "learning_rate": 2.6781238811314004e-07, "loss": 0.0, "step": 15154 }, { "epoch": 0.9766707482116388, "grad_norm": 0.011713198623444991, "learning_rate": 2.670963122090942e-07, "loss": 0.0, "step": 15155 }, { "epoch": 0.976735193658568, "grad_norm": 0.022796453401214445, "learning_rate": 2.6638023630504837e-07, "loss": 0.0, "step": 15156 }, { "epoch": 0.9767996391054972, "grad_norm": 0.0003065624087929334, "learning_rate": 2.6566416040100256e-07, "loss": 0.0, "step": 15157 }, { "epoch": 0.9768640845524263, "grad_norm": 0.00849296321003075, "learning_rate": 2.649480844969567e-07, "loss": 0.0, "step": 15158 }, { "epoch": 0.9769285299993555, "grad_norm": 0.0007528658127822991, "learning_rate": 2.6423200859291084e-07, "loss": 0.0, "step": 15159 }, { "epoch": 0.9769929754462847, "grad_norm": 0.013537214203229, "learning_rate": 2.6351593268886504e-07, "loss": 0.0, "step": 15160 }, { "epoch": 0.9770574208932139, "grad_norm": 0.008491264624011879, "learning_rate": 2.6279985678481923e-07, "loss": 0.0, "step": 15161 }, { "epoch": 0.9771218663401431, "grad_norm": 0.040971711185676433, "learning_rate": 2.6208378088077337e-07, "loss": 0.0001, "step": 15162 }, { "epoch": 0.9771863117870723, "grad_norm": 0.0021006739403628245, "learning_rate": 2.6136770497672756e-07, "loss": 0.0, "step": 15163 }, { "epoch": 0.9772507572340015, "grad_norm": 0.0033201136895484383, "learning_rate": 2.606516290726817e-07, "loss": 0.0, "step": 15164 }, { "epoch": 0.9773152026809306, "grad_norm": 0.0009516754660645897, "learning_rate": 2.599355531686359e-07, "loss": 0.0, "step": 15165 }, { "epoch": 0.9773796481278597, "grad_norm": 0.0014256401567068185, "learning_rate": 2.592194772645901e-07, "loss": 0.0, "step": 15166 }, { "epoch": 0.9774440935747889, "grad_norm": 0.007363190876471923, "learning_rate": 2.5850340136054423e-07, "loss": 0.0001, "step": 15167 }, { "epoch": 0.9775085390217181, "grad_norm": 0.0001785714074357449, "learning_rate": 2.577873254564984e-07, "loss": 0.0, "step": 15168 }, { "epoch": 0.9775729844686473, "grad_norm": 0.0010834485266816886, "learning_rate": 2.5707124955245256e-07, "loss": 0.0, "step": 15169 }, { "epoch": 0.9776374299155765, "grad_norm": 0.14266291148414587, "learning_rate": 2.5635517364840675e-07, "loss": 0.0011, "step": 15170 }, { "epoch": 0.9777018753625056, "grad_norm": 0.10230791742266519, "learning_rate": 2.5563909774436095e-07, "loss": 0.0003, "step": 15171 }, { "epoch": 0.9777663208094348, "grad_norm": 0.18012416689572205, "learning_rate": 2.549230218403151e-07, "loss": 0.0011, "step": 15172 }, { "epoch": 0.977830766256364, "grad_norm": 0.018825811985009936, "learning_rate": 2.542069459362693e-07, "loss": 0.0, "step": 15173 }, { "epoch": 0.9778952117032932, "grad_norm": 0.0003657376167657997, "learning_rate": 2.534908700322234e-07, "loss": 0.0, "step": 15174 }, { "epoch": 0.9779596571502224, "grad_norm": 0.0016266523017274357, "learning_rate": 2.527747941281776e-07, "loss": 0.0, "step": 15175 }, { "epoch": 0.9780241025971516, "grad_norm": 0.031643844370067425, "learning_rate": 2.520587182241318e-07, "loss": 0.0002, "step": 15176 }, { "epoch": 0.9780885480440807, "grad_norm": 0.0001272113728231804, "learning_rate": 2.5134264232008594e-07, "loss": 0.0, "step": 15177 }, { "epoch": 0.9781529934910098, "grad_norm": 0.051368001198941485, "learning_rate": 2.506265664160401e-07, "loss": 0.0001, "step": 15178 }, { "epoch": 0.978217438937939, "grad_norm": 0.11006070518329952, "learning_rate": 2.499104905119943e-07, "loss": 0.0017, "step": 15179 }, { "epoch": 0.9782818843848682, "grad_norm": 0.007755887701504084, "learning_rate": 2.4919441460794847e-07, "loss": 0.0, "step": 15180 }, { "epoch": 0.9783463298317974, "grad_norm": 0.0017534684004093396, "learning_rate": 2.4847833870390266e-07, "loss": 0.0, "step": 15181 }, { "epoch": 0.9784107752787266, "grad_norm": 0.07081709706644274, "learning_rate": 2.477622627998568e-07, "loss": 0.0001, "step": 15182 }, { "epoch": 0.9784752207256557, "grad_norm": 0.0018758709562702654, "learning_rate": 2.4704618689581094e-07, "loss": 0.0, "step": 15183 }, { "epoch": 0.9785396661725849, "grad_norm": 0.001043274861493202, "learning_rate": 2.4633011099176513e-07, "loss": 0.0, "step": 15184 }, { "epoch": 0.9786041116195141, "grad_norm": 0.0011120056715451592, "learning_rate": 2.4561403508771933e-07, "loss": 0.0, "step": 15185 }, { "epoch": 0.9786685570664433, "grad_norm": 0.07570947081657063, "learning_rate": 2.4489795918367347e-07, "loss": 0.0002, "step": 15186 }, { "epoch": 0.9787330025133725, "grad_norm": 0.0002145057054067127, "learning_rate": 2.4418188327962766e-07, "loss": 0.0, "step": 15187 }, { "epoch": 0.9787974479603017, "grad_norm": 0.003613192592037386, "learning_rate": 2.4346580737558185e-07, "loss": 0.0, "step": 15188 }, { "epoch": 0.9788618934072307, "grad_norm": 0.01221347519544524, "learning_rate": 2.42749731471536e-07, "loss": 0.0, "step": 15189 }, { "epoch": 0.9789263388541599, "grad_norm": 0.0004138912713609249, "learning_rate": 2.420336555674902e-07, "loss": 0.0, "step": 15190 }, { "epoch": 0.9789907843010891, "grad_norm": 0.013110504602359498, "learning_rate": 2.413175796634443e-07, "loss": 0.0, "step": 15191 }, { "epoch": 0.9790552297480183, "grad_norm": 0.07918108957232092, "learning_rate": 2.406015037593985e-07, "loss": 0.0001, "step": 15192 }, { "epoch": 0.9791196751949475, "grad_norm": 0.0009919039297520671, "learning_rate": 2.398854278553527e-07, "loss": 0.0, "step": 15193 }, { "epoch": 0.9791841206418767, "grad_norm": 0.0015389498764722693, "learning_rate": 2.3916935195130685e-07, "loss": 0.0, "step": 15194 }, { "epoch": 0.9792485660888058, "grad_norm": 0.0007932537619711912, "learning_rate": 2.3845327604726104e-07, "loss": 0.0, "step": 15195 }, { "epoch": 0.979313011535735, "grad_norm": 0.3223723494933655, "learning_rate": 2.377372001432152e-07, "loss": 0.0007, "step": 15196 }, { "epoch": 0.9793774569826642, "grad_norm": 0.0037370869206710114, "learning_rate": 2.3702112423916935e-07, "loss": 0.0, "step": 15197 }, { "epoch": 0.9794419024295934, "grad_norm": 0.01667958658079532, "learning_rate": 2.3630504833512354e-07, "loss": 0.0, "step": 15198 }, { "epoch": 0.9795063478765226, "grad_norm": 0.045919618893321044, "learning_rate": 2.355889724310777e-07, "loss": 0.0001, "step": 15199 }, { "epoch": 0.9795707933234516, "grad_norm": 0.00020583061499714295, "learning_rate": 2.348728965270319e-07, "loss": 0.0, "step": 15200 }, { "epoch": 0.9796352387703808, "grad_norm": 0.0035573688295585603, "learning_rate": 2.3415682062298607e-07, "loss": 0.0, "step": 15201 }, { "epoch": 0.97969968421731, "grad_norm": 0.0003429043040178539, "learning_rate": 2.334407447189402e-07, "loss": 0.0, "step": 15202 }, { "epoch": 0.9797641296642392, "grad_norm": 0.00013085403944323605, "learning_rate": 2.327246688148944e-07, "loss": 0.0, "step": 15203 }, { "epoch": 0.9798285751111684, "grad_norm": 0.005921728071037252, "learning_rate": 2.3200859291084857e-07, "loss": 0.0, "step": 15204 }, { "epoch": 0.9798930205580976, "grad_norm": 0.000992523744680652, "learning_rate": 2.3129251700680273e-07, "loss": 0.0, "step": 15205 }, { "epoch": 0.9799574660050268, "grad_norm": 0.0006133625288807913, "learning_rate": 2.3057644110275693e-07, "loss": 0.0, "step": 15206 }, { "epoch": 0.9800219114519559, "grad_norm": 0.0008549871418847644, "learning_rate": 2.2986036519871107e-07, "loss": 0.0, "step": 15207 }, { "epoch": 0.9800863568988851, "grad_norm": 0.00020320387837958342, "learning_rate": 2.2914428929466526e-07, "loss": 0.0, "step": 15208 }, { "epoch": 0.9801508023458143, "grad_norm": 0.22708419225833618, "learning_rate": 2.2842821339061943e-07, "loss": 0.0029, "step": 15209 }, { "epoch": 0.9802152477927435, "grad_norm": 0.0004024974515455393, "learning_rate": 2.277121374865736e-07, "loss": 0.0, "step": 15210 }, { "epoch": 0.9802796932396726, "grad_norm": 0.04274219695899788, "learning_rate": 2.2699606158252778e-07, "loss": 0.0001, "step": 15211 }, { "epoch": 0.9803441386866018, "grad_norm": 0.034374677117413543, "learning_rate": 2.2627998567848195e-07, "loss": 0.0001, "step": 15212 }, { "epoch": 0.9804085841335309, "grad_norm": 0.000535652454101118, "learning_rate": 2.255639097744361e-07, "loss": 0.0, "step": 15213 }, { "epoch": 0.9804730295804601, "grad_norm": 0.14314235455022237, "learning_rate": 2.2484783387039028e-07, "loss": 0.0002, "step": 15214 }, { "epoch": 0.9805374750273893, "grad_norm": 0.0018527310044761708, "learning_rate": 2.2413175796634445e-07, "loss": 0.0, "step": 15215 }, { "epoch": 0.9806019204743185, "grad_norm": 0.013785798169173939, "learning_rate": 2.2341568206229862e-07, "loss": 0.0, "step": 15216 }, { "epoch": 0.9806663659212477, "grad_norm": 0.008295814609581176, "learning_rate": 2.226996061582528e-07, "loss": 0.0, "step": 15217 }, { "epoch": 0.9807308113681769, "grad_norm": 0.00018404890305874304, "learning_rate": 2.2198353025420695e-07, "loss": 0.0, "step": 15218 }, { "epoch": 0.980795256815106, "grad_norm": 0.00012482961636409894, "learning_rate": 2.2126745435016114e-07, "loss": 0.0, "step": 15219 }, { "epoch": 0.9808597022620352, "grad_norm": 0.0049240704985918085, "learning_rate": 2.205513784461153e-07, "loss": 0.0, "step": 15220 }, { "epoch": 0.9809241477089644, "grad_norm": 0.0031281767796909107, "learning_rate": 2.1983530254206947e-07, "loss": 0.0, "step": 15221 }, { "epoch": 0.9809885931558935, "grad_norm": 0.0008334030137153192, "learning_rate": 2.1911922663802367e-07, "loss": 0.0, "step": 15222 }, { "epoch": 0.9810530386028227, "grad_norm": 0.00019850462293857934, "learning_rate": 2.184031507339778e-07, "loss": 0.0, "step": 15223 }, { "epoch": 0.9811174840497519, "grad_norm": 0.00036782715442014083, "learning_rate": 2.1768707482993197e-07, "loss": 0.0, "step": 15224 }, { "epoch": 0.981181929496681, "grad_norm": 0.0003710061416936885, "learning_rate": 2.1697099892588617e-07, "loss": 0.0, "step": 15225 }, { "epoch": 0.9812463749436102, "grad_norm": 0.012523587796355857, "learning_rate": 2.1625492302184033e-07, "loss": 0.0, "step": 15226 }, { "epoch": 0.9813108203905394, "grad_norm": 0.002729799152419526, "learning_rate": 2.1553884711779453e-07, "loss": 0.0, "step": 15227 }, { "epoch": 0.9813752658374686, "grad_norm": 0.0001641298882696401, "learning_rate": 2.1482277121374867e-07, "loss": 0.0, "step": 15228 }, { "epoch": 0.9814397112843978, "grad_norm": 1.309516649618846, "learning_rate": 2.1410669530970283e-07, "loss": 0.0006, "step": 15229 }, { "epoch": 0.981504156731327, "grad_norm": 0.014743855267792875, "learning_rate": 2.1339061940565702e-07, "loss": 0.0, "step": 15230 }, { "epoch": 0.9815686021782561, "grad_norm": 0.0023293280917756737, "learning_rate": 2.126745435016112e-07, "loss": 0.0, "step": 15231 }, { "epoch": 0.9816330476251853, "grad_norm": 9.553283079941483e-05, "learning_rate": 2.1195846759756536e-07, "loss": 0.0, "step": 15232 }, { "epoch": 0.9816974930721144, "grad_norm": 4.284839848487141e-05, "learning_rate": 2.1124239169351955e-07, "loss": 0.0, "step": 15233 }, { "epoch": 0.9817619385190436, "grad_norm": 0.0060593230029311156, "learning_rate": 2.105263157894737e-07, "loss": 0.0001, "step": 15234 }, { "epoch": 0.9818263839659728, "grad_norm": 0.0011964518485110813, "learning_rate": 2.0981023988542788e-07, "loss": 0.0, "step": 15235 }, { "epoch": 0.981890829412902, "grad_norm": 0.32554900492517685, "learning_rate": 2.0909416398138205e-07, "loss": 0.0022, "step": 15236 }, { "epoch": 0.9819552748598311, "grad_norm": 0.0006848094652021027, "learning_rate": 2.0837808807733622e-07, "loss": 0.0, "step": 15237 }, { "epoch": 0.9820197203067603, "grad_norm": 0.001575442477279625, "learning_rate": 2.076620121732904e-07, "loss": 0.0, "step": 15238 }, { "epoch": 0.9820841657536895, "grad_norm": 0.07122442175884686, "learning_rate": 2.0694593626924455e-07, "loss": 0.0003, "step": 15239 }, { "epoch": 0.9821486112006187, "grad_norm": 0.047471250288807096, "learning_rate": 2.0622986036519871e-07, "loss": 0.0004, "step": 15240 }, { "epoch": 0.9822130566475479, "grad_norm": 0.0006905600191781044, "learning_rate": 2.055137844611529e-07, "loss": 0.0, "step": 15241 }, { "epoch": 0.9822775020944771, "grad_norm": 3.866391618307458, "learning_rate": 2.0479770855710707e-07, "loss": 0.0589, "step": 15242 }, { "epoch": 0.9823419475414062, "grad_norm": 0.0015586490202091803, "learning_rate": 2.0408163265306121e-07, "loss": 0.0, "step": 15243 }, { "epoch": 0.9824063929883353, "grad_norm": 0.0002299173824988018, "learning_rate": 2.033655567490154e-07, "loss": 0.0, "step": 15244 }, { "epoch": 0.9824708384352645, "grad_norm": 0.05903546807777156, "learning_rate": 2.0264948084496957e-07, "loss": 0.0003, "step": 15245 }, { "epoch": 0.9825352838821937, "grad_norm": 0.05004071891032356, "learning_rate": 2.0193340494092377e-07, "loss": 0.0001, "step": 15246 }, { "epoch": 0.9825997293291229, "grad_norm": 0.00033025603537803906, "learning_rate": 2.0121732903687793e-07, "loss": 0.0, "step": 15247 }, { "epoch": 0.9826641747760521, "grad_norm": 0.0006552670394381289, "learning_rate": 2.005012531328321e-07, "loss": 0.0, "step": 15248 }, { "epoch": 0.9827286202229812, "grad_norm": 0.0016977273912929997, "learning_rate": 1.997851772287863e-07, "loss": 0.0, "step": 15249 }, { "epoch": 0.9827930656699104, "grad_norm": 0.18565537141618688, "learning_rate": 1.9906910132474043e-07, "loss": 0.0005, "step": 15250 }, { "epoch": 0.9828575111168396, "grad_norm": 0.07746788176045784, "learning_rate": 1.983530254206946e-07, "loss": 0.0017, "step": 15251 }, { "epoch": 0.9829219565637688, "grad_norm": 0.001825236225272376, "learning_rate": 1.976369495166488e-07, "loss": 0.0, "step": 15252 }, { "epoch": 0.982986402010698, "grad_norm": 0.02276023707093468, "learning_rate": 1.9692087361260296e-07, "loss": 0.0002, "step": 15253 }, { "epoch": 0.9830508474576272, "grad_norm": 0.0002736884330044127, "learning_rate": 1.9620479770855715e-07, "loss": 0.0, "step": 15254 }, { "epoch": 0.9831152929045563, "grad_norm": 0.003627516108369626, "learning_rate": 1.954887218045113e-07, "loss": 0.0, "step": 15255 }, { "epoch": 0.9831797383514854, "grad_norm": 0.0022958784688874136, "learning_rate": 1.9477264590046546e-07, "loss": 0.0, "step": 15256 }, { "epoch": 0.9832441837984146, "grad_norm": 0.00014915189100592606, "learning_rate": 1.9405656999641965e-07, "loss": 0.0, "step": 15257 }, { "epoch": 0.9833086292453438, "grad_norm": 6.148843509926524e-05, "learning_rate": 1.9334049409237381e-07, "loss": 0.0, "step": 15258 }, { "epoch": 0.983373074692273, "grad_norm": 0.00010684416463237988, "learning_rate": 1.9262441818832795e-07, "loss": 0.0, "step": 15259 }, { "epoch": 0.9834375201392022, "grad_norm": 0.8740741054404098, "learning_rate": 1.9190834228428215e-07, "loss": 0.0013, "step": 15260 }, { "epoch": 0.9835019655861313, "grad_norm": 0.001053662257371922, "learning_rate": 1.9119226638023631e-07, "loss": 0.0, "step": 15261 }, { "epoch": 0.9835664110330605, "grad_norm": 0.2909590903034731, "learning_rate": 1.904761904761905e-07, "loss": 0.0005, "step": 15262 }, { "epoch": 0.9836308564799897, "grad_norm": 0.001149833658102217, "learning_rate": 1.8976011457214467e-07, "loss": 0.0, "step": 15263 }, { "epoch": 0.9836953019269189, "grad_norm": 0.0009560136118597889, "learning_rate": 1.890440386680988e-07, "loss": 0.0, "step": 15264 }, { "epoch": 0.9837597473738481, "grad_norm": 0.04803480853553378, "learning_rate": 1.88327962764053e-07, "loss": 0.0002, "step": 15265 }, { "epoch": 0.9838241928207773, "grad_norm": 0.0016538616173160513, "learning_rate": 1.8761188686000717e-07, "loss": 0.0, "step": 15266 }, { "epoch": 0.9838886382677063, "grad_norm": 0.01374418114037808, "learning_rate": 1.8689581095596134e-07, "loss": 0.0001, "step": 15267 }, { "epoch": 0.9839530837146355, "grad_norm": 0.19178463067401658, "learning_rate": 1.8617973505191553e-07, "loss": 0.0003, "step": 15268 }, { "epoch": 0.9840175291615647, "grad_norm": 0.0006956476860202471, "learning_rate": 1.854636591478697e-07, "loss": 0.0, "step": 15269 }, { "epoch": 0.9840819746084939, "grad_norm": 0.00035165631147443776, "learning_rate": 1.8474758324382384e-07, "loss": 0.0, "step": 15270 }, { "epoch": 0.9841464200554231, "grad_norm": 0.002953816493226346, "learning_rate": 1.8403150733977803e-07, "loss": 0.0, "step": 15271 }, { "epoch": 0.9842108655023523, "grad_norm": 0.0003641283605604771, "learning_rate": 1.833154314357322e-07, "loss": 0.0, "step": 15272 }, { "epoch": 0.9842753109492814, "grad_norm": 0.0033303717674017914, "learning_rate": 1.825993555316864e-07, "loss": 0.0, "step": 15273 }, { "epoch": 0.9843397563962106, "grad_norm": 0.0006977955450955413, "learning_rate": 1.8188327962764056e-07, "loss": 0.0, "step": 15274 }, { "epoch": 0.9844042018431398, "grad_norm": 0.006914318477005871, "learning_rate": 1.811672037235947e-07, "loss": 0.0, "step": 15275 }, { "epoch": 0.984468647290069, "grad_norm": 0.0029855712369581877, "learning_rate": 1.804511278195489e-07, "loss": 0.0, "step": 15276 }, { "epoch": 0.9845330927369982, "grad_norm": 0.0007799178281663518, "learning_rate": 1.7973505191550305e-07, "loss": 0.0, "step": 15277 }, { "epoch": 0.9845975381839273, "grad_norm": 0.010909400934901978, "learning_rate": 1.7901897601145722e-07, "loss": 0.0, "step": 15278 }, { "epoch": 0.9846619836308564, "grad_norm": 0.0044053793582521416, "learning_rate": 1.7830290010741141e-07, "loss": 0.0, "step": 15279 }, { "epoch": 0.9847264290777856, "grad_norm": 0.9564209529679507, "learning_rate": 1.7758682420336555e-07, "loss": 0.0091, "step": 15280 }, { "epoch": 0.9847908745247148, "grad_norm": 0.40579912033949256, "learning_rate": 1.7687074829931975e-07, "loss": 0.0035, "step": 15281 }, { "epoch": 0.984855319971644, "grad_norm": 0.09994860926754734, "learning_rate": 1.761546723952739e-07, "loss": 0.0001, "step": 15282 }, { "epoch": 0.9849197654185732, "grad_norm": 0.04675263088519276, "learning_rate": 1.7543859649122808e-07, "loss": 0.0003, "step": 15283 }, { "epoch": 0.9849842108655024, "grad_norm": 0.00011445797154276652, "learning_rate": 1.7472252058718227e-07, "loss": 0.0, "step": 15284 }, { "epoch": 0.9850486563124315, "grad_norm": 0.012145525836946542, "learning_rate": 1.7400644468313644e-07, "loss": 0.0, "step": 15285 }, { "epoch": 0.9851131017593607, "grad_norm": 0.0003475919052068295, "learning_rate": 1.7329036877909058e-07, "loss": 0.0, "step": 15286 }, { "epoch": 0.9851775472062899, "grad_norm": 0.02558688960961057, "learning_rate": 1.7257429287504477e-07, "loss": 0.0, "step": 15287 }, { "epoch": 0.9852419926532191, "grad_norm": 0.00010669744202209233, "learning_rate": 1.7185821697099894e-07, "loss": 0.0, "step": 15288 }, { "epoch": 0.9853064381001482, "grad_norm": 0.0006361258570100312, "learning_rate": 1.7114214106695313e-07, "loss": 0.0, "step": 15289 }, { "epoch": 0.9853708835470774, "grad_norm": 0.046747478735065066, "learning_rate": 1.704260651629073e-07, "loss": 0.0001, "step": 15290 }, { "epoch": 0.9854353289940065, "grad_norm": 0.00012749033638977258, "learning_rate": 1.6970998925886144e-07, "loss": 0.0, "step": 15291 }, { "epoch": 0.9854997744409357, "grad_norm": 0.0007303949571343691, "learning_rate": 1.6899391335481563e-07, "loss": 0.0, "step": 15292 }, { "epoch": 0.9855642198878649, "grad_norm": 0.00033894960956004134, "learning_rate": 1.682778374507698e-07, "loss": 0.0, "step": 15293 }, { "epoch": 0.9856286653347941, "grad_norm": 0.12599162817477413, "learning_rate": 1.6756176154672396e-07, "loss": 0.0014, "step": 15294 }, { "epoch": 0.9856931107817233, "grad_norm": 0.0004907227780169364, "learning_rate": 1.6684568564267816e-07, "loss": 0.0, "step": 15295 }, { "epoch": 0.9857575562286525, "grad_norm": 0.3005311471082716, "learning_rate": 1.661296097386323e-07, "loss": 0.0012, "step": 15296 }, { "epoch": 0.9858220016755816, "grad_norm": 0.001568842005944896, "learning_rate": 1.6541353383458646e-07, "loss": 0.0, "step": 15297 }, { "epoch": 0.9858864471225108, "grad_norm": 0.004407247136844806, "learning_rate": 1.6469745793054065e-07, "loss": 0.0, "step": 15298 }, { "epoch": 0.98595089256944, "grad_norm": 0.0003473983895130904, "learning_rate": 1.6398138202649482e-07, "loss": 0.0, "step": 15299 }, { "epoch": 0.9860153380163691, "grad_norm": 0.00863692447582535, "learning_rate": 1.6326530612244901e-07, "loss": 0.0, "step": 15300 }, { "epoch": 0.9860797834632983, "grad_norm": 0.00211103398049022, "learning_rate": 1.6254923021840315e-07, "loss": 0.0, "step": 15301 }, { "epoch": 0.9861442289102275, "grad_norm": 0.0018230068901888296, "learning_rate": 1.6183315431435732e-07, "loss": 0.0, "step": 15302 }, { "epoch": 0.9862086743571566, "grad_norm": 0.015832869636882328, "learning_rate": 1.611170784103115e-07, "loss": 0.0, "step": 15303 }, { "epoch": 0.9862731198040858, "grad_norm": 0.023791212077183488, "learning_rate": 1.6040100250626568e-07, "loss": 0.0015, "step": 15304 }, { "epoch": 0.986337565251015, "grad_norm": 0.0038339565004358363, "learning_rate": 1.5968492660221985e-07, "loss": 0.0, "step": 15305 }, { "epoch": 0.9864020106979442, "grad_norm": 0.043639507155779036, "learning_rate": 1.5896885069817404e-07, "loss": 0.0001, "step": 15306 }, { "epoch": 0.9864664561448734, "grad_norm": 0.006059066329226813, "learning_rate": 1.5825277479412818e-07, "loss": 0.0, "step": 15307 }, { "epoch": 0.9865309015918026, "grad_norm": 0.0073793358972736945, "learning_rate": 1.5753669889008237e-07, "loss": 0.0001, "step": 15308 }, { "epoch": 0.9865953470387318, "grad_norm": 0.11897962667166657, "learning_rate": 1.5682062298603654e-07, "loss": 0.0004, "step": 15309 }, { "epoch": 0.9866597924856609, "grad_norm": 0.0016394853548579568, "learning_rate": 1.561045470819907e-07, "loss": 0.0, "step": 15310 }, { "epoch": 0.98672423793259, "grad_norm": 0.0023086441209033705, "learning_rate": 1.5538847117794487e-07, "loss": 0.0, "step": 15311 }, { "epoch": 0.9867886833795192, "grad_norm": 0.000473328226471948, "learning_rate": 1.5467239527389904e-07, "loss": 0.0, "step": 15312 }, { "epoch": 0.9868531288264484, "grad_norm": 0.003990274313165149, "learning_rate": 1.5395631936985323e-07, "loss": 0.0, "step": 15313 }, { "epoch": 0.9869175742733776, "grad_norm": 0.17149084682895488, "learning_rate": 1.532402434658074e-07, "loss": 0.0006, "step": 15314 }, { "epoch": 0.9869820197203067, "grad_norm": 0.009257975158263423, "learning_rate": 1.5252416756176156e-07, "loss": 0.0, "step": 15315 }, { "epoch": 0.9870464651672359, "grad_norm": 0.014955583267460219, "learning_rate": 1.5180809165771573e-07, "loss": 0.0, "step": 15316 }, { "epoch": 0.9871109106141651, "grad_norm": 0.10271010438525041, "learning_rate": 1.510920157536699e-07, "loss": 0.0001, "step": 15317 }, { "epoch": 0.9871753560610943, "grad_norm": 0.001647549115393784, "learning_rate": 1.5037593984962406e-07, "loss": 0.0, "step": 15318 }, { "epoch": 0.9872398015080235, "grad_norm": 0.00025357547641687695, "learning_rate": 1.4965986394557823e-07, "loss": 0.0, "step": 15319 }, { "epoch": 0.9873042469549527, "grad_norm": 0.019612992119780556, "learning_rate": 1.4894378804153242e-07, "loss": 0.0, "step": 15320 }, { "epoch": 0.9873686924018819, "grad_norm": 0.001171164674540023, "learning_rate": 1.4822771213748659e-07, "loss": 0.0, "step": 15321 }, { "epoch": 0.9874331378488109, "grad_norm": 0.0034558749272922897, "learning_rate": 1.4751163623344075e-07, "loss": 0.0, "step": 15322 }, { "epoch": 0.9874975832957401, "grad_norm": 0.00045139744625150767, "learning_rate": 1.4679556032939492e-07, "loss": 0.0, "step": 15323 }, { "epoch": 0.9875620287426693, "grad_norm": 0.0007023775731221126, "learning_rate": 1.460794844253491e-07, "loss": 0.0, "step": 15324 }, { "epoch": 0.9876264741895985, "grad_norm": 0.0009513016630730917, "learning_rate": 1.4536340852130328e-07, "loss": 0.0, "step": 15325 }, { "epoch": 0.9876909196365277, "grad_norm": 0.05982552834564737, "learning_rate": 1.4464733261725744e-07, "loss": 0.0002, "step": 15326 }, { "epoch": 0.9877553650834568, "grad_norm": 0.003501219232904495, "learning_rate": 1.439312567132116e-07, "loss": 0.0, "step": 15327 }, { "epoch": 0.987819810530386, "grad_norm": 0.007539849747465022, "learning_rate": 1.4321518080916578e-07, "loss": 0.0, "step": 15328 }, { "epoch": 0.9878842559773152, "grad_norm": 0.0007982695888039108, "learning_rate": 1.4249910490511997e-07, "loss": 0.0, "step": 15329 }, { "epoch": 0.9879487014242444, "grad_norm": 0.0011078097149875995, "learning_rate": 1.417830290010741e-07, "loss": 0.0, "step": 15330 }, { "epoch": 0.9880131468711736, "grad_norm": 0.0003443949979325242, "learning_rate": 1.410669530970283e-07, "loss": 0.0, "step": 15331 }, { "epoch": 0.9880775923181028, "grad_norm": 0.1633003675412003, "learning_rate": 1.4035087719298247e-07, "loss": 0.0005, "step": 15332 }, { "epoch": 0.988142037765032, "grad_norm": 0.18057904691791732, "learning_rate": 1.3963480128893664e-07, "loss": 0.0018, "step": 15333 }, { "epoch": 0.988206483211961, "grad_norm": 0.0007753720428181999, "learning_rate": 1.389187253848908e-07, "loss": 0.0, "step": 15334 }, { "epoch": 0.9882709286588902, "grad_norm": 0.0017123565601338646, "learning_rate": 1.3820264948084497e-07, "loss": 0.0, "step": 15335 }, { "epoch": 0.9883353741058194, "grad_norm": 0.0344613932183989, "learning_rate": 1.3748657357679916e-07, "loss": 0.0, "step": 15336 }, { "epoch": 0.9883998195527486, "grad_norm": 0.19325097253192008, "learning_rate": 1.3677049767275333e-07, "loss": 0.0002, "step": 15337 }, { "epoch": 0.9884642649996778, "grad_norm": 0.0009819431197656918, "learning_rate": 1.360544217687075e-07, "loss": 0.0, "step": 15338 }, { "epoch": 0.988528710446607, "grad_norm": 0.1516750895755982, "learning_rate": 1.3533834586466166e-07, "loss": 0.0023, "step": 15339 }, { "epoch": 0.9885931558935361, "grad_norm": 0.001658402824212663, "learning_rate": 1.3462226996061585e-07, "loss": 0.0, "step": 15340 }, { "epoch": 0.9886576013404653, "grad_norm": 0.0015698162912195304, "learning_rate": 1.3390619405657002e-07, "loss": 0.0, "step": 15341 }, { "epoch": 0.9887220467873945, "grad_norm": 0.008429947141429642, "learning_rate": 1.3319011815252419e-07, "loss": 0.0001, "step": 15342 }, { "epoch": 0.9887864922343237, "grad_norm": 0.0024822454325503923, "learning_rate": 1.3247404224847835e-07, "loss": 0.0, "step": 15343 }, { "epoch": 0.9888509376812529, "grad_norm": 4.2695759973628696e-05, "learning_rate": 1.3175796634443252e-07, "loss": 0.0, "step": 15344 }, { "epoch": 0.988915383128182, "grad_norm": 0.03248854376485151, "learning_rate": 1.3104189044038668e-07, "loss": 0.0001, "step": 15345 }, { "epoch": 0.9889798285751111, "grad_norm": 0.009666005164565861, "learning_rate": 1.3032581453634085e-07, "loss": 0.0, "step": 15346 }, { "epoch": 0.9890442740220403, "grad_norm": 0.012461908937499057, "learning_rate": 1.2960973863229504e-07, "loss": 0.0, "step": 15347 }, { "epoch": 0.9891087194689695, "grad_norm": 0.009181846757203724, "learning_rate": 1.288936627282492e-07, "loss": 0.0, "step": 15348 }, { "epoch": 0.9891731649158987, "grad_norm": 0.3086505185798492, "learning_rate": 1.2817758682420338e-07, "loss": 0.0005, "step": 15349 }, { "epoch": 0.9892376103628279, "grad_norm": 9.000946232606643e-05, "learning_rate": 1.2746151092015754e-07, "loss": 0.0, "step": 15350 }, { "epoch": 0.989302055809757, "grad_norm": 0.00017109891452988802, "learning_rate": 1.267454350161117e-07, "loss": 0.0, "step": 15351 }, { "epoch": 0.9893665012566862, "grad_norm": 0.0014436826725710914, "learning_rate": 1.260293591120659e-07, "loss": 0.0, "step": 15352 }, { "epoch": 0.9894309467036154, "grad_norm": 0.010794710318647652, "learning_rate": 1.2531328320802004e-07, "loss": 0.0, "step": 15353 }, { "epoch": 0.9894953921505446, "grad_norm": 0.00487863410298011, "learning_rate": 1.2459720730397423e-07, "loss": 0.0, "step": 15354 }, { "epoch": 0.9895598375974738, "grad_norm": 0.004251305964631465, "learning_rate": 1.238811313999284e-07, "loss": 0.0, "step": 15355 }, { "epoch": 0.9896242830444029, "grad_norm": 0.0009995787571245016, "learning_rate": 1.2316505549588257e-07, "loss": 0.0, "step": 15356 }, { "epoch": 0.989688728491332, "grad_norm": 0.0002976666463085584, "learning_rate": 1.2244897959183673e-07, "loss": 0.0, "step": 15357 }, { "epoch": 0.9897531739382612, "grad_norm": 0.00029859447566665933, "learning_rate": 1.2173290368779093e-07, "loss": 0.0, "step": 15358 }, { "epoch": 0.9898176193851904, "grad_norm": 0.017260082460229826, "learning_rate": 1.210168277837451e-07, "loss": 0.0002, "step": 15359 }, { "epoch": 0.9898820648321196, "grad_norm": 0.03121110798224628, "learning_rate": 1.2030075187969926e-07, "loss": 0.0001, "step": 15360 }, { "epoch": 0.9899465102790488, "grad_norm": 0.0005941675613315212, "learning_rate": 1.1958467597565343e-07, "loss": 0.0, "step": 15361 }, { "epoch": 0.990010955725978, "grad_norm": 9.114690847879131e-05, "learning_rate": 1.188686000716076e-07, "loss": 0.0, "step": 15362 }, { "epoch": 0.9900754011729072, "grad_norm": 1.7429828966388234, "learning_rate": 1.1815252416756177e-07, "loss": 0.0055, "step": 15363 }, { "epoch": 0.9901398466198363, "grad_norm": 0.518626867181079, "learning_rate": 1.1743644826351595e-07, "loss": 0.0028, "step": 15364 }, { "epoch": 0.9902042920667655, "grad_norm": 0.3350462068176509, "learning_rate": 1.167203723594701e-07, "loss": 0.0003, "step": 15365 }, { "epoch": 0.9902687375136947, "grad_norm": 0.05405229337421042, "learning_rate": 1.1600429645542428e-07, "loss": 0.0001, "step": 15366 }, { "epoch": 0.9903331829606238, "grad_norm": 0.06269417714193161, "learning_rate": 1.1528822055137846e-07, "loss": 0.0001, "step": 15367 }, { "epoch": 0.990397628407553, "grad_norm": 0.00020441138102923534, "learning_rate": 1.1457214464733263e-07, "loss": 0.0, "step": 15368 }, { "epoch": 0.9904620738544822, "grad_norm": 0.3037918203904865, "learning_rate": 1.138560687432868e-07, "loss": 0.0008, "step": 15369 }, { "epoch": 0.9905265193014113, "grad_norm": 0.02238230572787554, "learning_rate": 1.1313999283924098e-07, "loss": 0.0002, "step": 15370 }, { "epoch": 0.9905909647483405, "grad_norm": 0.1309645785006673, "learning_rate": 1.1242391693519514e-07, "loss": 0.0003, "step": 15371 }, { "epoch": 0.9906554101952697, "grad_norm": 0.001562903964244142, "learning_rate": 1.1170784103114931e-07, "loss": 0.0, "step": 15372 }, { "epoch": 0.9907198556421989, "grad_norm": 0.45439102627328637, "learning_rate": 1.1099176512710347e-07, "loss": 0.0021, "step": 15373 }, { "epoch": 0.9907843010891281, "grad_norm": 0.4411138224181444, "learning_rate": 1.1027568922305765e-07, "loss": 0.0053, "step": 15374 }, { "epoch": 0.9908487465360573, "grad_norm": 0.0006388717337027706, "learning_rate": 1.0955961331901183e-07, "loss": 0.0, "step": 15375 }, { "epoch": 0.9909131919829864, "grad_norm": 0.08775647044388801, "learning_rate": 1.0884353741496599e-07, "loss": 0.0009, "step": 15376 }, { "epoch": 0.9909776374299156, "grad_norm": 0.01739325179771781, "learning_rate": 1.0812746151092017e-07, "loss": 0.0001, "step": 15377 }, { "epoch": 0.9910420828768447, "grad_norm": 0.034971450178768425, "learning_rate": 1.0741138560687433e-07, "loss": 0.0001, "step": 15378 }, { "epoch": 0.9911065283237739, "grad_norm": 0.21837122514810614, "learning_rate": 1.0669530970282851e-07, "loss": 0.0027, "step": 15379 }, { "epoch": 0.9911709737707031, "grad_norm": 0.03615418008543626, "learning_rate": 1.0597923379878268e-07, "loss": 0.0, "step": 15380 }, { "epoch": 0.9912354192176323, "grad_norm": 0.0013153067476646975, "learning_rate": 1.0526315789473685e-07, "loss": 0.0, "step": 15381 }, { "epoch": 0.9912998646645614, "grad_norm": 0.013645522977196328, "learning_rate": 1.0454708199069102e-07, "loss": 0.0001, "step": 15382 }, { "epoch": 0.9913643101114906, "grad_norm": 0.018869085279380988, "learning_rate": 1.038310060866452e-07, "loss": 0.0, "step": 15383 }, { "epoch": 0.9914287555584198, "grad_norm": 0.00017469868199359676, "learning_rate": 1.0311493018259936e-07, "loss": 0.0, "step": 15384 }, { "epoch": 0.991493201005349, "grad_norm": 0.004191734400330657, "learning_rate": 1.0239885427855354e-07, "loss": 0.0, "step": 15385 }, { "epoch": 0.9915576464522782, "grad_norm": 0.00111540569401605, "learning_rate": 1.016827783745077e-07, "loss": 0.0, "step": 15386 }, { "epoch": 0.9916220918992074, "grad_norm": 0.0028105942217102444, "learning_rate": 1.0096670247046188e-07, "loss": 0.0, "step": 15387 }, { "epoch": 0.9916865373461365, "grad_norm": 0.0014986055018981306, "learning_rate": 1.0025062656641605e-07, "loss": 0.0, "step": 15388 }, { "epoch": 0.9917509827930656, "grad_norm": 0.0698733557021849, "learning_rate": 9.953455066237022e-08, "loss": 0.0001, "step": 15389 }, { "epoch": 0.9918154282399948, "grad_norm": 1.3593856109555784, "learning_rate": 9.88184747583244e-08, "loss": 0.0078, "step": 15390 }, { "epoch": 0.991879873686924, "grad_norm": 1.6501662604056602, "learning_rate": 9.810239885427857e-08, "loss": 0.0138, "step": 15391 }, { "epoch": 0.9919443191338532, "grad_norm": 0.17455723899428105, "learning_rate": 9.738632295023273e-08, "loss": 0.0006, "step": 15392 }, { "epoch": 0.9920087645807824, "grad_norm": 0.011819212039522532, "learning_rate": 9.667024704618691e-08, "loss": 0.0001, "step": 15393 }, { "epoch": 0.9920732100277115, "grad_norm": 0.00014493524179976452, "learning_rate": 9.595417114214107e-08, "loss": 0.0, "step": 15394 }, { "epoch": 0.9921376554746407, "grad_norm": 0.0006374483992628114, "learning_rate": 9.523809523809525e-08, "loss": 0.0, "step": 15395 }, { "epoch": 0.9922021009215699, "grad_norm": 0.16560081944202426, "learning_rate": 9.45220193340494e-08, "loss": 0.0011, "step": 15396 }, { "epoch": 0.9922665463684991, "grad_norm": 0.00026379790258233425, "learning_rate": 9.380594343000359e-08, "loss": 0.0, "step": 15397 }, { "epoch": 0.9923309918154283, "grad_norm": 0.02047838287760723, "learning_rate": 9.308986752595777e-08, "loss": 0.0001, "step": 15398 }, { "epoch": 0.9923954372623575, "grad_norm": 0.2446784085500637, "learning_rate": 9.237379162191192e-08, "loss": 0.0004, "step": 15399 }, { "epoch": 0.9924598827092865, "grad_norm": 0.003227860027021074, "learning_rate": 9.16577157178661e-08, "loss": 0.0, "step": 15400 }, { "epoch": 0.9925243281562157, "grad_norm": 0.0002485238947423622, "learning_rate": 9.094163981382028e-08, "loss": 0.0, "step": 15401 }, { "epoch": 0.9925887736031449, "grad_norm": 0.01720941809554806, "learning_rate": 9.022556390977444e-08, "loss": 0.0001, "step": 15402 }, { "epoch": 0.9926532190500741, "grad_norm": 0.00032100960062109813, "learning_rate": 8.950948800572861e-08, "loss": 0.0, "step": 15403 }, { "epoch": 0.9927176644970033, "grad_norm": 0.00013967251299634785, "learning_rate": 8.879341210168278e-08, "loss": 0.0, "step": 15404 }, { "epoch": 0.9927821099439325, "grad_norm": 0.009221607936256614, "learning_rate": 8.807733619763696e-08, "loss": 0.0, "step": 15405 }, { "epoch": 0.9928465553908616, "grad_norm": 0.02119800257858215, "learning_rate": 8.736126029359114e-08, "loss": 0.0015, "step": 15406 }, { "epoch": 0.9929110008377908, "grad_norm": 0.006607428148178863, "learning_rate": 8.664518438954529e-08, "loss": 0.0001, "step": 15407 }, { "epoch": 0.99297544628472, "grad_norm": 0.000993933285930776, "learning_rate": 8.592910848549947e-08, "loss": 0.0, "step": 15408 }, { "epoch": 0.9930398917316492, "grad_norm": 0.04713009178308316, "learning_rate": 8.521303258145365e-08, "loss": 0.0001, "step": 15409 }, { "epoch": 0.9931043371785784, "grad_norm": 0.00726139924226023, "learning_rate": 8.449695667740781e-08, "loss": 0.0001, "step": 15410 }, { "epoch": 0.9931687826255076, "grad_norm": 0.0018708575708756142, "learning_rate": 8.378088077336198e-08, "loss": 0.0, "step": 15411 }, { "epoch": 0.9932332280724366, "grad_norm": 0.0834809032921711, "learning_rate": 8.306480486931615e-08, "loss": 0.0002, "step": 15412 }, { "epoch": 0.9932976735193658, "grad_norm": 0.00013640415194037607, "learning_rate": 8.234872896527033e-08, "loss": 0.0, "step": 15413 }, { "epoch": 0.993362118966295, "grad_norm": 0.014602227139775253, "learning_rate": 8.163265306122451e-08, "loss": 0.0, "step": 15414 }, { "epoch": 0.9934265644132242, "grad_norm": 0.01868237382937571, "learning_rate": 8.091657715717866e-08, "loss": 0.0001, "step": 15415 }, { "epoch": 0.9934910098601534, "grad_norm": 0.6517121232399404, "learning_rate": 8.020050125313284e-08, "loss": 0.0011, "step": 15416 }, { "epoch": 0.9935554553070826, "grad_norm": 0.0210594611327598, "learning_rate": 7.948442534908702e-08, "loss": 0.0, "step": 15417 }, { "epoch": 0.9936199007540117, "grad_norm": 0.20600616643122802, "learning_rate": 7.876834944504119e-08, "loss": 0.0003, "step": 15418 }, { "epoch": 0.9936843462009409, "grad_norm": 0.0006812269480493978, "learning_rate": 7.805227354099535e-08, "loss": 0.0, "step": 15419 }, { "epoch": 0.9937487916478701, "grad_norm": 0.011687333493238744, "learning_rate": 7.733619763694952e-08, "loss": 0.0, "step": 15420 }, { "epoch": 0.9938132370947993, "grad_norm": 0.00022207912377938065, "learning_rate": 7.66201217329037e-08, "loss": 0.0, "step": 15421 }, { "epoch": 0.9938776825417285, "grad_norm": 0.006105163470982768, "learning_rate": 7.590404582885786e-08, "loss": 0.0, "step": 15422 }, { "epoch": 0.9939421279886576, "grad_norm": 0.004071650145522754, "learning_rate": 7.518796992481203e-08, "loss": 0.0, "step": 15423 }, { "epoch": 0.9940065734355867, "grad_norm": 0.0001294769158907121, "learning_rate": 7.447189402076621e-08, "loss": 0.0, "step": 15424 }, { "epoch": 0.9940710188825159, "grad_norm": 0.007293988964356481, "learning_rate": 7.375581811672038e-08, "loss": 0.0, "step": 15425 }, { "epoch": 0.9941354643294451, "grad_norm": 0.5071361732953923, "learning_rate": 7.303974221267456e-08, "loss": 0.0024, "step": 15426 }, { "epoch": 0.9941999097763743, "grad_norm": 0.004333334066827409, "learning_rate": 7.232366630862872e-08, "loss": 0.0, "step": 15427 }, { "epoch": 0.9942643552233035, "grad_norm": 0.017363078925581327, "learning_rate": 7.160759040458289e-08, "loss": 0.0, "step": 15428 }, { "epoch": 0.9943288006702327, "grad_norm": 0.0011615992960153968, "learning_rate": 7.089151450053705e-08, "loss": 0.0, "step": 15429 }, { "epoch": 0.9943932461171618, "grad_norm": 0.0003038760653883744, "learning_rate": 7.017543859649123e-08, "loss": 0.0, "step": 15430 }, { "epoch": 0.994457691564091, "grad_norm": 0.000252489032190593, "learning_rate": 6.94593626924454e-08, "loss": 0.0, "step": 15431 }, { "epoch": 0.9945221370110202, "grad_norm": 0.0002281339806080607, "learning_rate": 6.874328678839958e-08, "loss": 0.0, "step": 15432 }, { "epoch": 0.9945865824579494, "grad_norm": 0.0021669236006270874, "learning_rate": 6.802721088435375e-08, "loss": 0.0, "step": 15433 }, { "epoch": 0.9946510279048785, "grad_norm": 0.0035040173439457962, "learning_rate": 6.731113498030793e-08, "loss": 0.0, "step": 15434 }, { "epoch": 0.9947154733518077, "grad_norm": 4.2521529407209614e-05, "learning_rate": 6.659505907626209e-08, "loss": 0.0, "step": 15435 }, { "epoch": 0.9947799187987368, "grad_norm": 0.0026709807723655685, "learning_rate": 6.587898317221626e-08, "loss": 0.0, "step": 15436 }, { "epoch": 0.994844364245666, "grad_norm": 0.010820683146318837, "learning_rate": 6.516290726817043e-08, "loss": 0.0, "step": 15437 }, { "epoch": 0.9949088096925952, "grad_norm": 0.003205757880112345, "learning_rate": 6.44468313641246e-08, "loss": 0.0, "step": 15438 }, { "epoch": 0.9949732551395244, "grad_norm": 0.0005007387307297139, "learning_rate": 6.373075546007877e-08, "loss": 0.0, "step": 15439 }, { "epoch": 0.9950377005864536, "grad_norm": 0.046509660126946864, "learning_rate": 6.301467955603295e-08, "loss": 0.0003, "step": 15440 }, { "epoch": 0.9951021460333828, "grad_norm": 0.013549052359340931, "learning_rate": 6.229860365198712e-08, "loss": 0.0, "step": 15441 }, { "epoch": 0.995166591480312, "grad_norm": 0.009939784287012338, "learning_rate": 6.158252774794128e-08, "loss": 0.0001, "step": 15442 }, { "epoch": 0.9952310369272411, "grad_norm": 0.002741087693011581, "learning_rate": 6.086645184389546e-08, "loss": 0.0, "step": 15443 }, { "epoch": 0.9952954823741703, "grad_norm": 0.006260263699628107, "learning_rate": 6.015037593984963e-08, "loss": 0.0, "step": 15444 }, { "epoch": 0.9953599278210994, "grad_norm": 0.00049759231216585, "learning_rate": 5.94343000358038e-08, "loss": 0.0, "step": 15445 }, { "epoch": 0.9954243732680286, "grad_norm": 0.1380881394314596, "learning_rate": 5.8718224131757975e-08, "loss": 0.0003, "step": 15446 }, { "epoch": 0.9954888187149578, "grad_norm": 0.006507125813773554, "learning_rate": 5.800214822771214e-08, "loss": 0.0, "step": 15447 }, { "epoch": 0.9955532641618869, "grad_norm": 0.007913763476941546, "learning_rate": 5.7286072323666315e-08, "loss": 0.0, "step": 15448 }, { "epoch": 0.9956177096088161, "grad_norm": 0.0058919833109365456, "learning_rate": 5.656999641962049e-08, "loss": 0.0, "step": 15449 }, { "epoch": 0.9956821550557453, "grad_norm": 0.042774442493267895, "learning_rate": 5.5853920515574654e-08, "loss": 0.0002, "step": 15450 }, { "epoch": 0.9957466005026745, "grad_norm": 0.0007127180380731889, "learning_rate": 5.513784461152883e-08, "loss": 0.0, "step": 15451 }, { "epoch": 0.9958110459496037, "grad_norm": 0.1438407771996523, "learning_rate": 5.4421768707482993e-08, "loss": 0.0006, "step": 15452 }, { "epoch": 0.9958754913965329, "grad_norm": 0.0007669071129745034, "learning_rate": 5.3705692803437166e-08, "loss": 0.0, "step": 15453 }, { "epoch": 0.995939936843462, "grad_norm": 0.0002747317806881671, "learning_rate": 5.298961689939134e-08, "loss": 0.0, "step": 15454 }, { "epoch": 0.9960043822903912, "grad_norm": 5.1187894086526105e-05, "learning_rate": 5.227354099534551e-08, "loss": 0.0, "step": 15455 }, { "epoch": 0.9960688277373203, "grad_norm": 0.19142032104464268, "learning_rate": 5.155746509129968e-08, "loss": 0.0006, "step": 15456 }, { "epoch": 0.9961332731842495, "grad_norm": 0.6594525034042554, "learning_rate": 5.084138918725385e-08, "loss": 0.0018, "step": 15457 }, { "epoch": 0.9961977186311787, "grad_norm": 0.0003211975571060089, "learning_rate": 5.0125313283208025e-08, "loss": 0.0, "step": 15458 }, { "epoch": 0.9962621640781079, "grad_norm": 0.0012403521668988536, "learning_rate": 4.94092373791622e-08, "loss": 0.0, "step": 15459 }, { "epoch": 0.996326609525037, "grad_norm": 0.038828634005938584, "learning_rate": 4.8693161475116364e-08, "loss": 0.0001, "step": 15460 }, { "epoch": 0.9963910549719662, "grad_norm": 0.0019163043650427354, "learning_rate": 4.797708557107054e-08, "loss": 0.0, "step": 15461 }, { "epoch": 0.9964555004188954, "grad_norm": 0.006335604384163084, "learning_rate": 4.72610096670247e-08, "loss": 0.0, "step": 15462 }, { "epoch": 0.9965199458658246, "grad_norm": 0.0015440385523529667, "learning_rate": 4.654493376297888e-08, "loss": 0.0, "step": 15463 }, { "epoch": 0.9965843913127538, "grad_norm": 0.000717511672539573, "learning_rate": 4.582885785893305e-08, "loss": 0.0, "step": 15464 }, { "epoch": 0.996648836759683, "grad_norm": 0.00018937198611685636, "learning_rate": 4.511278195488722e-08, "loss": 0.0, "step": 15465 }, { "epoch": 0.9967132822066122, "grad_norm": 0.0010951535217054588, "learning_rate": 4.439670605084139e-08, "loss": 0.0, "step": 15466 }, { "epoch": 0.9967777276535412, "grad_norm": 0.038216985921274704, "learning_rate": 4.368063014679557e-08, "loss": 0.0, "step": 15467 }, { "epoch": 0.9968421731004704, "grad_norm": 0.001310616487340104, "learning_rate": 4.2964554242749734e-08, "loss": 0.0, "step": 15468 }, { "epoch": 0.9969066185473996, "grad_norm": 0.006292524679154751, "learning_rate": 4.224847833870391e-08, "loss": 0.0001, "step": 15469 }, { "epoch": 0.9969710639943288, "grad_norm": 0.000838499974695811, "learning_rate": 4.1532402434658074e-08, "loss": 0.0, "step": 15470 }, { "epoch": 0.997035509441258, "grad_norm": 0.0010050876167897022, "learning_rate": 4.0816326530612253e-08, "loss": 0.0, "step": 15471 }, { "epoch": 0.9970999548881871, "grad_norm": 0.005370776582966026, "learning_rate": 4.010025062656642e-08, "loss": 0.0, "step": 15472 }, { "epoch": 0.9971644003351163, "grad_norm": 0.0003092987419902564, "learning_rate": 3.938417472252059e-08, "loss": 0.0, "step": 15473 }, { "epoch": 0.9972288457820455, "grad_norm": 0.0013591428298022192, "learning_rate": 3.866809881847476e-08, "loss": 0.0, "step": 15474 }, { "epoch": 0.9972932912289747, "grad_norm": 0.23138436589381467, "learning_rate": 3.795202291442893e-08, "loss": 0.0018, "step": 15475 }, { "epoch": 0.9973577366759039, "grad_norm": 0.001873350951168154, "learning_rate": 3.7235947010383105e-08, "loss": 0.0, "step": 15476 }, { "epoch": 0.9974221821228331, "grad_norm": 0.004598840045965651, "learning_rate": 3.651987110633728e-08, "loss": 0.0, "step": 15477 }, { "epoch": 0.9974866275697621, "grad_norm": 0.005085088395541839, "learning_rate": 3.5803795202291444e-08, "loss": 0.0, "step": 15478 }, { "epoch": 0.9975510730166913, "grad_norm": 0.001091900765744506, "learning_rate": 3.508771929824562e-08, "loss": 0.0, "step": 15479 }, { "epoch": 0.9976155184636205, "grad_norm": 0.0009730691764594346, "learning_rate": 3.437164339419979e-08, "loss": 0.0, "step": 15480 }, { "epoch": 0.9976799639105497, "grad_norm": 0.0017135795294502306, "learning_rate": 3.365556749015396e-08, "loss": 0.0, "step": 15481 }, { "epoch": 0.9977444093574789, "grad_norm": 0.01811945222196252, "learning_rate": 3.293949158610813e-08, "loss": 0.0002, "step": 15482 }, { "epoch": 0.9978088548044081, "grad_norm": 0.009926933709437718, "learning_rate": 3.22234156820623e-08, "loss": 0.0, "step": 15483 }, { "epoch": 0.9978733002513372, "grad_norm": 0.0017775873375116566, "learning_rate": 3.1507339778016475e-08, "loss": 0.0, "step": 15484 }, { "epoch": 0.9979377456982664, "grad_norm": 0.0006008194079299575, "learning_rate": 3.079126387397064e-08, "loss": 0.0, "step": 15485 }, { "epoch": 0.9980021911451956, "grad_norm": 0.00021254665315161218, "learning_rate": 3.0075187969924815e-08, "loss": 0.0, "step": 15486 }, { "epoch": 0.9980666365921248, "grad_norm": 0.00026559116747325334, "learning_rate": 2.9359112065878988e-08, "loss": 0.0, "step": 15487 }, { "epoch": 0.998131082039054, "grad_norm": 0.7161213032538332, "learning_rate": 2.8643036161833157e-08, "loss": 0.0038, "step": 15488 }, { "epoch": 0.9981955274859832, "grad_norm": 0.00048493262049720823, "learning_rate": 2.7926960257787327e-08, "loss": 0.0, "step": 15489 }, { "epoch": 0.9982599729329122, "grad_norm": 0.012352251282173892, "learning_rate": 2.7210884353741497e-08, "loss": 0.0, "step": 15490 }, { "epoch": 0.9983244183798414, "grad_norm": 0.00370486564736607, "learning_rate": 2.649480844969567e-08, "loss": 0.0, "step": 15491 }, { "epoch": 0.9983888638267706, "grad_norm": 0.0036452617386337023, "learning_rate": 2.577873254564984e-08, "loss": 0.0, "step": 15492 }, { "epoch": 0.9984533092736998, "grad_norm": 0.1567663838989723, "learning_rate": 2.5062656641604012e-08, "loss": 0.0002, "step": 15493 }, { "epoch": 0.998517754720629, "grad_norm": 0.0005995450102191095, "learning_rate": 2.4346580737558182e-08, "loss": 0.0, "step": 15494 }, { "epoch": 0.9985822001675582, "grad_norm": 0.000392582037022992, "learning_rate": 2.363050483351235e-08, "loss": 0.0, "step": 15495 }, { "epoch": 0.9986466456144873, "grad_norm": 0.0009271351805534993, "learning_rate": 2.2914428929466525e-08, "loss": 0.0, "step": 15496 }, { "epoch": 0.9987110910614165, "grad_norm": 0.00014936378110164797, "learning_rate": 2.2198353025420694e-08, "loss": 0.0, "step": 15497 }, { "epoch": 0.9987755365083457, "grad_norm": 0.00043512063478467827, "learning_rate": 2.1482277121374867e-08, "loss": 0.0, "step": 15498 }, { "epoch": 0.9988399819552749, "grad_norm": 0.09968455133142981, "learning_rate": 2.0766201217329037e-08, "loss": 0.0003, "step": 15499 }, { "epoch": 0.9989044274022041, "grad_norm": 0.00020179967403577038, "learning_rate": 2.005012531328321e-08, "loss": 0.0, "step": 15500 }, { "epoch": 0.9989688728491332, "grad_norm": 0.04932118147213376, "learning_rate": 1.933404940923738e-08, "loss": 0.0001, "step": 15501 }, { "epoch": 0.9990333182960623, "grad_norm": 0.021431060225883328, "learning_rate": 1.8617973505191552e-08, "loss": 0.0001, "step": 15502 }, { "epoch": 0.9990977637429915, "grad_norm": 2.3081529524238576e-05, "learning_rate": 1.7901897601145722e-08, "loss": 0.0, "step": 15503 }, { "epoch": 0.9991622091899207, "grad_norm": 0.0006019946843029049, "learning_rate": 1.7185821697099895e-08, "loss": 0.0, "step": 15504 }, { "epoch": 0.9992266546368499, "grad_norm": 0.00010010075275028686, "learning_rate": 1.6469745793054065e-08, "loss": 0.0, "step": 15505 }, { "epoch": 0.9992911000837791, "grad_norm": 0.004928500843443072, "learning_rate": 1.5753669889008238e-08, "loss": 0.0, "step": 15506 }, { "epoch": 0.9993555455307083, "grad_norm": 0.0028870445087702625, "learning_rate": 1.5037593984962407e-08, "loss": 0.0, "step": 15507 }, { "epoch": 0.9994199909776375, "grad_norm": 1.111107454028999, "learning_rate": 1.4321518080916579e-08, "loss": 0.0037, "step": 15508 }, { "epoch": 0.9994844364245666, "grad_norm": 0.006669760087007271, "learning_rate": 1.3605442176870748e-08, "loss": 0.0001, "step": 15509 }, { "epoch": 0.9995488818714958, "grad_norm": 0.007184647860438429, "learning_rate": 1.288936627282492e-08, "loss": 0.0, "step": 15510 }, { "epoch": 0.999613327318425, "grad_norm": 0.000263493157723586, "learning_rate": 1.2173290368779091e-08, "loss": 0.0, "step": 15511 }, { "epoch": 0.9996777727653541, "grad_norm": 0.019085772562670834, "learning_rate": 1.1457214464733262e-08, "loss": 0.0, "step": 15512 }, { "epoch": 0.9997422182122833, "grad_norm": 0.12087096979193869, "learning_rate": 1.0741138560687434e-08, "loss": 0.0003, "step": 15513 }, { "epoch": 0.9998066636592124, "grad_norm": 0.04529828473437381, "learning_rate": 1.0025062656641605e-08, "loss": 0.0, "step": 15514 }, { "epoch": 0.9998711091061416, "grad_norm": 8.925880633125442e-05, "learning_rate": 9.308986752595776e-09, "loss": 0.0, "step": 15515 }, { "epoch": 0.9999355545530708, "grad_norm": 0.0003679149024459179, "learning_rate": 8.592910848549948e-09, "loss": 0.0, "step": 15516 }, { "epoch": 1.0, "grad_norm": 0.2631523948960553, "learning_rate": 7.876834944504119e-09, "loss": 0.0003, "step": 15517 } ], "logging_steps": 1.0, "max_steps": 15517, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 12, "trial_name": null, "trial_params": null }