{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8599068434252956, "eval_steps": 200, "global_step": 14400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.9715753015645525e-05, "grad_norm": 32.21074295043945, "learning_rate": 0.0, "loss": 1.5278, "step": 1 }, { "epoch": 0.00011943150603129105, "grad_norm": 38.61150360107422, "learning_rate": 9.337115778813537e-07, "loss": 1.5781, "step": 2 }, { "epoch": 0.00017914725904693657, "grad_norm": 38.57005310058594, "learning_rate": 1.4798978374311272e-06, "loss": 1.5591, "step": 3 }, { "epoch": 0.0002388630120625821, "grad_norm": 32.119728088378906, "learning_rate": 1.8674231557627074e-06, "loss": 1.5327, "step": 4 }, { "epoch": 0.00029857876507822763, "grad_norm": 31.805309295654297, "learning_rate": 2.1680111452043246e-06, "loss": 1.5817, "step": 5 }, { "epoch": 0.00035829451809387314, "grad_norm": 12.78921127319336, "learning_rate": 2.4136094153124806e-06, "loss": 1.3793, "step": 6 }, { "epoch": 0.0004180102711095187, "grad_norm": 15.423554420471191, "learning_rate": 2.6212597939473906e-06, "loss": 1.3628, "step": 7 }, { "epoch": 0.0004777260241251642, "grad_norm": 13.241992950439453, "learning_rate": 2.801134733644061e-06, "loss": 1.2432, "step": 8 }, { "epoch": 0.0005374417771408098, "grad_norm": 17.152511596679688, "learning_rate": 2.9597956748622543e-06, "loss": 1.1936, "step": 9 }, { "epoch": 0.0005971575301564553, "grad_norm": 13.746131896972656, "learning_rate": 3.101722723085679e-06, "loss": 1.2378, "step": 10 }, { "epoch": 0.0006568732831721008, "grad_norm": 15.981043815612793, "learning_rate": 3.2301113552104767e-06, "loss": 1.1957, "step": 11 }, { "epoch": 0.0007165890361877463, "grad_norm": 15.861040115356445, "learning_rate": 3.3473209931938345e-06, "loss": 1.2263, "step": 12 }, { "epoch": 0.0007763047892033919, "grad_norm": 15.734783172607422, "learning_rate": 3.455143408080351e-06, "loss": 1.213, "step": 13 }, { "epoch": 0.0008360205422190374, "grad_norm": 15.07627010345459, "learning_rate": 3.5549713718287437e-06, "loss": 1.1921, "step": 14 }, { "epoch": 0.0008957362952346829, "grad_norm": 7.80892276763916, "learning_rate": 3.6479089826354518e-06, "loss": 1.1508, "step": 15 }, { "epoch": 0.0009554520482503284, "grad_norm": 4.8491010665893555, "learning_rate": 3.7348463115254147e-06, "loss": 1.1537, "step": 16 }, { "epoch": 0.001015167801265974, "grad_norm": 4.5702104568481445, "learning_rate": 3.816511379035256e-06, "loss": 1.1461, "step": 17 }, { "epoch": 0.0010748835542816195, "grad_norm": 4.6307244300842285, "learning_rate": 3.893507252743607e-06, "loss": 1.1388, "step": 18 }, { "epoch": 0.001134599307297265, "grad_norm": 3.4550459384918213, "learning_rate": 3.966339101303026e-06, "loss": 1.176, "step": 19 }, { "epoch": 0.0011943150603129105, "grad_norm": 4.925933361053467, "learning_rate": 4.035434300967032e-06, "loss": 1.1751, "step": 20 }, { "epoch": 0.0012540308133285561, "grad_norm": 6.368457317352295, "learning_rate": 4.101157631378517e-06, "loss": 1.1796, "step": 21 }, { "epoch": 0.0013137465663442015, "grad_norm": 3.323585271835327, "learning_rate": 4.163822933091831e-06, "loss": 1.1517, "step": 22 }, { "epoch": 0.0013734623193598472, "grad_norm": 4.026809215545654, "learning_rate": 4.223702171634056e-06, "loss": 1.1048, "step": 23 }, { "epoch": 0.0014331780723754925, "grad_norm": 3.38273549079895, "learning_rate": 4.2810325710751884e-06, "loss": 1.1101, "step": 24 }, { "epoch": 0.0014928938253911382, "grad_norm": 4.450174808502197, "learning_rate": 4.336022290408649e-06, "loss": 1.1438, "step": 25 }, { "epoch": 0.0015526095784067838, "grad_norm": 3.7922277450561523, "learning_rate": 4.388854985961705e-06, "loss": 1.1284, "step": 26 }, { "epoch": 0.0016123253314224292, "grad_norm": 3.479353189468384, "learning_rate": 4.439693512293381e-06, "loss": 1.1569, "step": 27 }, { "epoch": 0.0016720410844380748, "grad_norm": 3.0619735717773438, "learning_rate": 4.488682949710097e-06, "loss": 1.1292, "step": 28 }, { "epoch": 0.0017317568374537204, "grad_norm": 3.782655954360962, "learning_rate": 4.535953100278194e-06, "loss": 1.1633, "step": 29 }, { "epoch": 0.0017914725904693658, "grad_norm": 3.234083414077759, "learning_rate": 4.581620560516806e-06, "loss": 1.0988, "step": 30 }, { "epoch": 0.0018511883434850114, "grad_norm": 3.227975368499756, "learning_rate": 4.62579045410531e-06, "loss": 1.1075, "step": 31 }, { "epoch": 0.0019109040965006568, "grad_norm": 5.8864970207214355, "learning_rate": 4.668557889406769e-06, "loss": 1.1389, "step": 32 }, { "epoch": 0.001970619849516302, "grad_norm": 3.2359490394592285, "learning_rate": 4.710009192641604e-06, "loss": 1.0905, "step": 33 }, { "epoch": 0.002030335602531948, "grad_norm": 8.200643539428711, "learning_rate": 4.75022295691661e-06, "loss": 1.1035, "step": 34 }, { "epoch": 0.0020900513555475934, "grad_norm": 3.1254472732543945, "learning_rate": 4.789270939151715e-06, "loss": 1.0669, "step": 35 }, { "epoch": 0.002149767108563239, "grad_norm": 2.961514711380005, "learning_rate": 4.827218830624961e-06, "loss": 1.1255, "step": 36 }, { "epoch": 0.0022094828615788847, "grad_norm": 5.060825824737549, "learning_rate": 4.864126921920735e-06, "loss": 1.0952, "step": 37 }, { "epoch": 0.00226919861459453, "grad_norm": 7.455005645751953, "learning_rate": 4.900050679184379e-06, "loss": 1.1152, "step": 38 }, { "epoch": 0.0023289143676101754, "grad_norm": 3.5833842754364014, "learning_rate": 4.935041245511478e-06, "loss": 1.0955, "step": 39 }, { "epoch": 0.002388630120625821, "grad_norm": 4.333029270172119, "learning_rate": 4.969145878848386e-06, "loss": 1.0712, "step": 40 }, { "epoch": 0.0024483458736414667, "grad_norm": 5.018174648284912, "learning_rate": 5.002408335813361e-06, "loss": 1.0945, "step": 41 }, { "epoch": 0.0025080616266571123, "grad_norm": 4.46086311340332, "learning_rate": 5.034869209259872e-06, "loss": 1.1312, "step": 42 }, { "epoch": 0.0025677773796727575, "grad_norm": 2.7651569843292236, "learning_rate": 5.066566226114873e-06, "loss": 1.0999, "step": 43 }, { "epoch": 0.002627493132688403, "grad_norm": 5.622435092926025, "learning_rate": 5.097534510973183e-06, "loss": 1.0699, "step": 44 }, { "epoch": 0.0026872088857040487, "grad_norm": 3.112889289855957, "learning_rate": 5.1278068200665785e-06, "loss": 1.0799, "step": 45 }, { "epoch": 0.0027469246387196943, "grad_norm": 2.7961902618408203, "learning_rate": 5.157413749515411e-06, "loss": 1.0848, "step": 46 }, { "epoch": 0.00280664039173534, "grad_norm": 2.8306519985198975, "learning_rate": 5.186383921182103e-06, "loss": 1.0618, "step": 47 }, { "epoch": 0.002866356144750985, "grad_norm": 4.200173377990723, "learning_rate": 5.214744148956542e-06, "loss": 1.0976, "step": 48 }, { "epoch": 0.0029260718977666307, "grad_norm": 9.786463737487793, "learning_rate": 5.242519587894781e-06, "loss": 1.0936, "step": 49 }, { "epoch": 0.0029857876507822763, "grad_norm": 3.896644115447998, "learning_rate": 5.269733868290003e-06, "loss": 1.0851, "step": 50 }, { "epoch": 0.003045503403797922, "grad_norm": 3.0368170738220215, "learning_rate": 5.2964092164663825e-06, "loss": 1.071, "step": 51 }, { "epoch": 0.0031052191568135675, "grad_norm": 2.436109781265259, "learning_rate": 5.322566563843059e-06, "loss": 1.0654, "step": 52 }, { "epoch": 0.003164934909829213, "grad_norm": 2.6994972229003906, "learning_rate": 5.348225645609087e-06, "loss": 1.0131, "step": 53 }, { "epoch": 0.0032246506628448583, "grad_norm": 2.8595664501190186, "learning_rate": 5.373405090174735e-06, "loss": 1.1269, "step": 54 }, { "epoch": 0.003284366415860504, "grad_norm": 3.853848457336426, "learning_rate": 5.398122500414802e-06, "loss": 1.0371, "step": 55 }, { "epoch": 0.0033440821688761496, "grad_norm": 3.3257908821105957, "learning_rate": 5.422394527591453e-06, "loss": 1.08, "step": 56 }, { "epoch": 0.003403797921891795, "grad_norm": 2.9927568435668945, "learning_rate": 5.446236938734153e-06, "loss": 1.1156, "step": 57 }, { "epoch": 0.003463513674907441, "grad_norm": 2.756293773651123, "learning_rate": 5.4696646781595474e-06, "loss": 1.0557, "step": 58 }, { "epoch": 0.003523229427923086, "grad_norm": 2.771888494491577, "learning_rate": 5.492691923732424e-06, "loss": 1.0475, "step": 59 }, { "epoch": 0.0035829451809387316, "grad_norm": 2.9528775215148926, "learning_rate": 5.515332138398159e-06, "loss": 1.0747, "step": 60 }, { "epoch": 0.003642660933954377, "grad_norm": 5.733924388885498, "learning_rate": 5.537598117455701e-06, "loss": 1.027, "step": 61 }, { "epoch": 0.003702376686970023, "grad_norm": 2.9885594844818115, "learning_rate": 5.559502031986665e-06, "loss": 1.0358, "step": 62 }, { "epoch": 0.0037620924399856684, "grad_norm": 3.4102060794830322, "learning_rate": 5.581055468809644e-06, "loss": 1.0352, "step": 63 }, { "epoch": 0.0038218081930013136, "grad_norm": 3.107546329498291, "learning_rate": 5.602269467288122e-06, "loss": 1.0613, "step": 64 }, { "epoch": 0.0038815239460169592, "grad_norm": 3.1059072017669678, "learning_rate": 5.6231545532846755e-06, "loss": 1.0179, "step": 65 }, { "epoch": 0.003941239699032604, "grad_norm": 8.351855278015137, "learning_rate": 5.643720770522957e-06, "loss": 1.0646, "step": 66 }, { "epoch": 0.00400095545204825, "grad_norm": 3.3199243545532227, "learning_rate": 5.663977709591351e-06, "loss": 1.0447, "step": 67 }, { "epoch": 0.004060671205063896, "grad_norm": 2.7105557918548584, "learning_rate": 5.6839345347979635e-06, "loss": 1.0482, "step": 68 }, { "epoch": 0.004120386958079541, "grad_norm": 2.873178720474243, "learning_rate": 5.7036000090651845e-06, "loss": 1.0145, "step": 69 }, { "epoch": 0.004180102711095187, "grad_norm": 3.4067234992980957, "learning_rate": 5.72298251703307e-06, "loss": 1.076, "step": 70 }, { "epoch": 0.0042398184641108325, "grad_norm": 3.7690725326538086, "learning_rate": 5.742090086524027e-06, "loss": 1.0574, "step": 71 }, { "epoch": 0.004299534217126478, "grad_norm": 2.3073956966400146, "learning_rate": 5.760930408506315e-06, "loss": 1.0379, "step": 72 }, { "epoch": 0.004359249970142124, "grad_norm": 3.255676746368408, "learning_rate": 5.779510855680616e-06, "loss": 1.0519, "step": 73 }, { "epoch": 0.004418965723157769, "grad_norm": 5.264045238494873, "learning_rate": 5.79783849980209e-06, "loss": 1.0686, "step": 74 }, { "epoch": 0.004478681476173415, "grad_norm": 3.7316696643829346, "learning_rate": 5.815920127839776e-06, "loss": 1.0871, "step": 75 }, { "epoch": 0.00453839722918906, "grad_norm": 3.095223903656006, "learning_rate": 5.833762257065734e-06, "loss": 1.0433, "step": 76 }, { "epoch": 0.004598112982204705, "grad_norm": 3.8971028327941895, "learning_rate": 5.851371149157867e-06, "loss": 1.0135, "step": 77 }, { "epoch": 0.004657828735220351, "grad_norm": 3.654733896255493, "learning_rate": 5.868752823392831e-06, "loss": 1.0427, "step": 78 }, { "epoch": 0.0047175444882359965, "grad_norm": 3.2815909385681152, "learning_rate": 5.885913068998544e-06, "loss": 1.0498, "step": 79 }, { "epoch": 0.004777260241251642, "grad_norm": 2.6778993606567383, "learning_rate": 5.902857456729739e-06, "loss": 1.0653, "step": 80 }, { "epoch": 0.004836975994267288, "grad_norm": 2.1121997833251953, "learning_rate": 5.919591349724509e-06, "loss": 1.0337, "step": 81 }, { "epoch": 0.004896691747282933, "grad_norm": 3.412590742111206, "learning_rate": 5.936119913694715e-06, "loss": 1.0556, "step": 82 }, { "epoch": 0.004956407500298579, "grad_norm": 2.5626962184906006, "learning_rate": 5.952448126498786e-06, "loss": 1.0673, "step": 83 }, { "epoch": 0.005016123253314225, "grad_norm": 3.059201240539551, "learning_rate": 5.968580787141225e-06, "loss": 1.0411, "step": 84 }, { "epoch": 0.00507583900632987, "grad_norm": 2.727278709411621, "learning_rate": 5.984522524239581e-06, "loss": 1.0061, "step": 85 }, { "epoch": 0.005135554759345515, "grad_norm": 4.521495342254639, "learning_rate": 6.000277803996226e-06, "loss": 1.0436, "step": 86 }, { "epoch": 0.0051952705123611605, "grad_norm": 2.323612689971924, "learning_rate": 6.015850937709321e-06, "loss": 1.0083, "step": 87 }, { "epoch": 0.005254986265376806, "grad_norm": 5.319992542266846, "learning_rate": 6.031246088854539e-06, "loss": 1.096, "step": 88 }, { "epoch": 0.005314702018392452, "grad_norm": 2.646199941635132, "learning_rate": 6.046467279766668e-06, "loss": 1.0598, "step": 89 }, { "epoch": 0.005374417771408097, "grad_norm": 2.8894498348236084, "learning_rate": 6.061518397947933e-06, "loss": 1.0263, "step": 90 }, { "epoch": 0.005434133524423743, "grad_norm": 3.7693397998809814, "learning_rate": 6.076403202027741e-06, "loss": 1.0198, "step": 91 }, { "epoch": 0.005493849277439389, "grad_norm": 2.3541369438171387, "learning_rate": 6.091125327396764e-06, "loss": 1.0565, "step": 92 }, { "epoch": 0.005553565030455034, "grad_norm": 9.511266708374023, "learning_rate": 6.105688291536438e-06, "loss": 1.0481, "step": 93 }, { "epoch": 0.00561328078347068, "grad_norm": 2.4397897720336914, "learning_rate": 6.120095499063458e-06, "loss": 0.9958, "step": 94 }, { "epoch": 0.0056729965364863254, "grad_norm": 3.9088056087493896, "learning_rate": 6.13435024650735e-06, "loss": 1.0621, "step": 95 }, { "epoch": 0.00573271228950197, "grad_norm": 2.3829634189605713, "learning_rate": 6.148455726837895e-06, "loss": 1.0489, "step": 96 }, { "epoch": 0.005792428042517616, "grad_norm": 3.2721362113952637, "learning_rate": 6.162415033757953e-06, "loss": 1.0455, "step": 97 }, { "epoch": 0.005852143795533261, "grad_norm": 2.9941048622131348, "learning_rate": 6.176231165776135e-06, "loss": 1.0503, "step": 98 }, { "epoch": 0.005911859548548907, "grad_norm": 2.965546131134033, "learning_rate": 6.189907030072731e-06, "loss": 1.0372, "step": 99 }, { "epoch": 0.005971575301564553, "grad_norm": 4.034855365753174, "learning_rate": 6.203445446171358e-06, "loss": 1.0547, "step": 100 }, { "epoch": 0.006031291054580198, "grad_norm": 2.2606773376464844, "learning_rate": 6.216849149427927e-06, "loss": 1.0399, "step": 101 }, { "epoch": 0.006091006807595844, "grad_norm": 2.798466682434082, "learning_rate": 6.230120794347736e-06, "loss": 1.0875, "step": 102 }, { "epoch": 0.0061507225606114895, "grad_norm": 2.5712764263153076, "learning_rate": 6.243262957740747e-06, "loss": 1.0359, "step": 103 }, { "epoch": 0.006210438313627135, "grad_norm": 3.465635299682617, "learning_rate": 6.256278141724412e-06, "loss": 0.9885, "step": 104 }, { "epoch": 0.006270154066642781, "grad_norm": 3.483391761779785, "learning_rate": 6.269168776582841e-06, "loss": 1.0617, "step": 105 }, { "epoch": 0.006329869819658426, "grad_norm": 4.536008358001709, "learning_rate": 6.28193722349044e-06, "loss": 1.0808, "step": 106 }, { "epoch": 0.006389585572674071, "grad_norm": 3.286954402923584, "learning_rate": 6.294585777107669e-06, "loss": 0.9999, "step": 107 }, { "epoch": 0.006449301325689717, "grad_norm": 6.524769306182861, "learning_rate": 6.307116668056089e-06, "loss": 1.0502, "step": 108 }, { "epoch": 0.006509017078705362, "grad_norm": 2.6852731704711914, "learning_rate": 6.319532065279308e-06, "loss": 1.0091, "step": 109 }, { "epoch": 0.006568732831721008, "grad_norm": 3.1480157375335693, "learning_rate": 6.331834078296155e-06, "loss": 1.0326, "step": 110 }, { "epoch": 0.0066284485847366535, "grad_norm": 3.0494842529296875, "learning_rate": 6.344024759351862e-06, "loss": 1.0594, "step": 111 }, { "epoch": 0.006688164337752299, "grad_norm": 4.797796249389648, "learning_rate": 6.356106105472805e-06, "loss": 1.0521, "step": 112 }, { "epoch": 0.006747880090767945, "grad_norm": 8.146347045898438, "learning_rate": 6.3680800604299e-06, "loss": 1.0598, "step": 113 }, { "epoch": 0.00680759584378359, "grad_norm": 2.3200063705444336, "learning_rate": 6.379948516615507e-06, "loss": 1.0001, "step": 114 }, { "epoch": 0.006867311596799236, "grad_norm": 2.996988296508789, "learning_rate": 6.391713316838382e-06, "loss": 1.029, "step": 115 }, { "epoch": 0.006927027349814882, "grad_norm": 3.113248348236084, "learning_rate": 6.403376256040902e-06, "loss": 1.0377, "step": 116 }, { "epoch": 0.006986743102830526, "grad_norm": 2.273139238357544, "learning_rate": 6.414939082942605e-06, "loss": 1.0251, "step": 117 }, { "epoch": 0.007046458855846172, "grad_norm": 2.74187970161438, "learning_rate": 6.426403501613777e-06, "loss": 1.0049, "step": 118 }, { "epoch": 0.0071061746088618176, "grad_norm": 2.350370407104492, "learning_rate": 6.437771172982647e-06, "loss": 0.9832, "step": 119 }, { "epoch": 0.007165890361877463, "grad_norm": 3.669543981552124, "learning_rate": 6.449043716279513e-06, "loss": 1.0367, "step": 120 }, { "epoch": 0.007225606114893109, "grad_norm": 2.7195417881011963, "learning_rate": 6.460222710420953e-06, "loss": 1.0233, "step": 121 }, { "epoch": 0.007285321867908754, "grad_norm": 3.425628185272217, "learning_rate": 6.471309695337056e-06, "loss": 1.0212, "step": 122 }, { "epoch": 0.0073450376209244, "grad_norm": 3.0846478939056396, "learning_rate": 6.482306173244487e-06, "loss": 1.0508, "step": 123 }, { "epoch": 0.007404753373940046, "grad_norm": 3.4890575408935547, "learning_rate": 6.493213609868018e-06, "loss": 1.0373, "step": 124 }, { "epoch": 0.007464469126955691, "grad_norm": 3.7357869148254395, "learning_rate": 6.504033435612975e-06, "loss": 1.0537, "step": 125 }, { "epoch": 0.007524184879971337, "grad_norm": 2.9401843547821045, "learning_rate": 6.5147670466909984e-06, "loss": 1.0336, "step": 126 }, { "epoch": 0.007583900632986982, "grad_norm": 2.3169431686401367, "learning_rate": 6.5254158062012925e-06, "loss": 1.0185, "step": 127 }, { "epoch": 0.007643616386002627, "grad_norm": 2.4942686557769775, "learning_rate": 6.535981045169476e-06, "loss": 1.0374, "step": 128 }, { "epoch": 0.007703332139018273, "grad_norm": 3.6409366130828857, "learning_rate": 6.546464063546e-06, "loss": 1.0416, "step": 129 }, { "epoch": 0.0077630478920339184, "grad_norm": 2.786487102508545, "learning_rate": 6.556866131166029e-06, "loss": 0.9847, "step": 130 }, { "epoch": 0.007822763645049564, "grad_norm": 2.9382588863372803, "learning_rate": 6.567188488672539e-06, "loss": 1.0019, "step": 131 }, { "epoch": 0.007882479398065209, "grad_norm": 3.7288389205932617, "learning_rate": 6.57743234840431e-06, "loss": 1.0383, "step": 132 }, { "epoch": 0.007942195151080855, "grad_norm": 3.6087450981140137, "learning_rate": 6.587598895250417e-06, "loss": 1.0153, "step": 133 }, { "epoch": 0.0080019109040965, "grad_norm": 2.3993237018585205, "learning_rate": 6.597689287472704e-06, "loss": 1.0079, "step": 134 }, { "epoch": 0.008061626657112147, "grad_norm": 2.8188302516937256, "learning_rate": 6.607704657497707e-06, "loss": 0.9928, "step": 135 }, { "epoch": 0.008121342410127791, "grad_norm": 3.1258137226104736, "learning_rate": 6.617646112679318e-06, "loss": 1.0288, "step": 136 }, { "epoch": 0.008181058163143438, "grad_norm": 3.1168007850646973, "learning_rate": 6.627514736033546e-06, "loss": 1.0763, "step": 137 }, { "epoch": 0.008240773916159082, "grad_norm": 3.368948221206665, "learning_rate": 6.6373115869465376e-06, "loss": 1.0641, "step": 138 }, { "epoch": 0.008300489669174729, "grad_norm": 2.941513776779175, "learning_rate": 6.6470377018570435e-06, "loss": 1.0806, "step": 139 }, { "epoch": 0.008360205422190374, "grad_norm": 4.717541694641113, "learning_rate": 6.656694094914422e-06, "loss": 1.0222, "step": 140 }, { "epoch": 0.00841992117520602, "grad_norm": 2.8448069095611572, "learning_rate": 6.666281758613231e-06, "loss": 1.0389, "step": 141 }, { "epoch": 0.008479636928221665, "grad_norm": 2.9276270866394043, "learning_rate": 6.6758016644053804e-06, "loss": 1.0116, "step": 142 }, { "epoch": 0.00853935268123731, "grad_norm": 2.7096939086914062, "learning_rate": 6.685254763290827e-06, "loss": 1.0339, "step": 143 }, { "epoch": 0.008599068434252956, "grad_norm": 2.982567548751831, "learning_rate": 6.694641986387669e-06, "loss": 1.0141, "step": 144 }, { "epoch": 0.008658784187268601, "grad_norm": 3.467564344406128, "learning_rate": 6.703964245482519e-06, "loss": 1.0163, "step": 145 }, { "epoch": 0.008718499940284247, "grad_norm": 4.507266044616699, "learning_rate": 6.713222433561969e-06, "loss": 1.0217, "step": 146 }, { "epoch": 0.008778215693299892, "grad_norm": 2.6136057376861572, "learning_rate": 6.722417425325908e-06, "loss": 1.0123, "step": 147 }, { "epoch": 0.008837931446315539, "grad_norm": 2.796966791152954, "learning_rate": 6.731550077683443e-06, "loss": 1.0177, "step": 148 }, { "epoch": 0.008897647199331183, "grad_norm": 2.6477837562561035, "learning_rate": 6.740621230232123e-06, "loss": 1.017, "step": 149 }, { "epoch": 0.00895736295234683, "grad_norm": 2.7025527954101562, "learning_rate": 6.749631705721131e-06, "loss": 1.0107, "step": 150 }, { "epoch": 0.009017078705362475, "grad_norm": 3.882481813430786, "learning_rate": 6.758582310499089e-06, "loss": 1.0242, "step": 151 }, { "epoch": 0.00907679445837812, "grad_norm": 2.8709044456481934, "learning_rate": 6.767473834947087e-06, "loss": 1.0433, "step": 152 }, { "epoch": 0.009136510211393766, "grad_norm": 3.6142451763153076, "learning_rate": 6.776307053897509e-06, "loss": 1.0659, "step": 153 }, { "epoch": 0.00919622596440941, "grad_norm": 2.825897693634033, "learning_rate": 6.785082727039222e-06, "loss": 1.0751, "step": 154 }, { "epoch": 0.009255941717425057, "grad_norm": 3.6420154571533203, "learning_rate": 6.793801599309635e-06, "loss": 1.0242, "step": 155 }, { "epoch": 0.009315657470440702, "grad_norm": 2.320249319076538, "learning_rate": 6.802464401274186e-06, "loss": 1.0183, "step": 156 }, { "epoch": 0.009375373223456348, "grad_norm": 2.044633388519287, "learning_rate": 6.811071849493663e-06, "loss": 1.0261, "step": 157 }, { "epoch": 0.009435088976471993, "grad_norm": 7.49614143371582, "learning_rate": 6.8196246468798975e-06, "loss": 1.048, "step": 158 }, { "epoch": 0.00949480472948764, "grad_norm": 3.086452007293701, "learning_rate": 6.828123483040213e-06, "loss": 1.0682, "step": 159 }, { "epoch": 0.009554520482503284, "grad_norm": 2.392066717147827, "learning_rate": 6.836569034611094e-06, "loss": 1.0534, "step": 160 }, { "epoch": 0.00961423623551893, "grad_norm": 6.393759727478027, "learning_rate": 6.844961965581447e-06, "loss": 1.0426, "step": 161 }, { "epoch": 0.009673951988534575, "grad_norm": 3.3937838077545166, "learning_rate": 6.853302927605862e-06, "loss": 1.0122, "step": 162 }, { "epoch": 0.00973366774155022, "grad_norm": 2.7369203567504883, "learning_rate": 6.8615925603082274e-06, "loss": 1.0283, "step": 163 }, { "epoch": 0.009793383494565867, "grad_norm": 2.51983642578125, "learning_rate": 6.869831491576069e-06, "loss": 1.0153, "step": 164 }, { "epoch": 0.009853099247581511, "grad_norm": 2.558015823364258, "learning_rate": 6.878020337845928e-06, "loss": 1.0757, "step": 165 }, { "epoch": 0.009912815000597158, "grad_norm": 2.432539463043213, "learning_rate": 6.88615970438014e-06, "loss": 1.0189, "step": 166 }, { "epoch": 0.009972530753612803, "grad_norm": 1.9349603652954102, "learning_rate": 6.8942501855352726e-06, "loss": 1.0049, "step": 167 }, { "epoch": 0.01003224650662845, "grad_norm": 3.5335938930511475, "learning_rate": 6.902292365022578e-06, "loss": 1.032, "step": 168 }, { "epoch": 0.010091962259644094, "grad_norm": 2.2587783336639404, "learning_rate": 6.910286816160702e-06, "loss": 1.0548, "step": 169 }, { "epoch": 0.01015167801265974, "grad_norm": 3.6973423957824707, "learning_rate": 6.918234102120935e-06, "loss": 1.0597, "step": 170 }, { "epoch": 0.010211393765675385, "grad_norm": 2.900517225265503, "learning_rate": 6.926134776165281e-06, "loss": 1.0299, "step": 171 }, { "epoch": 0.01027110951869103, "grad_norm": 2.1609840393066406, "learning_rate": 6.933989381877579e-06, "loss": 1.0607, "step": 172 }, { "epoch": 0.010330825271706676, "grad_norm": 2.4090497493743896, "learning_rate": 6.941798453387939e-06, "loss": 1.0385, "step": 173 }, { "epoch": 0.010390541024722321, "grad_norm": 2.630496025085449, "learning_rate": 6.949562515590675e-06, "loss": 1.0219, "step": 174 }, { "epoch": 0.010450256777737968, "grad_norm": 2.814255475997925, "learning_rate": 6.957282084356041e-06, "loss": 1.0064, "step": 175 }, { "epoch": 0.010509972530753612, "grad_norm": 2.4914138317108154, "learning_rate": 6.964957666735891e-06, "loss": 1.0451, "step": 176 }, { "epoch": 0.010569688283769259, "grad_norm": 3.216334581375122, "learning_rate": 6.97258976116355e-06, "loss": 1.0125, "step": 177 }, { "epoch": 0.010629404036784904, "grad_norm": 3.6063389778137207, "learning_rate": 6.980178857648021e-06, "loss": 1.0746, "step": 178 }, { "epoch": 0.01068911978980055, "grad_norm": 2.8937387466430664, "learning_rate": 6.987725437962779e-06, "loss": 0.9956, "step": 179 }, { "epoch": 0.010748835542816195, "grad_norm": 2.8174102306365967, "learning_rate": 6.995229975829286e-06, "loss": 1.014, "step": 180 }, { "epoch": 0.010808551295831841, "grad_norm": 5.971186637878418, "learning_rate": 7.002692937095441e-06, "loss": 1.0589, "step": 181 }, { "epoch": 0.010868267048847486, "grad_norm": 2.8017170429229736, "learning_rate": 7.010114779909095e-06, "loss": 1.0268, "step": 182 }, { "epoch": 0.01092798280186313, "grad_norm": 2.515665054321289, "learning_rate": 7.01749595488683e-06, "loss": 1.01, "step": 183 }, { "epoch": 0.010987698554878777, "grad_norm": 2.468982219696045, "learning_rate": 7.024836905278119e-06, "loss": 1.0224, "step": 184 }, { "epoch": 0.011047414307894422, "grad_norm": 2.538766622543335, "learning_rate": 7.0321380671250594e-06, "loss": 1.02, "step": 185 }, { "epoch": 0.011107130060910068, "grad_norm": 4.489034652709961, "learning_rate": 7.0393998694177915e-06, "loss": 0.9799, "step": 186 }, { "epoch": 0.011166845813925713, "grad_norm": 2.963038444519043, "learning_rate": 7.046622734245733e-06, "loss": 1.0669, "step": 187 }, { "epoch": 0.01122656156694136, "grad_norm": 2.459841251373291, "learning_rate": 7.053807076944811e-06, "loss": 1.0325, "step": 188 }, { "epoch": 0.011286277319957004, "grad_norm": 2.288698434829712, "learning_rate": 7.060953306240772e-06, "loss": 0.9927, "step": 189 }, { "epoch": 0.011345993072972651, "grad_norm": 2.7583820819854736, "learning_rate": 7.068061824388705e-06, "loss": 1.0248, "step": 190 }, { "epoch": 0.011405708825988296, "grad_norm": 2.718883514404297, "learning_rate": 7.075133027308916e-06, "loss": 0.9775, "step": 191 }, { "epoch": 0.01146542457900394, "grad_norm": 2.722226619720459, "learning_rate": 7.082167304719249e-06, "loss": 1.0301, "step": 192 }, { "epoch": 0.011525140332019587, "grad_norm": 2.525707244873047, "learning_rate": 7.089165040263968e-06, "loss": 1.032, "step": 193 }, { "epoch": 0.011584856085035232, "grad_norm": 3.577893018722534, "learning_rate": 7.096126611639307e-06, "loss": 0.9678, "step": 194 }, { "epoch": 0.011644571838050878, "grad_norm": 3.032466173171997, "learning_rate": 7.103052390715803e-06, "loss": 1.0659, "step": 195 }, { "epoch": 0.011704287591066523, "grad_norm": 2.5306732654571533, "learning_rate": 7.109942743657487e-06, "loss": 1.0561, "step": 196 }, { "epoch": 0.01176400334408217, "grad_norm": 2.1258394718170166, "learning_rate": 7.116798031038057e-06, "loss": 1.0524, "step": 197 }, { "epoch": 0.011823719097097814, "grad_norm": 3.20447039604187, "learning_rate": 7.123618607954084e-06, "loss": 0.9826, "step": 198 }, { "epoch": 0.01188343485011346, "grad_norm": 2.728058099746704, "learning_rate": 7.130404824135404e-06, "loss": 0.9932, "step": 199 }, { "epoch": 0.011943150603129105, "grad_norm": 4.771402835845947, "learning_rate": 7.13715702405271e-06, "loss": 1.024, "step": 200 }, { "epoch": 0.011943150603129105, "eval_text_loss": 1.031703233718872, "eval_text_runtime": 15.515, "eval_text_samples_per_second": 257.815, "eval_text_steps_per_second": 0.516, "step": 200 }, { "epoch": 0.011943150603129105, "eval_image_loss": 0.7903139591217041, "eval_image_runtime": 5.7087, "eval_image_samples_per_second": 700.681, "eval_image_steps_per_second": 1.401, "step": 200 }, { "epoch": 0.011943150603129105, "eval_video_loss": 1.2638189792633057, "eval_video_runtime": 78.6218, "eval_video_samples_per_second": 50.876, "eval_video_steps_per_second": 0.102, "step": 200 }, { "epoch": 0.012002866356144752, "grad_norm": 2.2492637634277344, "learning_rate": 7.1438755470224775e-06, "loss": 1.0085, "step": 201 }, { "epoch": 0.012062582109160397, "grad_norm": 2.2887918949127197, "learning_rate": 7.150560727309281e-06, "loss": 1.0282, "step": 202 }, { "epoch": 0.012122297862176041, "grad_norm": 3.0041937828063965, "learning_rate": 7.157212894225585e-06, "loss": 1.0151, "step": 203 }, { "epoch": 0.012182013615191688, "grad_norm": 2.3592684268951416, "learning_rate": 7.16383237222909e-06, "loss": 1.0473, "step": 204 }, { "epoch": 0.012241729368207332, "grad_norm": 4.148624420166016, "learning_rate": 7.170419481017687e-06, "loss": 1.0279, "step": 205 }, { "epoch": 0.012301445121222979, "grad_norm": 2.6791231632232666, "learning_rate": 7.176974535622101e-06, "loss": 1.0195, "step": 206 }, { "epoch": 0.012361160874238624, "grad_norm": 4.328194618225098, "learning_rate": 7.18349784649631e-06, "loss": 1.0153, "step": 207 }, { "epoch": 0.01242087662725427, "grad_norm": 2.7164578437805176, "learning_rate": 7.189989719605765e-06, "loss": 1.0575, "step": 208 }, { "epoch": 0.012480592380269915, "grad_norm": 3.432608127593994, "learning_rate": 7.196450456513502e-06, "loss": 0.9866, "step": 209 }, { "epoch": 0.012540308133285561, "grad_norm": 3.079111099243164, "learning_rate": 7.202880354464196e-06, "loss": 1.0402, "step": 210 }, { "epoch": 0.012600023886301206, "grad_norm": 2.206801176071167, "learning_rate": 7.209279706466227e-06, "loss": 1.0188, "step": 211 }, { "epoch": 0.012659739639316853, "grad_norm": 2.494516372680664, "learning_rate": 7.215648801371793e-06, "loss": 0.9935, "step": 212 }, { "epoch": 0.012719455392332497, "grad_norm": 2.506821393966675, "learning_rate": 7.221987923955155e-06, "loss": 1.0238, "step": 213 }, { "epoch": 0.012779171145348142, "grad_norm": 7.14321756362915, "learning_rate": 7.228297354989023e-06, "loss": 0.9905, "step": 214 }, { "epoch": 0.012838886898363789, "grad_norm": 2.5370683670043945, "learning_rate": 7.234577371319197e-06, "loss": 1.0087, "step": 215 }, { "epoch": 0.012898602651379433, "grad_norm": 2.7294580936431885, "learning_rate": 7.240828245937443e-06, "loss": 1.009, "step": 216 }, { "epoch": 0.01295831840439508, "grad_norm": 2.568567991256714, "learning_rate": 7.2470502480527e-06, "loss": 1.0189, "step": 217 }, { "epoch": 0.013018034157410725, "grad_norm": 2.357819080352783, "learning_rate": 7.253243643160662e-06, "loss": 1.0472, "step": 218 }, { "epoch": 0.013077749910426371, "grad_norm": 5.22165584564209, "learning_rate": 7.259408693111743e-06, "loss": 0.9939, "step": 219 }, { "epoch": 0.013137465663442016, "grad_norm": 2.5102665424346924, "learning_rate": 7.26554565617751e-06, "loss": 1.0145, "step": 220 }, { "epoch": 0.013197181416457662, "grad_norm": 2.8445451259613037, "learning_rate": 7.271654787115606e-06, "loss": 1.0203, "step": 221 }, { "epoch": 0.013256897169473307, "grad_norm": 2.450207471847534, "learning_rate": 7.277736337233217e-06, "loss": 1.0472, "step": 222 }, { "epoch": 0.013316612922488952, "grad_norm": 2.670053243637085, "learning_rate": 7.283790554449083e-06, "loss": 0.9963, "step": 223 }, { "epoch": 0.013376328675504598, "grad_norm": 1.8797969818115234, "learning_rate": 7.289817683354159e-06, "loss": 1.0024, "step": 224 }, { "epoch": 0.013436044428520243, "grad_norm": 2.3556129932403564, "learning_rate": 7.2958179652709035e-06, "loss": 1.0009, "step": 225 }, { "epoch": 0.01349576018153589, "grad_norm": 2.326411247253418, "learning_rate": 7.301791638311253e-06, "loss": 1.0277, "step": 226 }, { "epoch": 0.013555475934551534, "grad_norm": 2.449352741241455, "learning_rate": 7.307738937433323e-06, "loss": 1.0025, "step": 227 }, { "epoch": 0.01361519168756718, "grad_norm": 2.485971212387085, "learning_rate": 7.3136600944968605e-06, "loss": 1.0286, "step": 228 }, { "epoch": 0.013674907440582825, "grad_norm": 5.863036155700684, "learning_rate": 7.319555338317483e-06, "loss": 1.0026, "step": 229 }, { "epoch": 0.013734623193598472, "grad_norm": 2.6960055828094482, "learning_rate": 7.325424894719735e-06, "loss": 1.0156, "step": 230 }, { "epoch": 0.013794338946614117, "grad_norm": 6.611846923828125, "learning_rate": 7.331268986588993e-06, "loss": 1.0116, "step": 231 }, { "epoch": 0.013854054699629763, "grad_norm": 2.2162117958068848, "learning_rate": 7.337087833922255e-06, "loss": 1.0179, "step": 232 }, { "epoch": 0.013913770452645408, "grad_norm": 2.954495668411255, "learning_rate": 7.342881653877828e-06, "loss": 1.0734, "step": 233 }, { "epoch": 0.013973486205661053, "grad_norm": 2.3042185306549072, "learning_rate": 7.3486506608239596e-06, "loss": 1.0273, "step": 234 }, { "epoch": 0.0140332019586767, "grad_norm": 3.7148232460021973, "learning_rate": 7.354395066386428e-06, "loss": 0.9934, "step": 235 }, { "epoch": 0.014092917711692344, "grad_norm": 2.161548614501953, "learning_rate": 7.360115079495132e-06, "loss": 1.0049, "step": 236 }, { "epoch": 0.01415263346470799, "grad_norm": 2.4401631355285645, "learning_rate": 7.365810906429671e-06, "loss": 1.0113, "step": 237 }, { "epoch": 0.014212349217723635, "grad_norm": 4.068922996520996, "learning_rate": 7.371482750864001e-06, "loss": 1.0294, "step": 238 }, { "epoch": 0.014272064970739282, "grad_norm": 2.8945131301879883, "learning_rate": 7.377130813910121e-06, "loss": 1.0661, "step": 239 }, { "epoch": 0.014331780723754926, "grad_norm": 2.1950652599334717, "learning_rate": 7.382755294160867e-06, "loss": 1.0257, "step": 240 }, { "epoch": 0.014391496476770573, "grad_norm": 3.9602017402648926, "learning_rate": 7.388356387731815e-06, "loss": 0.9659, "step": 241 }, { "epoch": 0.014451212229786218, "grad_norm": 3.8363289833068848, "learning_rate": 7.393934288302307e-06, "loss": 0.9988, "step": 242 }, { "epoch": 0.014510927982801862, "grad_norm": 4.018577575683594, "learning_rate": 7.3994891871556354e-06, "loss": 1.0592, "step": 243 }, { "epoch": 0.014570643735817509, "grad_norm": 2.5365960597991943, "learning_rate": 7.405021273218409e-06, "loss": 1.0247, "step": 244 }, { "epoch": 0.014630359488833154, "grad_norm": 3.46052622795105, "learning_rate": 7.410530733099106e-06, "loss": 1.0171, "step": 245 }, { "epoch": 0.0146900752418488, "grad_norm": 4.047511100769043, "learning_rate": 7.4160177511258415e-06, "loss": 1.0301, "step": 246 }, { "epoch": 0.014749790994864445, "grad_norm": 2.7098090648651123, "learning_rate": 7.421482509383377e-06, "loss": 1.0221, "step": 247 }, { "epoch": 0.014809506747880091, "grad_norm": 4.692525863647461, "learning_rate": 7.426925187749372e-06, "loss": 1.0271, "step": 248 }, { "epoch": 0.014869222500895736, "grad_norm": 2.070857524871826, "learning_rate": 7.4323459639299125e-06, "loss": 1.0098, "step": 249 }, { "epoch": 0.014928938253911382, "grad_norm": 2.3680224418640137, "learning_rate": 7.437745013494327e-06, "loss": 1.0067, "step": 250 }, { "epoch": 0.014988654006927027, "grad_norm": 2.8411788940429688, "learning_rate": 7.4431225099093095e-06, "loss": 0.9998, "step": 251 }, { "epoch": 0.015048369759942674, "grad_norm": 4.417510509490967, "learning_rate": 7.4484786245723515e-06, "loss": 1.0111, "step": 252 }, { "epoch": 0.015108085512958318, "grad_norm": 2.964920997619629, "learning_rate": 7.453813526844533e-06, "loss": 0.9977, "step": 253 }, { "epoch": 0.015167801265973963, "grad_norm": 3.927359104156494, "learning_rate": 7.459127384082647e-06, "loss": 1.0485, "step": 254 }, { "epoch": 0.01522751701898961, "grad_norm": 2.345654249191284, "learning_rate": 7.464420361670707e-06, "loss": 0.9904, "step": 255 }, { "epoch": 0.015287232772005254, "grad_norm": 2.1002469062805176, "learning_rate": 7.4696926230508294e-06, "loss": 0.9999, "step": 256 }, { "epoch": 0.015346948525020901, "grad_norm": 2.902052164077759, "learning_rate": 7.4749443297535175e-06, "loss": 1.0427, "step": 257 }, { "epoch": 0.015406664278036546, "grad_norm": 1.7242212295532227, "learning_rate": 7.480175641427353e-06, "loss": 0.9904, "step": 258 }, { "epoch": 0.015466380031052192, "grad_norm": 2.2900562286376953, "learning_rate": 7.485386715868126e-06, "loss": 1.0162, "step": 259 }, { "epoch": 0.015526095784067837, "grad_norm": 1.987120270729065, "learning_rate": 7.490577709047383e-06, "loss": 1.0091, "step": 260 }, { "epoch": 0.015585811537083483, "grad_norm": 3.2015271186828613, "learning_rate": 7.495748775140449e-06, "loss": 1.0175, "step": 261 }, { "epoch": 0.015645527290099128, "grad_norm": 4.51984977722168, "learning_rate": 7.500900066553893e-06, "loss": 0.9917, "step": 262 }, { "epoch": 0.015705243043114773, "grad_norm": 3.005253314971924, "learning_rate": 7.506031733952493e-06, "loss": 1.0095, "step": 263 }, { "epoch": 0.015764958796130418, "grad_norm": 2.4132325649261475, "learning_rate": 7.511143926285665e-06, "loss": 1.0558, "step": 264 }, { "epoch": 0.015824674549146066, "grad_norm": 2.7994580268859863, "learning_rate": 7.516236790813411e-06, "loss": 1.0227, "step": 265 }, { "epoch": 0.01588439030216171, "grad_norm": 2.6646173000335693, "learning_rate": 7.52131047313177e-06, "loss": 1.0239, "step": 266 }, { "epoch": 0.015944106055177355, "grad_norm": 2.086621046066284, "learning_rate": 7.526365117197795e-06, "loss": 1.0254, "step": 267 }, { "epoch": 0.016003821808193, "grad_norm": 5.714239120483398, "learning_rate": 7.531400865354059e-06, "loss": 1.0172, "step": 268 }, { "epoch": 0.016063537561208648, "grad_norm": 2.521589994430542, "learning_rate": 7.536417858352705e-06, "loss": 0.9749, "step": 269 }, { "epoch": 0.016123253314224293, "grad_norm": 2.439413547515869, "learning_rate": 7.54141623537906e-06, "loss": 0.9885, "step": 270 }, { "epoch": 0.016182969067239938, "grad_norm": 6.770334720611572, "learning_rate": 7.546396134074794e-06, "loss": 1.0199, "step": 271 }, { "epoch": 0.016242684820255582, "grad_norm": 3.374643087387085, "learning_rate": 7.551357690560671e-06, "loss": 1.0124, "step": 272 }, { "epoch": 0.016302400573271227, "grad_norm": 3.3000428676605225, "learning_rate": 7.556301039458869e-06, "loss": 1.0311, "step": 273 }, { "epoch": 0.016362116326286875, "grad_norm": 3.247999668121338, "learning_rate": 7.5612263139149e-06, "loss": 1.0489, "step": 274 }, { "epoch": 0.01642183207930252, "grad_norm": 2.7137465476989746, "learning_rate": 7.566133645619127e-06, "loss": 1.0443, "step": 275 }, { "epoch": 0.016481547832318165, "grad_norm": 4.322002410888672, "learning_rate": 7.571023164827891e-06, "loss": 1.0221, "step": 276 }, { "epoch": 0.01654126358533381, "grad_norm": 3.543506622314453, "learning_rate": 7.57589500038426e-06, "loss": 1.0582, "step": 277 }, { "epoch": 0.016600979338349458, "grad_norm": 2.6154534816741943, "learning_rate": 7.5807492797383966e-06, "loss": 1.0129, "step": 278 }, { "epoch": 0.016660695091365103, "grad_norm": 6.148569107055664, "learning_rate": 7.5855861289675635e-06, "loss": 1.0153, "step": 279 }, { "epoch": 0.016720410844380747, "grad_norm": 5.147051811218262, "learning_rate": 7.590405672795776e-06, "loss": 1.0477, "step": 280 }, { "epoch": 0.016780126597396392, "grad_norm": 3.7156128883361816, "learning_rate": 7.595208034613094e-06, "loss": 1.0375, "step": 281 }, { "epoch": 0.01683984235041204, "grad_norm": 2.910240411758423, "learning_rate": 7.599993336494585e-06, "loss": 1.0209, "step": 282 }, { "epoch": 0.016899558103427685, "grad_norm": 3.5834686756134033, "learning_rate": 7.6047616992189314e-06, "loss": 1.0439, "step": 283 }, { "epoch": 0.01695927385644333, "grad_norm": 2.321582794189453, "learning_rate": 7.609513242286734e-06, "loss": 0.9826, "step": 284 }, { "epoch": 0.017018989609458975, "grad_norm": 3.472119092941284, "learning_rate": 7.614248083938478e-06, "loss": 1.044, "step": 285 }, { "epoch": 0.01707870536247462, "grad_norm": 2.754265308380127, "learning_rate": 7.6189663411721815e-06, "loss": 0.995, "step": 286 }, { "epoch": 0.017138421115490268, "grad_norm": 2.7821829319000244, "learning_rate": 7.623668129760752e-06, "loss": 0.9958, "step": 287 }, { "epoch": 0.017198136868505912, "grad_norm": 3.197270154953003, "learning_rate": 7.628353564269023e-06, "loss": 1.0512, "step": 288 }, { "epoch": 0.017257852621521557, "grad_norm": 3.3196067810058594, "learning_rate": 7.633022758070511e-06, "loss": 0.996, "step": 289 }, { "epoch": 0.017317568374537202, "grad_norm": 2.1130290031433105, "learning_rate": 7.637675823363873e-06, "loss": 1.0059, "step": 290 }, { "epoch": 0.01737728412755285, "grad_norm": 3.0921685695648193, "learning_rate": 7.642312871189081e-06, "loss": 1.0321, "step": 291 }, { "epoch": 0.017436999880568495, "grad_norm": 2.3877387046813965, "learning_rate": 7.646934011443323e-06, "loss": 1.01, "step": 292 }, { "epoch": 0.01749671563358414, "grad_norm": 2.419053316116333, "learning_rate": 7.651539352896636e-06, "loss": 0.9899, "step": 293 }, { "epoch": 0.017556431386599784, "grad_norm": 2.4262514114379883, "learning_rate": 7.656129003207262e-06, "loss": 1.0251, "step": 294 }, { "epoch": 0.01761614713961543, "grad_norm": 2.4906606674194336, "learning_rate": 7.66070306893675e-06, "loss": 1.0066, "step": 295 }, { "epoch": 0.017675862892631077, "grad_norm": 2.843951463699341, "learning_rate": 7.665261655564796e-06, "loss": 0.9751, "step": 296 }, { "epoch": 0.017735578645646722, "grad_norm": 2.3028924465179443, "learning_rate": 7.669804867503856e-06, "loss": 1.0156, "step": 297 }, { "epoch": 0.017795294398662367, "grad_norm": 3.6781229972839355, "learning_rate": 7.674332808113477e-06, "loss": 1.0331, "step": 298 }, { "epoch": 0.01785501015167801, "grad_norm": 2.630551815032959, "learning_rate": 7.678845579714407e-06, "loss": 0.984, "step": 299 }, { "epoch": 0.01791472590469366, "grad_norm": 2.228489637374878, "learning_rate": 7.683343283602483e-06, "loss": 0.9938, "step": 300 }, { "epoch": 0.017974441657709304, "grad_norm": 2.7629024982452393, "learning_rate": 7.687826020062262e-06, "loss": 1.0395, "step": 301 }, { "epoch": 0.01803415741072495, "grad_norm": 2.5494027137756348, "learning_rate": 7.692293888380442e-06, "loss": 1.0068, "step": 302 }, { "epoch": 0.018093873163740594, "grad_norm": 3.1892669200897217, "learning_rate": 7.696746986859054e-06, "loss": 1.045, "step": 303 }, { "epoch": 0.01815358891675624, "grad_norm": 2.3439342975616455, "learning_rate": 7.701185412828442e-06, "loss": 1.0033, "step": 304 }, { "epoch": 0.018213304669771887, "grad_norm": 2.0282764434814453, "learning_rate": 7.705609262660027e-06, "loss": 0.9898, "step": 305 }, { "epoch": 0.01827302042278753, "grad_norm": 4.74168062210083, "learning_rate": 7.710018631778863e-06, "loss": 1.0556, "step": 306 }, { "epoch": 0.018332736175803176, "grad_norm": 3.040731906890869, "learning_rate": 7.71441361467599e-06, "loss": 1.0028, "step": 307 }, { "epoch": 0.01839245192881882, "grad_norm": 2.821420431137085, "learning_rate": 7.718794304920573e-06, "loss": 0.9938, "step": 308 }, { "epoch": 0.01845216768183447, "grad_norm": 2.2875163555145264, "learning_rate": 7.723160795171874e-06, "loss": 1.0381, "step": 309 }, { "epoch": 0.018511883434850114, "grad_norm": 2.7297627925872803, "learning_rate": 7.727513177190989e-06, "loss": 1.0092, "step": 310 }, { "epoch": 0.01857159918786576, "grad_norm": 15.159585952758789, "learning_rate": 7.731851541852437e-06, "loss": 1.0271, "step": 311 }, { "epoch": 0.018631314940881404, "grad_norm": 2.5875589847564697, "learning_rate": 7.736175979155539e-06, "loss": 1.01, "step": 312 }, { "epoch": 0.018691030693897052, "grad_norm": 1.847036361694336, "learning_rate": 7.740486578235615e-06, "loss": 0.992, "step": 313 }, { "epoch": 0.018750746446912697, "grad_norm": 2.486097574234009, "learning_rate": 7.744783427375016e-06, "loss": 1.0381, "step": 314 }, { "epoch": 0.01881046219992834, "grad_norm": 3.513603448867798, "learning_rate": 7.749066614013969e-06, "loss": 1.0184, "step": 315 }, { "epoch": 0.018870177952943986, "grad_norm": 2.432694673538208, "learning_rate": 7.75333622476125e-06, "loss": 1.0133, "step": 316 }, { "epoch": 0.01892989370595963, "grad_norm": 2.2306694984436035, "learning_rate": 7.757592345404703e-06, "loss": 0.9952, "step": 317 }, { "epoch": 0.01898960945897528, "grad_norm": 2.4886653423309326, "learning_rate": 7.761835060921567e-06, "loss": 0.997, "step": 318 }, { "epoch": 0.019049325211990924, "grad_norm": 2.614149808883667, "learning_rate": 7.76606445548867e-06, "loss": 0.9943, "step": 319 }, { "epoch": 0.01910904096500657, "grad_norm": 2.5109927654266357, "learning_rate": 7.770280612492448e-06, "loss": 0.9825, "step": 320 }, { "epoch": 0.019168756718022213, "grad_norm": 2.690081834793091, "learning_rate": 7.774483614538797e-06, "loss": 0.9674, "step": 321 }, { "epoch": 0.01922847247103786, "grad_norm": 2.8214402198791504, "learning_rate": 7.778673543462802e-06, "loss": 0.9706, "step": 322 }, { "epoch": 0.019288188224053506, "grad_norm": 4.28436803817749, "learning_rate": 7.78285048033828e-06, "loss": 1.0126, "step": 323 }, { "epoch": 0.01934790397706915, "grad_norm": 2.4430036544799805, "learning_rate": 7.787014505487215e-06, "loss": 0.9926, "step": 324 }, { "epoch": 0.019407619730084796, "grad_norm": 2.110793113708496, "learning_rate": 7.791165698489e-06, "loss": 0.9869, "step": 325 }, { "epoch": 0.01946733548310044, "grad_norm": 2.280134677886963, "learning_rate": 7.795304138189582e-06, "loss": 1.0279, "step": 326 }, { "epoch": 0.01952705123611609, "grad_norm": 2.0208663940429688, "learning_rate": 7.799429902710437e-06, "loss": 0.9369, "step": 327 }, { "epoch": 0.019586766989131733, "grad_norm": 2.319324016571045, "learning_rate": 7.803543069457422e-06, "loss": 1.0063, "step": 328 }, { "epoch": 0.019646482742147378, "grad_norm": 2.228748083114624, "learning_rate": 7.807643715129494e-06, "loss": 0.9892, "step": 329 }, { "epoch": 0.019706198495163023, "grad_norm": 2.870412588119507, "learning_rate": 7.811731915727282e-06, "loss": 1.0092, "step": 330 }, { "epoch": 0.01976591424817867, "grad_norm": 3.409494400024414, "learning_rate": 7.815807746561544e-06, "loss": 1.0222, "step": 331 }, { "epoch": 0.019825630001194316, "grad_norm": 3.247436046600342, "learning_rate": 7.819871282261492e-06, "loss": 1.0453, "step": 332 }, { "epoch": 0.01988534575420996, "grad_norm": 3.489839792251587, "learning_rate": 7.82392259678299e-06, "loss": 1.0466, "step": 333 }, { "epoch": 0.019945061507225605, "grad_norm": 3.699694871902466, "learning_rate": 7.827961763416626e-06, "loss": 1.0111, "step": 334 }, { "epoch": 0.02000477726024125, "grad_norm": 2.2615487575531006, "learning_rate": 7.831988854795675e-06, "loss": 0.9901, "step": 335 }, { "epoch": 0.0200644930132569, "grad_norm": 3.0526561737060547, "learning_rate": 7.836003942903932e-06, "loss": 0.9743, "step": 336 }, { "epoch": 0.020124208766272543, "grad_norm": 1.790648341178894, "learning_rate": 7.840007099083433e-06, "loss": 1.0122, "step": 337 }, { "epoch": 0.020183924519288188, "grad_norm": 2.972393035888672, "learning_rate": 7.843998394042056e-06, "loss": 1.0615, "step": 338 }, { "epoch": 0.020243640272303833, "grad_norm": 2.1696572303771973, "learning_rate": 7.847977897861025e-06, "loss": 0.9905, "step": 339 }, { "epoch": 0.02030335602531948, "grad_norm": 4.337213516235352, "learning_rate": 7.851945680002289e-06, "loss": 0.9946, "step": 340 }, { "epoch": 0.020363071778335125, "grad_norm": 2.561218738555908, "learning_rate": 7.855901809315786e-06, "loss": 1.0184, "step": 341 }, { "epoch": 0.02042278753135077, "grad_norm": 2.594031810760498, "learning_rate": 7.859846354046632e-06, "loss": 1.0293, "step": 342 }, { "epoch": 0.020482503284366415, "grad_norm": 2.790238857269287, "learning_rate": 7.86377938184217e-06, "loss": 1.017, "step": 343 }, { "epoch": 0.02054221903738206, "grad_norm": 3.4326369762420654, "learning_rate": 7.867700959758934e-06, "loss": 1.0538, "step": 344 }, { "epoch": 0.020601934790397708, "grad_norm": 4.2151055335998535, "learning_rate": 7.871611154269509e-06, "loss": 0.9429, "step": 345 }, { "epoch": 0.020661650543413353, "grad_norm": 3.176076650619507, "learning_rate": 7.875510031269294e-06, "loss": 1.0272, "step": 346 }, { "epoch": 0.020721366296428997, "grad_norm": 2.235297203063965, "learning_rate": 7.87939765608316e-06, "loss": 1.023, "step": 347 }, { "epoch": 0.020781082049444642, "grad_norm": 3.051011323928833, "learning_rate": 7.883274093472029e-06, "loss": 1.0769, "step": 348 }, { "epoch": 0.02084079780246029, "grad_norm": 2.7594892978668213, "learning_rate": 7.88713940763933e-06, "loss": 1.0103, "step": 349 }, { "epoch": 0.020900513555475935, "grad_norm": 2.777972459793091, "learning_rate": 7.890993662237392e-06, "loss": 1.0458, "step": 350 }, { "epoch": 0.02096022930849158, "grad_norm": 2.3896970748901367, "learning_rate": 7.894836920373732e-06, "loss": 0.9862, "step": 351 }, { "epoch": 0.021019945061507225, "grad_norm": 3.1245925426483154, "learning_rate": 7.898669244617245e-06, "loss": 1.0173, "step": 352 }, { "epoch": 0.021079660814522873, "grad_norm": 2.6203620433807373, "learning_rate": 7.902490697004329e-06, "loss": 1.0087, "step": 353 }, { "epoch": 0.021139376567538518, "grad_norm": 2.9528496265411377, "learning_rate": 7.906301339044904e-06, "loss": 1.0436, "step": 354 }, { "epoch": 0.021199092320554162, "grad_norm": 4.4318132400512695, "learning_rate": 7.910101231728352e-06, "loss": 0.9717, "step": 355 }, { "epoch": 0.021258808073569807, "grad_norm": 2.0231094360351562, "learning_rate": 7.913890435529376e-06, "loss": 1.0235, "step": 356 }, { "epoch": 0.021318523826585452, "grad_norm": 4.160445213317871, "learning_rate": 7.917669010413773e-06, "loss": 1.0162, "step": 357 }, { "epoch": 0.0213782395796011, "grad_norm": 2.2618942260742188, "learning_rate": 7.921437015844132e-06, "loss": 1.0165, "step": 358 }, { "epoch": 0.021437955332616745, "grad_norm": 3.794167995452881, "learning_rate": 7.925194510785445e-06, "loss": 0.9679, "step": 359 }, { "epoch": 0.02149767108563239, "grad_norm": 2.965172529220581, "learning_rate": 7.92894155371064e-06, "loss": 0.9911, "step": 360 }, { "epoch": 0.021557386838648034, "grad_norm": 3.9607503414154053, "learning_rate": 7.932678202606052e-06, "loss": 1.0173, "step": 361 }, { "epoch": 0.021617102591663682, "grad_norm": 3.0105559825897217, "learning_rate": 7.936404514976796e-06, "loss": 0.9789, "step": 362 }, { "epoch": 0.021676818344679327, "grad_norm": 1.7705990076065063, "learning_rate": 7.94012054785208e-06, "loss": 1.0042, "step": 363 }, { "epoch": 0.021736534097694972, "grad_norm": 3.172269821166992, "learning_rate": 7.943826357790448e-06, "loss": 1.0521, "step": 364 }, { "epoch": 0.021796249850710617, "grad_norm": 2.333153486251831, "learning_rate": 7.94752200088494e-06, "loss": 0.9706, "step": 365 }, { "epoch": 0.02185596560372626, "grad_norm": 2.3720552921295166, "learning_rate": 7.951207532768181e-06, "loss": 0.9584, "step": 366 }, { "epoch": 0.02191568135674191, "grad_norm": 8.247212409973145, "learning_rate": 7.954883008617421e-06, "loss": 0.9991, "step": 367 }, { "epoch": 0.021975397109757554, "grad_norm": 3.01139760017395, "learning_rate": 7.958548483159472e-06, "loss": 1.0075, "step": 368 }, { "epoch": 0.0220351128627732, "grad_norm": 2.3563971519470215, "learning_rate": 7.962204010675614e-06, "loss": 0.9631, "step": 369 }, { "epoch": 0.022094828615788844, "grad_norm": 2.3204665184020996, "learning_rate": 7.965849645006414e-06, "loss": 0.9727, "step": 370 }, { "epoch": 0.022154544368804492, "grad_norm": 2.6511340141296387, "learning_rate": 7.969485439556475e-06, "loss": 1.0259, "step": 371 }, { "epoch": 0.022214260121820137, "grad_norm": 2.344806671142578, "learning_rate": 7.973111447299145e-06, "loss": 0.9973, "step": 372 }, { "epoch": 0.02227397587483578, "grad_norm": 2.417497396469116, "learning_rate": 7.976727720781133e-06, "loss": 0.9907, "step": 373 }, { "epoch": 0.022333691627851426, "grad_norm": 3.0068418979644775, "learning_rate": 7.980334312127086e-06, "loss": 0.9837, "step": 374 }, { "epoch": 0.02239340738086707, "grad_norm": 2.4365663528442383, "learning_rate": 7.983931273044101e-06, "loss": 0.9993, "step": 375 }, { "epoch": 0.02245312313388272, "grad_norm": 3.0460968017578125, "learning_rate": 7.987518654826166e-06, "loss": 1.0318, "step": 376 }, { "epoch": 0.022512838886898364, "grad_norm": 2.6265859603881836, "learning_rate": 7.991096508358546e-06, "loss": 1.0278, "step": 377 }, { "epoch": 0.02257255463991401, "grad_norm": 2.182450771331787, "learning_rate": 7.994664884122126e-06, "loss": 1.0095, "step": 378 }, { "epoch": 0.022632270392929654, "grad_norm": 2.2916407585144043, "learning_rate": 7.998223832197672e-06, "loss": 0.9916, "step": 379 }, { "epoch": 0.022691986145945302, "grad_norm": 2.35615873336792, "learning_rate": 8.001773402270058e-06, "loss": 1.0475, "step": 380 }, { "epoch": 0.022751701898960947, "grad_norm": 2.0042548179626465, "learning_rate": 8.00531364363242e-06, "loss": 0.9998, "step": 381 }, { "epoch": 0.02281141765197659, "grad_norm": 2.2791714668273926, "learning_rate": 8.008844605190271e-06, "loss": 0.9911, "step": 382 }, { "epoch": 0.022871133404992236, "grad_norm": 3.7927379608154297, "learning_rate": 8.012366335465542e-06, "loss": 1.0002, "step": 383 }, { "epoch": 0.02293084915800788, "grad_norm": 3.233311653137207, "learning_rate": 8.015878882600603e-06, "loss": 1.0184, "step": 384 }, { "epoch": 0.02299056491102353, "grad_norm": 2.005155086517334, "learning_rate": 8.019382294362193e-06, "loss": 1.0079, "step": 385 }, { "epoch": 0.023050280664039174, "grad_norm": 2.563589572906494, "learning_rate": 8.022876618145322e-06, "loss": 0.959, "step": 386 }, { "epoch": 0.02310999641705482, "grad_norm": 2.183661937713623, "learning_rate": 8.026361900977128e-06, "loss": 1.0145, "step": 387 }, { "epoch": 0.023169712170070463, "grad_norm": 2.758762836456299, "learning_rate": 8.029838189520662e-06, "loss": 0.9834, "step": 388 }, { "epoch": 0.02322942792308611, "grad_norm": 2.0200631618499756, "learning_rate": 8.033305530078641e-06, "loss": 0.9661, "step": 389 }, { "epoch": 0.023289143676101756, "grad_norm": 2.8388724327087402, "learning_rate": 8.036763968597157e-06, "loss": 0.9946, "step": 390 }, { "epoch": 0.0233488594291174, "grad_norm": 3.7795159816741943, "learning_rate": 8.040213550669313e-06, "loss": 0.966, "step": 391 }, { "epoch": 0.023408575182133046, "grad_norm": 4.449980735778809, "learning_rate": 8.043654321538841e-06, "loss": 1.0018, "step": 392 }, { "epoch": 0.023468290935148694, "grad_norm": 2.151075601577759, "learning_rate": 8.047086326103668e-06, "loss": 1.0152, "step": 393 }, { "epoch": 0.02352800668816434, "grad_norm": 2.861572742462158, "learning_rate": 8.05050960891941e-06, "loss": 0.9815, "step": 394 }, { "epoch": 0.023587722441179983, "grad_norm": 2.0881407260894775, "learning_rate": 8.053924214202869e-06, "loss": 0.9915, "step": 395 }, { "epoch": 0.023647438194195628, "grad_norm": 4.217830657958984, "learning_rate": 8.057330185835438e-06, "loss": 0.985, "step": 396 }, { "epoch": 0.023707153947211273, "grad_norm": 2.9552688598632812, "learning_rate": 8.0607275673665e-06, "loss": 0.9651, "step": 397 }, { "epoch": 0.02376686970022692, "grad_norm": 6.2465715408325195, "learning_rate": 8.064116402016758e-06, "loss": 1.0022, "step": 398 }, { "epoch": 0.023826585453242566, "grad_norm": 2.683450222015381, "learning_rate": 8.067496732681544e-06, "loss": 0.9668, "step": 399 }, { "epoch": 0.02388630120625821, "grad_norm": 2.2399802207946777, "learning_rate": 8.070868601934064e-06, "loss": 0.9772, "step": 400 }, { "epoch": 0.02388630120625821, "eval_text_loss": 1.0086963176727295, "eval_text_runtime": 15.0925, "eval_text_samples_per_second": 265.032, "eval_text_steps_per_second": 0.53, "step": 400 }, { "epoch": 0.02388630120625821, "eval_image_loss": 0.7652602195739746, "eval_image_runtime": 5.1511, "eval_image_samples_per_second": 776.531, "eval_image_steps_per_second": 1.553, "step": 400 }, { "epoch": 0.02388630120625821, "eval_video_loss": 1.246963381767273, "eval_video_runtime": 78.7139, "eval_video_samples_per_second": 50.817, "eval_video_steps_per_second": 0.102, "step": 400 }, { "epoch": 0.023946016959273855, "grad_norm": 1.8533250093460083, "learning_rate": 8.074232052028632e-06, "loss": 1.0336, "step": 401 }, { "epoch": 0.024005732712289504, "grad_norm": 3.9925198554992676, "learning_rate": 8.077587124903832e-06, "loss": 1.021, "step": 402 }, { "epoch": 0.02406544846530515, "grad_norm": 3.309813976287842, "learning_rate": 8.08093386218566e-06, "loss": 1.0079, "step": 403 }, { "epoch": 0.024125164218320793, "grad_norm": 4.986968517303467, "learning_rate": 8.084272305190635e-06, "loss": 0.9784, "step": 404 }, { "epoch": 0.024184879971336438, "grad_norm": 3.0065231323242188, "learning_rate": 8.087602494928833e-06, "loss": 1.0392, "step": 405 }, { "epoch": 0.024244595724352083, "grad_norm": 2.753887414932251, "learning_rate": 8.090924472106938e-06, "loss": 1.0009, "step": 406 }, { "epoch": 0.02430431147736773, "grad_norm": 4.460117340087891, "learning_rate": 8.094238277131212e-06, "loss": 1.0391, "step": 407 }, { "epoch": 0.024364027230383375, "grad_norm": 2.003974437713623, "learning_rate": 8.097543950110444e-06, "loss": 1.0015, "step": 408 }, { "epoch": 0.02442374298339902, "grad_norm": 5.298849582672119, "learning_rate": 8.10084153085887e-06, "loss": 0.9776, "step": 409 }, { "epoch": 0.024483458736414665, "grad_norm": 2.331188678741455, "learning_rate": 8.10413105889904e-06, "loss": 1.023, "step": 410 }, { "epoch": 0.024543174489430313, "grad_norm": 2.157909393310547, "learning_rate": 8.107412573464672e-06, "loss": 0.9927, "step": 411 }, { "epoch": 0.024602890242445958, "grad_norm": 2.214704751968384, "learning_rate": 8.110686113503454e-06, "loss": 0.9865, "step": 412 }, { "epoch": 0.024662605995461603, "grad_norm": 3.4655516147613525, "learning_rate": 8.113951717679815e-06, "loss": 1.026, "step": 413 }, { "epoch": 0.024722321748477247, "grad_norm": 2.0888214111328125, "learning_rate": 8.117209424377664e-06, "loss": 1.0069, "step": 414 }, { "epoch": 0.024782037501492892, "grad_norm": 3.5397233963012695, "learning_rate": 8.12045927170311e-06, "loss": 1.0064, "step": 415 }, { "epoch": 0.02484175325450854, "grad_norm": 2.974914312362671, "learning_rate": 8.12370129748712e-06, "loss": 0.9557, "step": 416 }, { "epoch": 0.024901469007524185, "grad_norm": 2.4157917499542236, "learning_rate": 8.12693553928817e-06, "loss": 1.0377, "step": 417 }, { "epoch": 0.02496118476053983, "grad_norm": 2.0566327571868896, "learning_rate": 8.130162034394857e-06, "loss": 0.9886, "step": 418 }, { "epoch": 0.025020900513555475, "grad_norm": 2.251675605773926, "learning_rate": 8.13338081982847e-06, "loss": 0.9832, "step": 419 }, { "epoch": 0.025080616266571123, "grad_norm": 2.2563350200653076, "learning_rate": 8.13659193234555e-06, "loss": 1.0017, "step": 420 }, { "epoch": 0.025140332019586768, "grad_norm": 1.9567314386367798, "learning_rate": 8.139795408440401e-06, "loss": 0.9521, "step": 421 }, { "epoch": 0.025200047772602412, "grad_norm": 3.18942928314209, "learning_rate": 8.14299128434758e-06, "loss": 0.97, "step": 422 }, { "epoch": 0.025259763525618057, "grad_norm": 2.821589946746826, "learning_rate": 8.146179596044357e-06, "loss": 0.9954, "step": 423 }, { "epoch": 0.025319479278633705, "grad_norm": 2.0038585662841797, "learning_rate": 8.149360379253147e-06, "loss": 0.9997, "step": 424 }, { "epoch": 0.02537919503164935, "grad_norm": 2.356797218322754, "learning_rate": 8.152533669443907e-06, "loss": 1.0315, "step": 425 }, { "epoch": 0.025438910784664995, "grad_norm": 2.02579402923584, "learning_rate": 8.155699501836509e-06, "loss": 1.0308, "step": 426 }, { "epoch": 0.02549862653768064, "grad_norm": 2.6679422855377197, "learning_rate": 8.158857911403092e-06, "loss": 1.0005, "step": 427 }, { "epoch": 0.025558342290696284, "grad_norm": 3.0377845764160156, "learning_rate": 8.162008932870376e-06, "loss": 1.0155, "step": 428 }, { "epoch": 0.025618058043711932, "grad_norm": 2.377236843109131, "learning_rate": 8.165152600721954e-06, "loss": 0.985, "step": 429 }, { "epoch": 0.025677773796727577, "grad_norm": 2.7814383506774902, "learning_rate": 8.168288949200552e-06, "loss": 0.9819, "step": 430 }, { "epoch": 0.025737489549743222, "grad_norm": 3.440347671508789, "learning_rate": 8.171418012310269e-06, "loss": 0.9772, "step": 431 }, { "epoch": 0.025797205302758867, "grad_norm": 2.926682710647583, "learning_rate": 8.174539823818798e-06, "loss": 1.0433, "step": 432 }, { "epoch": 0.025856921055774515, "grad_norm": 3.9452977180480957, "learning_rate": 8.177654417259593e-06, "loss": 0.9975, "step": 433 }, { "epoch": 0.02591663680879016, "grad_norm": 3.02215838432312, "learning_rate": 8.180761825934054e-06, "loss": 1.0208, "step": 434 }, { "epoch": 0.025976352561805804, "grad_norm": 2.1339385509490967, "learning_rate": 8.183862082913645e-06, "loss": 1.026, "step": 435 }, { "epoch": 0.02603606831482145, "grad_norm": 3.3353195190429688, "learning_rate": 8.186955221042016e-06, "loss": 0.9853, "step": 436 }, { "epoch": 0.026095784067837094, "grad_norm": 2.756291627883911, "learning_rate": 8.190041272937082e-06, "loss": 0.9667, "step": 437 }, { "epoch": 0.026155499820852742, "grad_norm": 3.126272439956665, "learning_rate": 8.193120270993097e-06, "loss": 0.9676, "step": 438 }, { "epoch": 0.026215215573868387, "grad_norm": 2.372028112411499, "learning_rate": 8.196192247382683e-06, "loss": 1.0163, "step": 439 }, { "epoch": 0.02627493132688403, "grad_norm": 2.3865714073181152, "learning_rate": 8.199257234058861e-06, "loss": 1.0001, "step": 440 }, { "epoch": 0.026334647079899676, "grad_norm": 3.1647794246673584, "learning_rate": 8.202315262757034e-06, "loss": 0.9924, "step": 441 }, { "epoch": 0.026394362832915325, "grad_norm": 3.0949769020080566, "learning_rate": 8.205366364996961e-06, "loss": 0.9892, "step": 442 }, { "epoch": 0.02645407858593097, "grad_norm": 3.062005043029785, "learning_rate": 8.208410572084706e-06, "loss": 1.0268, "step": 443 }, { "epoch": 0.026513794338946614, "grad_norm": 2.590514659881592, "learning_rate": 8.21144791511457e-06, "loss": 0.9716, "step": 444 }, { "epoch": 0.02657351009196226, "grad_norm": 2.3321480751037598, "learning_rate": 8.214478424970992e-06, "loss": 1.011, "step": 445 }, { "epoch": 0.026633225844977904, "grad_norm": 2.13543701171875, "learning_rate": 8.217502132330435e-06, "loss": 0.9995, "step": 446 }, { "epoch": 0.026692941597993552, "grad_norm": 4.306152820587158, "learning_rate": 8.22051906766325e-06, "loss": 1.0546, "step": 447 }, { "epoch": 0.026752657351009197, "grad_norm": 2.679797410964966, "learning_rate": 8.223529261235513e-06, "loss": 1.0316, "step": 448 }, { "epoch": 0.02681237310402484, "grad_norm": 2.978541374206543, "learning_rate": 8.226532743110855e-06, "loss": 1.0152, "step": 449 }, { "epoch": 0.026872088857040486, "grad_norm": 4.04865026473999, "learning_rate": 8.229529543152257e-06, "loss": 1.0354, "step": 450 }, { "epoch": 0.026931804610056134, "grad_norm": 2.9608726501464844, "learning_rate": 8.232519691023839e-06, "loss": 1.0249, "step": 451 }, { "epoch": 0.02699152036307178, "grad_norm": 2.4984374046325684, "learning_rate": 8.235503216192608e-06, "loss": 1.0146, "step": 452 }, { "epoch": 0.027051236116087424, "grad_norm": 2.165470600128174, "learning_rate": 8.238480147930216e-06, "loss": 0.9672, "step": 453 }, { "epoch": 0.02711095186910307, "grad_norm": 2.4176828861236572, "learning_rate": 8.241450515314678e-06, "loss": 0.9992, "step": 454 }, { "epoch": 0.027170667622118713, "grad_norm": 2.4989659786224365, "learning_rate": 8.244414347232067e-06, "loss": 1.0181, "step": 455 }, { "epoch": 0.02723038337513436, "grad_norm": 2.2210817337036133, "learning_rate": 8.247371672378215e-06, "loss": 0.9572, "step": 456 }, { "epoch": 0.027290099128150006, "grad_norm": 2.354964017868042, "learning_rate": 8.250322519260367e-06, "loss": 0.9715, "step": 457 }, { "epoch": 0.02734981488116565, "grad_norm": 3.4354069232940674, "learning_rate": 8.253266916198839e-06, "loss": 0.9971, "step": 458 }, { "epoch": 0.027409530634181296, "grad_norm": 2.637251615524292, "learning_rate": 8.256204891328637e-06, "loss": 1.0009, "step": 459 }, { "epoch": 0.027469246387196944, "grad_norm": 2.985719919204712, "learning_rate": 8.25913647260109e-06, "loss": 0.9768, "step": 460 }, { "epoch": 0.02752896214021259, "grad_norm": 2.34804368019104, "learning_rate": 8.26206168778542e-06, "loss": 1.002, "step": 461 }, { "epoch": 0.027588677893228233, "grad_norm": 2.4918394088745117, "learning_rate": 8.264980564470347e-06, "loss": 1.0142, "step": 462 }, { "epoch": 0.027648393646243878, "grad_norm": 1.7626484632492065, "learning_rate": 8.267893130065628e-06, "loss": 1.0019, "step": 463 }, { "epoch": 0.027708109399259526, "grad_norm": 2.5360546112060547, "learning_rate": 8.27079941180361e-06, "loss": 1.0025, "step": 464 }, { "epoch": 0.02776782515227517, "grad_norm": 2.4753687381744385, "learning_rate": 8.273699436740763e-06, "loss": 0.9707, "step": 465 }, { "epoch": 0.027827540905290816, "grad_norm": 2.32245135307312, "learning_rate": 8.27659323175918e-06, "loss": 0.9586, "step": 466 }, { "epoch": 0.02788725665830646, "grad_norm": 3.416313886642456, "learning_rate": 8.27948082356809e-06, "loss": 1.0124, "step": 467 }, { "epoch": 0.027946972411322105, "grad_norm": 2.6735615730285645, "learning_rate": 8.282362238705313e-06, "loss": 1.0165, "step": 468 }, { "epoch": 0.028006688164337754, "grad_norm": 2.5888137817382812, "learning_rate": 8.285237503538742e-06, "loss": 1.0278, "step": 469 }, { "epoch": 0.0280664039173534, "grad_norm": 2.5993497371673584, "learning_rate": 8.288106644267782e-06, "loss": 0.9919, "step": 470 }, { "epoch": 0.028126119670369043, "grad_norm": 1.9291703701019287, "learning_rate": 8.29096968692479e-06, "loss": 0.9881, "step": 471 }, { "epoch": 0.028185835423384688, "grad_norm": 2.6150386333465576, "learning_rate": 8.293826657376483e-06, "loss": 0.979, "step": 472 }, { "epoch": 0.028245551176400336, "grad_norm": 2.799536943435669, "learning_rate": 8.29667758132535e-06, "loss": 1.0123, "step": 473 }, { "epoch": 0.02830526692941598, "grad_norm": 4.287378311157227, "learning_rate": 8.299522484311024e-06, "loss": 0.9686, "step": 474 }, { "epoch": 0.028364982682431625, "grad_norm": 5.919393062591553, "learning_rate": 8.302361391711676e-06, "loss": 0.999, "step": 475 }, { "epoch": 0.02842469843544727, "grad_norm": 2.9281461238861084, "learning_rate": 8.305194328745354e-06, "loss": 1.0057, "step": 476 }, { "epoch": 0.028484414188462915, "grad_norm": 2.6607279777526855, "learning_rate": 8.30802132047134e-06, "loss": 0.9815, "step": 477 }, { "epoch": 0.028544129941478563, "grad_norm": 7.314914226531982, "learning_rate": 8.310842391791475e-06, "loss": 1.0236, "step": 478 }, { "epoch": 0.028603845694494208, "grad_norm": 2.76414155960083, "learning_rate": 8.313657567451468e-06, "loss": 1.0199, "step": 479 }, { "epoch": 0.028663561447509853, "grad_norm": 3.1445424556732178, "learning_rate": 8.31646687204222e-06, "loss": 0.9788, "step": 480 }, { "epoch": 0.028723277200525497, "grad_norm": 2.1969361305236816, "learning_rate": 8.319270330001087e-06, "loss": 1.0175, "step": 481 }, { "epoch": 0.028782992953541146, "grad_norm": 2.896026372909546, "learning_rate": 8.322067965613168e-06, "loss": 0.9693, "step": 482 }, { "epoch": 0.02884270870655679, "grad_norm": 2.7547106742858887, "learning_rate": 8.324859803012574e-06, "loss": 0.981, "step": 483 }, { "epoch": 0.028902424459572435, "grad_norm": 2.51115345954895, "learning_rate": 8.327645866183662e-06, "loss": 1.0028, "step": 484 }, { "epoch": 0.02896214021258808, "grad_norm": 6.004995346069336, "learning_rate": 8.330426178962278e-06, "loss": 0.9948, "step": 485 }, { "epoch": 0.029021855965603725, "grad_norm": 2.1501522064208984, "learning_rate": 8.333200765036988e-06, "loss": 0.9959, "step": 486 }, { "epoch": 0.029081571718619373, "grad_norm": 3.541078567504883, "learning_rate": 8.33596964795028e-06, "loss": 1.002, "step": 487 }, { "epoch": 0.029141287471635018, "grad_norm": 2.2514796257019043, "learning_rate": 8.338732851099764e-06, "loss": 1.0105, "step": 488 }, { "epoch": 0.029201003224650662, "grad_norm": 2.3225913047790527, "learning_rate": 8.341490397739356e-06, "loss": 0.9958, "step": 489 }, { "epoch": 0.029260718977666307, "grad_norm": 2.723189115524292, "learning_rate": 8.34424231098046e-06, "loss": 0.971, "step": 490 }, { "epoch": 0.029320434730681955, "grad_norm": 2.4837257862091064, "learning_rate": 8.346988613793125e-06, "loss": 0.9724, "step": 491 }, { "epoch": 0.0293801504836976, "grad_norm": 2.130002498626709, "learning_rate": 8.349729329007195e-06, "loss": 1.0084, "step": 492 }, { "epoch": 0.029439866236713245, "grad_norm": 2.524523973464966, "learning_rate": 8.352464479313451e-06, "loss": 1.017, "step": 493 }, { "epoch": 0.02949958198972889, "grad_norm": 2.466947078704834, "learning_rate": 8.35519408726473e-06, "loss": 0.9562, "step": 494 }, { "epoch": 0.029559297742744534, "grad_norm": 2.136032819747925, "learning_rate": 8.357918175277056e-06, "loss": 1.0, "step": 495 }, { "epoch": 0.029619013495760182, "grad_norm": 3.6118786334991455, "learning_rate": 8.360636765630726e-06, "loss": 0.9994, "step": 496 }, { "epoch": 0.029678729248775827, "grad_norm": 2.711876630783081, "learning_rate": 8.363349880471417e-06, "loss": 0.9685, "step": 497 }, { "epoch": 0.029738445001791472, "grad_norm": 2.551297187805176, "learning_rate": 8.366057541811266e-06, "loss": 0.9939, "step": 498 }, { "epoch": 0.029798160754807117, "grad_norm": 2.8349361419677734, "learning_rate": 8.368759771529936e-06, "loss": 0.9846, "step": 499 }, { "epoch": 0.029857876507822765, "grad_norm": 3.289557933807373, "learning_rate": 8.371456591375682e-06, "loss": 0.9663, "step": 500 }, { "epoch": 0.02991759226083841, "grad_norm": 3.1825594902038574, "learning_rate": 8.374148022966398e-06, "loss": 0.9501, "step": 501 }, { "epoch": 0.029977308013854054, "grad_norm": 2.626358985900879, "learning_rate": 8.376834087790663e-06, "loss": 0.9713, "step": 502 }, { "epoch": 0.0300370237668697, "grad_norm": 2.6793174743652344, "learning_rate": 8.379514807208761e-06, "loss": 1.0168, "step": 503 }, { "epoch": 0.030096739519885347, "grad_norm": 2.7036139965057373, "learning_rate": 8.382190202453705e-06, "loss": 1.0361, "step": 504 }, { "epoch": 0.030156455272900992, "grad_norm": 2.029879093170166, "learning_rate": 8.384860294632251e-06, "loss": 0.9988, "step": 505 }, { "epoch": 0.030216171025916637, "grad_norm": 2.660670757293701, "learning_rate": 8.387525104725887e-06, "loss": 1.0163, "step": 506 }, { "epoch": 0.03027588677893228, "grad_norm": 2.594095468521118, "learning_rate": 8.39018465359183e-06, "loss": 1.0027, "step": 507 }, { "epoch": 0.030335602531947926, "grad_norm": 2.5257773399353027, "learning_rate": 8.392838961964001e-06, "loss": 0.9968, "step": 508 }, { "epoch": 0.030395318284963575, "grad_norm": 3.4604275226593018, "learning_rate": 8.395488050454e-06, "loss": 0.9693, "step": 509 }, { "epoch": 0.03045503403797922, "grad_norm": 2.7080302238464355, "learning_rate": 8.39813193955206e-06, "loss": 0.9916, "step": 510 }, { "epoch": 0.030514749790994864, "grad_norm": 2.129288673400879, "learning_rate": 8.400770649628006e-06, "loss": 1.0254, "step": 511 }, { "epoch": 0.03057446554401051, "grad_norm": 2.3170173168182373, "learning_rate": 8.403404200932184e-06, "loss": 0.9808, "step": 512 }, { "epoch": 0.030634181297026157, "grad_norm": 2.1426045894622803, "learning_rate": 8.406032613596406e-06, "loss": 0.9571, "step": 513 }, { "epoch": 0.030693897050041802, "grad_norm": 2.06177020072937, "learning_rate": 8.408655907634871e-06, "loss": 0.9834, "step": 514 }, { "epoch": 0.030753612803057447, "grad_norm": 2.458188056945801, "learning_rate": 8.411274102945072e-06, "loss": 0.9972, "step": 515 }, { "epoch": 0.03081332855607309, "grad_norm": 2.438995361328125, "learning_rate": 8.413887219308707e-06, "loss": 0.9745, "step": 516 }, { "epoch": 0.030873044309088736, "grad_norm": 4.583730697631836, "learning_rate": 8.41649527639258e-06, "loss": 0.9562, "step": 517 }, { "epoch": 0.030932760062104384, "grad_norm": 2.6897096633911133, "learning_rate": 8.41909829374948e-06, "loss": 1.0083, "step": 518 }, { "epoch": 0.03099247581512003, "grad_norm": 3.0441529750823975, "learning_rate": 8.421696290819066e-06, "loss": 0.9893, "step": 519 }, { "epoch": 0.031052191568135674, "grad_norm": 2.0635697841644287, "learning_rate": 8.424289286928738e-06, "loss": 1.0298, "step": 520 }, { "epoch": 0.03111190732115132, "grad_norm": 3.0347108840942383, "learning_rate": 8.426877301294496e-06, "loss": 0.9614, "step": 521 }, { "epoch": 0.031171623074166967, "grad_norm": 4.3361406326293945, "learning_rate": 8.429460353021803e-06, "loss": 1.006, "step": 522 }, { "epoch": 0.03123133882718261, "grad_norm": 1.9658631086349487, "learning_rate": 8.432038461106419e-06, "loss": 0.9601, "step": 523 }, { "epoch": 0.031291054580198256, "grad_norm": 3.021038055419922, "learning_rate": 8.434611644435247e-06, "loss": 0.9897, "step": 524 }, { "epoch": 0.0313507703332139, "grad_norm": 3.089057207107544, "learning_rate": 8.437179921787166e-06, "loss": 0.9646, "step": 525 }, { "epoch": 0.031410486086229546, "grad_norm": 2.7011897563934326, "learning_rate": 8.439743311833847e-06, "loss": 1.0271, "step": 526 }, { "epoch": 0.03147020183924519, "grad_norm": 2.367265224456787, "learning_rate": 8.442301833140567e-06, "loss": 0.9947, "step": 527 }, { "epoch": 0.031529917592260835, "grad_norm": 2.0495121479034424, "learning_rate": 8.444855504167019e-06, "loss": 0.9474, "step": 528 }, { "epoch": 0.03158963334527649, "grad_norm": 2.4325318336486816, "learning_rate": 8.447404343268112e-06, "loss": 0.9274, "step": 529 }, { "epoch": 0.03164934909829213, "grad_norm": 3.066803455352783, "learning_rate": 8.449948368694763e-06, "loss": 1.0148, "step": 530 }, { "epoch": 0.031709064851307776, "grad_norm": 2.4180376529693604, "learning_rate": 8.452487598594677e-06, "loss": 0.9819, "step": 531 }, { "epoch": 0.03176878060432342, "grad_norm": 2.474968194961548, "learning_rate": 8.455022051013125e-06, "loss": 0.985, "step": 532 }, { "epoch": 0.031828496357339066, "grad_norm": 3.2009382247924805, "learning_rate": 8.457551743893712e-06, "loss": 0.9911, "step": 533 }, { "epoch": 0.03188821211035471, "grad_norm": 4.872452735900879, "learning_rate": 8.46007669507915e-06, "loss": 0.9862, "step": 534 }, { "epoch": 0.031947927863370355, "grad_norm": 3.7646145820617676, "learning_rate": 8.462596922311994e-06, "loss": 0.9776, "step": 535 }, { "epoch": 0.032007643616386, "grad_norm": 2.9750936031341553, "learning_rate": 8.465112443235412e-06, "loss": 0.9859, "step": 536 }, { "epoch": 0.032067359369401645, "grad_norm": 2.4788293838500977, "learning_rate": 8.467623275393906e-06, "loss": 0.9747, "step": 537 }, { "epoch": 0.032127075122417297, "grad_norm": 2.9639108180999756, "learning_rate": 8.47012943623406e-06, "loss": 1.0006, "step": 538 }, { "epoch": 0.03218679087543294, "grad_norm": 2.8174211978912354, "learning_rate": 8.472630943105258e-06, "loss": 1.014, "step": 539 }, { "epoch": 0.032246506628448586, "grad_norm": 2.4594955444335938, "learning_rate": 8.475127813260414e-06, "loss": 0.9792, "step": 540 }, { "epoch": 0.03230622238146423, "grad_norm": 3.64703631401062, "learning_rate": 8.477620063856677e-06, "loss": 0.9744, "step": 541 }, { "epoch": 0.032365938134479875, "grad_norm": 2.1643126010894775, "learning_rate": 8.480107711956149e-06, "loss": 1.0056, "step": 542 }, { "epoch": 0.03242565388749552, "grad_norm": 2.7954721450805664, "learning_rate": 8.482590774526568e-06, "loss": 0.9773, "step": 543 }, { "epoch": 0.032485369640511165, "grad_norm": 2.066420078277588, "learning_rate": 8.485069268442025e-06, "loss": 1.0143, "step": 544 }, { "epoch": 0.03254508539352681, "grad_norm": 2.787114143371582, "learning_rate": 8.487543210483632e-06, "loss": 0.9952, "step": 545 }, { "epoch": 0.032604801146542454, "grad_norm": 2.8437230587005615, "learning_rate": 8.490012617340222e-06, "loss": 1.0095, "step": 546 }, { "epoch": 0.032664516899558106, "grad_norm": 2.0401101112365723, "learning_rate": 8.492477505609005e-06, "loss": 0.9752, "step": 547 }, { "epoch": 0.03272423265257375, "grad_norm": 2.9107444286346436, "learning_rate": 8.494937891796253e-06, "loss": 0.9648, "step": 548 }, { "epoch": 0.032783948405589396, "grad_norm": 1.909001111984253, "learning_rate": 8.497393792317955e-06, "loss": 0.9546, "step": 549 }, { "epoch": 0.03284366415860504, "grad_norm": 3.488208055496216, "learning_rate": 8.49984522350048e-06, "loss": 0.9658, "step": 550 }, { "epoch": 0.032903379911620685, "grad_norm": 2.2848711013793945, "learning_rate": 8.50229220158122e-06, "loss": 1.0059, "step": 551 }, { "epoch": 0.03296309566463633, "grad_norm": 3.822787046432495, "learning_rate": 8.504734742709244e-06, "loss": 0.998, "step": 552 }, { "epoch": 0.033022811417651975, "grad_norm": 2.4269731044769287, "learning_rate": 8.507172862945934e-06, "loss": 0.9839, "step": 553 }, { "epoch": 0.03308252717066762, "grad_norm": 2.876436233520508, "learning_rate": 8.509606578265614e-06, "loss": 1.0231, "step": 554 }, { "epoch": 0.03314224292368327, "grad_norm": 2.9473342895507812, "learning_rate": 8.512035904556188e-06, "loss": 0.9927, "step": 555 }, { "epoch": 0.033201958676698916, "grad_norm": 2.8768765926361084, "learning_rate": 8.514460857619751e-06, "loss": 1.0282, "step": 556 }, { "epoch": 0.03326167442971456, "grad_norm": 2.474780321121216, "learning_rate": 8.516881453173216e-06, "loss": 0.9852, "step": 557 }, { "epoch": 0.033321390182730205, "grad_norm": 2.7827835083007812, "learning_rate": 8.519297706848918e-06, "loss": 1.0017, "step": 558 }, { "epoch": 0.03338110593574585, "grad_norm": 3.105313777923584, "learning_rate": 8.521709634195224e-06, "loss": 0.9753, "step": 559 }, { "epoch": 0.033440821688761495, "grad_norm": 3.017817258834839, "learning_rate": 8.524117250677129e-06, "loss": 1.0179, "step": 560 }, { "epoch": 0.03350053744177714, "grad_norm": 2.5324344635009766, "learning_rate": 8.526520571676858e-06, "loss": 0.9909, "step": 561 }, { "epoch": 0.033560253194792784, "grad_norm": 2.288086175918579, "learning_rate": 8.528919612494448e-06, "loss": 1.0111, "step": 562 }, { "epoch": 0.03361996894780843, "grad_norm": 3.3001222610473633, "learning_rate": 8.531314388348336e-06, "loss": 1.0045, "step": 563 }, { "epoch": 0.03367968470082408, "grad_norm": 2.6128640174865723, "learning_rate": 8.53370491437594e-06, "loss": 0.9767, "step": 564 }, { "epoch": 0.033739400453839725, "grad_norm": 2.3834519386291504, "learning_rate": 8.536091205634224e-06, "loss": 0.9736, "step": 565 }, { "epoch": 0.03379911620685537, "grad_norm": 2.4405932426452637, "learning_rate": 8.538473277100285e-06, "loss": 0.9598, "step": 566 }, { "epoch": 0.033858831959871015, "grad_norm": 1.967455267906189, "learning_rate": 8.540851143671898e-06, "loss": 0.9611, "step": 567 }, { "epoch": 0.03391854771288666, "grad_norm": 2.0312681198120117, "learning_rate": 8.543224820168088e-06, "loss": 0.9783, "step": 568 }, { "epoch": 0.033978263465902304, "grad_norm": 2.0535428524017334, "learning_rate": 8.545594321329675e-06, "loss": 1.0273, "step": 569 }, { "epoch": 0.03403797921891795, "grad_norm": 2.539078712463379, "learning_rate": 8.547959661819832e-06, "loss": 0.9443, "step": 570 }, { "epoch": 0.034097694971933594, "grad_norm": 2.629000663757324, "learning_rate": 8.55032085622462e-06, "loss": 0.9854, "step": 571 }, { "epoch": 0.03415741072494924, "grad_norm": 2.819732904434204, "learning_rate": 8.552677919053535e-06, "loss": 0.9636, "step": 572 }, { "epoch": 0.03421712647796489, "grad_norm": 4.769621849060059, "learning_rate": 8.555030864740043e-06, "loss": 0.9558, "step": 573 }, { "epoch": 0.034276842230980535, "grad_norm": 2.35378098487854, "learning_rate": 8.557379707642105e-06, "loss": 0.9752, "step": 574 }, { "epoch": 0.03433655798399618, "grad_norm": 3.133464813232422, "learning_rate": 8.559724462042706e-06, "loss": 0.9968, "step": 575 }, { "epoch": 0.034396273737011825, "grad_norm": 2.6676552295684814, "learning_rate": 8.562065142150377e-06, "loss": 0.9647, "step": 576 }, { "epoch": 0.03445598949002747, "grad_norm": 2.570261001586914, "learning_rate": 8.564401762099708e-06, "loss": 0.9818, "step": 577 }, { "epoch": 0.034515705243043114, "grad_norm": 4.245890140533447, "learning_rate": 8.566734335951866e-06, "loss": 0.9697, "step": 578 }, { "epoch": 0.03457542099605876, "grad_norm": 2.4430272579193115, "learning_rate": 8.569062877695096e-06, "loss": 0.9948, "step": 579 }, { "epoch": 0.034635136749074404, "grad_norm": 2.75565767288208, "learning_rate": 8.571387401245226e-06, "loss": 0.9811, "step": 580 }, { "epoch": 0.03469485250209005, "grad_norm": 2.055065393447876, "learning_rate": 8.573707920446175e-06, "loss": 0.9906, "step": 581 }, { "epoch": 0.0347545682551057, "grad_norm": 3.324329376220703, "learning_rate": 8.576024449070434e-06, "loss": 1.0014, "step": 582 }, { "epoch": 0.034814284008121345, "grad_norm": 2.006458044052124, "learning_rate": 8.578337000819562e-06, "loss": 1.0163, "step": 583 }, { "epoch": 0.03487399976113699, "grad_norm": 2.90090274810791, "learning_rate": 8.580645589324676e-06, "loss": 0.9297, "step": 584 }, { "epoch": 0.034933715514152634, "grad_norm": 1.91677987575531, "learning_rate": 8.58295022814693e-06, "loss": 1.0216, "step": 585 }, { "epoch": 0.03499343126716828, "grad_norm": 2.775322675704956, "learning_rate": 8.585250930777989e-06, "loss": 0.9819, "step": 586 }, { "epoch": 0.035053147020183924, "grad_norm": 2.649601697921753, "learning_rate": 8.587547710640514e-06, "loss": 0.9806, "step": 587 }, { "epoch": 0.03511286277319957, "grad_norm": 2.3494436740875244, "learning_rate": 8.589840581088615e-06, "loss": 1.0042, "step": 588 }, { "epoch": 0.03517257852621521, "grad_norm": 2.0728681087493896, "learning_rate": 8.592129555408336e-06, "loss": 0.9923, "step": 589 }, { "epoch": 0.03523229427923086, "grad_norm": 2.735191822052002, "learning_rate": 8.594414646818101e-06, "loss": 0.9669, "step": 590 }, { "epoch": 0.03529201003224651, "grad_norm": 4.787722587585449, "learning_rate": 8.596695868469184e-06, "loss": 0.9881, "step": 591 }, { "epoch": 0.035351725785262154, "grad_norm": 4.334989547729492, "learning_rate": 8.59897323344615e-06, "loss": 1.0026, "step": 592 }, { "epoch": 0.0354114415382778, "grad_norm": 2.9560718536376953, "learning_rate": 8.601246754767323e-06, "loss": 1.0279, "step": 593 }, { "epoch": 0.035471157291293444, "grad_norm": 3.288362741470337, "learning_rate": 8.603516445385211e-06, "loss": 0.9426, "step": 594 }, { "epoch": 0.03553087304430909, "grad_norm": 1.9483168125152588, "learning_rate": 8.605782318186972e-06, "loss": 0.9389, "step": 595 }, { "epoch": 0.03559058879732473, "grad_norm": 4.391626358032227, "learning_rate": 8.60804438599483e-06, "loss": 1.0041, "step": 596 }, { "epoch": 0.03565030455034038, "grad_norm": 2.6372108459472656, "learning_rate": 8.610302661566532e-06, "loss": 1.0214, "step": 597 }, { "epoch": 0.03571002030335602, "grad_norm": 3.0768442153930664, "learning_rate": 8.61255715759576e-06, "loss": 1.0155, "step": 598 }, { "epoch": 0.03576973605637167, "grad_norm": 2.127523422241211, "learning_rate": 8.614807886712578e-06, "loss": 0.9936, "step": 599 }, { "epoch": 0.03582945180938732, "grad_norm": 3.4810900688171387, "learning_rate": 8.617054861483838e-06, "loss": 0.9383, "step": 600 }, { "epoch": 0.03582945180938732, "eval_text_loss": 0.9976902008056641, "eval_text_runtime": 15.2266, "eval_text_samples_per_second": 262.697, "eval_text_steps_per_second": 0.525, "step": 600 }, { "epoch": 0.03582945180938732, "eval_image_loss": 0.7534788846969604, "eval_image_runtime": 5.2536, "eval_image_samples_per_second": 761.38, "eval_image_steps_per_second": 1.523, "step": 600 }, { "epoch": 0.03582945180938732, "eval_video_loss": 1.232935905456543, "eval_video_runtime": 76.6192, "eval_video_samples_per_second": 52.206, "eval_video_steps_per_second": 0.104, "step": 600 }, { "epoch": 0.035889167562402964, "grad_norm": 2.518705368041992, "learning_rate": 8.619298094413615e-06, "loss": 0.9701, "step": 601 }, { "epoch": 0.03594888331541861, "grad_norm": 2.948274850845337, "learning_rate": 8.621537597943617e-06, "loss": 0.9802, "step": 602 }, { "epoch": 0.036008599068434254, "grad_norm": 3.4024806022644043, "learning_rate": 8.623773384453606e-06, "loss": 0.996, "step": 603 }, { "epoch": 0.0360683148214499, "grad_norm": 2.644775390625, "learning_rate": 8.626005466261797e-06, "loss": 1.0331, "step": 604 }, { "epoch": 0.03612803057446554, "grad_norm": 2.4013051986694336, "learning_rate": 8.628233855625278e-06, "loss": 1.0016, "step": 605 }, { "epoch": 0.03618774632748119, "grad_norm": 2.4091379642486572, "learning_rate": 8.630458564740407e-06, "loss": 0.9823, "step": 606 }, { "epoch": 0.03624746208049683, "grad_norm": 4.709594249725342, "learning_rate": 8.632679605743214e-06, "loss": 1.025, "step": 607 }, { "epoch": 0.03630717783351248, "grad_norm": 2.6972291469573975, "learning_rate": 8.634896990709795e-06, "loss": 0.9887, "step": 608 }, { "epoch": 0.03636689358652813, "grad_norm": 3.1728968620300293, "learning_rate": 8.637110731656712e-06, "loss": 1.0224, "step": 609 }, { "epoch": 0.036426609339543774, "grad_norm": 2.4727485179901123, "learning_rate": 8.63932084054138e-06, "loss": 1.0074, "step": 610 }, { "epoch": 0.03648632509255942, "grad_norm": 2.8803155422210693, "learning_rate": 8.641527329262455e-06, "loss": 1.0064, "step": 611 }, { "epoch": 0.03654604084557506, "grad_norm": 2.813969135284424, "learning_rate": 8.643730209660218e-06, "loss": 1.0031, "step": 612 }, { "epoch": 0.03660575659859071, "grad_norm": 4.069641590118408, "learning_rate": 8.645929493516956e-06, "loss": 0.9677, "step": 613 }, { "epoch": 0.03666547235160635, "grad_norm": 2.3964812755584717, "learning_rate": 8.648125192557343e-06, "loss": 0.972, "step": 614 }, { "epoch": 0.036725188104622, "grad_norm": 2.1857728958129883, "learning_rate": 8.650317318448812e-06, "loss": 1.0023, "step": 615 }, { "epoch": 0.03678490385763764, "grad_norm": 2.4438772201538086, "learning_rate": 8.652505882801928e-06, "loss": 1.0026, "step": 616 }, { "epoch": 0.03684461961065329, "grad_norm": 2.08296799659729, "learning_rate": 8.654690897170755e-06, "loss": 1.0012, "step": 617 }, { "epoch": 0.03690433536366894, "grad_norm": 2.4553956985473633, "learning_rate": 8.656872373053227e-06, "loss": 0.9891, "step": 618 }, { "epoch": 0.03696405111668458, "grad_norm": 2.8117258548736572, "learning_rate": 8.659050321891505e-06, "loss": 1.0413, "step": 619 }, { "epoch": 0.03702376686970023, "grad_norm": 2.5321261882781982, "learning_rate": 8.661224755072342e-06, "loss": 0.9888, "step": 620 }, { "epoch": 0.03708348262271587, "grad_norm": 2.401458740234375, "learning_rate": 8.663395683927438e-06, "loss": 1.0169, "step": 621 }, { "epoch": 0.03714319837573152, "grad_norm": 2.31233286857605, "learning_rate": 8.665563119733792e-06, "loss": 0.981, "step": 622 }, { "epoch": 0.03720291412874716, "grad_norm": 4.575963020324707, "learning_rate": 8.66772707371406e-06, "loss": 0.9557, "step": 623 }, { "epoch": 0.03726262988176281, "grad_norm": 2.2125275135040283, "learning_rate": 8.669887557036894e-06, "loss": 0.957, "step": 624 }, { "epoch": 0.03732234563477845, "grad_norm": 2.22698974609375, "learning_rate": 8.672044580817298e-06, "loss": 0.9794, "step": 625 }, { "epoch": 0.037382061387794104, "grad_norm": 5.3504414558410645, "learning_rate": 8.67419815611697e-06, "loss": 0.9713, "step": 626 }, { "epoch": 0.03744177714080975, "grad_norm": 2.6838252544403076, "learning_rate": 8.676348293944629e-06, "loss": 1.0006, "step": 627 }, { "epoch": 0.03750149289382539, "grad_norm": 3.1517152786254883, "learning_rate": 8.678495005256371e-06, "loss": 0.9405, "step": 628 }, { "epoch": 0.03756120864684104, "grad_norm": 3.678020715713501, "learning_rate": 8.680638300955992e-06, "loss": 1.0257, "step": 629 }, { "epoch": 0.03762092439985668, "grad_norm": 2.767587184906006, "learning_rate": 8.682778191895323e-06, "loss": 0.9671, "step": 630 }, { "epoch": 0.03768064015287233, "grad_norm": 2.4870545864105225, "learning_rate": 8.684914688874564e-06, "loss": 0.9637, "step": 631 }, { "epoch": 0.03774035590588797, "grad_norm": 3.6058225631713867, "learning_rate": 8.687047802642605e-06, "loss": 0.9979, "step": 632 }, { "epoch": 0.03780007165890362, "grad_norm": 3.1595208644866943, "learning_rate": 8.689177543897352e-06, "loss": 1.015, "step": 633 }, { "epoch": 0.03785978741191926, "grad_norm": 2.152146339416504, "learning_rate": 8.691303923286056e-06, "loss": 1.0144, "step": 634 }, { "epoch": 0.03791950316493491, "grad_norm": 2.3754711151123047, "learning_rate": 8.693426951405617e-06, "loss": 0.9836, "step": 635 }, { "epoch": 0.03797921891795056, "grad_norm": 3.537501335144043, "learning_rate": 8.69554663880292e-06, "loss": 0.9824, "step": 636 }, { "epoch": 0.0380389346709662, "grad_norm": 4.447989463806152, "learning_rate": 8.697662995975132e-06, "loss": 1.0392, "step": 637 }, { "epoch": 0.03809865042398185, "grad_norm": 3.2873780727386475, "learning_rate": 8.699776033370024e-06, "loss": 0.9638, "step": 638 }, { "epoch": 0.03815836617699749, "grad_norm": 2.5640757083892822, "learning_rate": 8.701885761386281e-06, "loss": 0.9935, "step": 639 }, { "epoch": 0.03821808193001314, "grad_norm": 3.087899923324585, "learning_rate": 8.7039921903738e-06, "loss": 0.9798, "step": 640 }, { "epoch": 0.03827779768302878, "grad_norm": 2.6570358276367188, "learning_rate": 8.706095330634008e-06, "loss": 1.0136, "step": 641 }, { "epoch": 0.038337513436044426, "grad_norm": 4.44503116607666, "learning_rate": 8.70819519242015e-06, "loss": 1.0038, "step": 642 }, { "epoch": 0.03839722918906007, "grad_norm": 3.275744915008545, "learning_rate": 8.7102917859376e-06, "loss": 0.9675, "step": 643 }, { "epoch": 0.03845694494207572, "grad_norm": 1.875731348991394, "learning_rate": 8.712385121344155e-06, "loss": 1.018, "step": 644 }, { "epoch": 0.03851666069509137, "grad_norm": 2.892348527908325, "learning_rate": 8.714475208750326e-06, "loss": 1.0228, "step": 645 }, { "epoch": 0.03857637644810701, "grad_norm": 2.4299073219299316, "learning_rate": 8.716562058219636e-06, "loss": 0.9965, "step": 646 }, { "epoch": 0.03863609220112266, "grad_norm": 2.2295377254486084, "learning_rate": 8.718645679768912e-06, "loss": 0.9987, "step": 647 }, { "epoch": 0.0386958079541383, "grad_norm": 5.1300530433654785, "learning_rate": 8.72072608336857e-06, "loss": 0.9888, "step": 648 }, { "epoch": 0.03875552370715395, "grad_norm": 2.044740915298462, "learning_rate": 8.7228032789429e-06, "loss": 0.9919, "step": 649 }, { "epoch": 0.03881523946016959, "grad_norm": 3.495175361633301, "learning_rate": 8.724877276370354e-06, "loss": 0.9807, "step": 650 }, { "epoch": 0.038874955213185236, "grad_norm": 4.404551029205322, "learning_rate": 8.726948085483828e-06, "loss": 1.0033, "step": 651 }, { "epoch": 0.03893467096620088, "grad_norm": 2.1326987743377686, "learning_rate": 8.729015716070935e-06, "loss": 0.9573, "step": 652 }, { "epoch": 0.03899438671921653, "grad_norm": 2.3324410915374756, "learning_rate": 8.731080177874292e-06, "loss": 0.9847, "step": 653 }, { "epoch": 0.03905410247223218, "grad_norm": 2.4370675086975098, "learning_rate": 8.73314148059179e-06, "loss": 0.9805, "step": 654 }, { "epoch": 0.03911381822524782, "grad_norm": 3.7332046031951904, "learning_rate": 8.735199633876863e-06, "loss": 0.9747, "step": 655 }, { "epoch": 0.03917353397826347, "grad_norm": 6.57012939453125, "learning_rate": 8.737254647338775e-06, "loss": 1.0169, "step": 656 }, { "epoch": 0.03923324973127911, "grad_norm": 2.566936731338501, "learning_rate": 8.739306530542869e-06, "loss": 0.9678, "step": 657 }, { "epoch": 0.039292965484294756, "grad_norm": 2.512371301651001, "learning_rate": 8.741355293010847e-06, "loss": 1.0114, "step": 658 }, { "epoch": 0.0393526812373104, "grad_norm": 3.0091445446014404, "learning_rate": 8.743400944221035e-06, "loss": 1.0069, "step": 659 }, { "epoch": 0.039412396990326046, "grad_norm": 2.722748041152954, "learning_rate": 8.745443493608635e-06, "loss": 0.9785, "step": 660 }, { "epoch": 0.03947211274334169, "grad_norm": 2.937460422515869, "learning_rate": 8.747482950566004e-06, "loss": 0.9816, "step": 661 }, { "epoch": 0.03953182849635734, "grad_norm": 3.944471836090088, "learning_rate": 8.7495193244429e-06, "loss": 0.9913, "step": 662 }, { "epoch": 0.03959154424937299, "grad_norm": 1.824366569519043, "learning_rate": 8.751552624546735e-06, "loss": 0.9706, "step": 663 }, { "epoch": 0.03965126000238863, "grad_norm": 2.68739914894104, "learning_rate": 8.753582860142847e-06, "loss": 0.9796, "step": 664 }, { "epoch": 0.039710975755404276, "grad_norm": 2.6278557777404785, "learning_rate": 8.755610040454741e-06, "loss": 0.9635, "step": 665 }, { "epoch": 0.03977069150841992, "grad_norm": 2.4711337089538574, "learning_rate": 8.757634174664344e-06, "loss": 0.9437, "step": 666 }, { "epoch": 0.039830407261435566, "grad_norm": 3.0002641677856445, "learning_rate": 8.75965527191225e-06, "loss": 0.9998, "step": 667 }, { "epoch": 0.03989012301445121, "grad_norm": 2.5121090412139893, "learning_rate": 8.76167334129798e-06, "loss": 1.0088, "step": 668 }, { "epoch": 0.039949838767466855, "grad_norm": 4.73759651184082, "learning_rate": 8.763688391880209e-06, "loss": 0.9914, "step": 669 }, { "epoch": 0.0400095545204825, "grad_norm": 2.4189326763153076, "learning_rate": 8.76570043267703e-06, "loss": 1.0225, "step": 670 }, { "epoch": 0.04006927027349815, "grad_norm": 2.605733871459961, "learning_rate": 8.767709472666177e-06, "loss": 0.974, "step": 671 }, { "epoch": 0.0401289860265138, "grad_norm": 2.459507703781128, "learning_rate": 8.769715520785286e-06, "loss": 0.9281, "step": 672 }, { "epoch": 0.04018870177952944, "grad_norm": 3.35530424118042, "learning_rate": 8.771718585932114e-06, "loss": 1.0167, "step": 673 }, { "epoch": 0.040248417532545086, "grad_norm": 2.5841143131256104, "learning_rate": 8.773718676964786e-06, "loss": 1.0227, "step": 674 }, { "epoch": 0.04030813328556073, "grad_norm": 2.1522419452667236, "learning_rate": 8.77571580270203e-06, "loss": 1.0269, "step": 675 }, { "epoch": 0.040367849038576376, "grad_norm": 3.170125722885132, "learning_rate": 8.77770997192341e-06, "loss": 0.9538, "step": 676 }, { "epoch": 0.04042756479159202, "grad_norm": 3.135302782058716, "learning_rate": 8.77970119336955e-06, "loss": 0.9746, "step": 677 }, { "epoch": 0.040487280544607665, "grad_norm": 4.002425193786621, "learning_rate": 8.78168947574238e-06, "loss": 0.9486, "step": 678 }, { "epoch": 0.04054699629762331, "grad_norm": 3.799137830734253, "learning_rate": 8.783674827705344e-06, "loss": 0.9546, "step": 679 }, { "epoch": 0.04060671205063896, "grad_norm": 2.376836061477661, "learning_rate": 8.785657257883642e-06, "loss": 0.9741, "step": 680 }, { "epoch": 0.040666427803654606, "grad_norm": 2.04787015914917, "learning_rate": 8.787636774864448e-06, "loss": 0.9763, "step": 681 }, { "epoch": 0.04072614355667025, "grad_norm": 2.9649362564086914, "learning_rate": 8.789613387197141e-06, "loss": 0.9933, "step": 682 }, { "epoch": 0.040785859309685896, "grad_norm": 3.1383228302001953, "learning_rate": 8.791587103393509e-06, "loss": 0.9754, "step": 683 }, { "epoch": 0.04084557506270154, "grad_norm": 2.2213938236236572, "learning_rate": 8.793557931927987e-06, "loss": 0.97, "step": 684 }, { "epoch": 0.040905290815717185, "grad_norm": 2.333754539489746, "learning_rate": 8.795525881237871e-06, "loss": 0.9181, "step": 685 }, { "epoch": 0.04096500656873283, "grad_norm": 2.7148966789245605, "learning_rate": 8.797490959723525e-06, "loss": 1.0457, "step": 686 }, { "epoch": 0.041024722321748475, "grad_norm": 3.00529146194458, "learning_rate": 8.79945317574861e-06, "loss": 0.9932, "step": 687 }, { "epoch": 0.04108443807476412, "grad_norm": 1.9826706647872925, "learning_rate": 8.801412537640288e-06, "loss": 0.9707, "step": 688 }, { "epoch": 0.04114415382777977, "grad_norm": 5.461206436157227, "learning_rate": 8.803369053689437e-06, "loss": 0.9697, "step": 689 }, { "epoch": 0.041203869580795416, "grad_norm": 2.302966594696045, "learning_rate": 8.805322732150862e-06, "loss": 0.9859, "step": 690 }, { "epoch": 0.04126358533381106, "grad_norm": 2.268347978591919, "learning_rate": 8.807273581243504e-06, "loss": 0.9934, "step": 691 }, { "epoch": 0.041323301086826705, "grad_norm": 2.211693048477173, "learning_rate": 8.809221609150645e-06, "loss": 0.9937, "step": 692 }, { "epoch": 0.04138301683984235, "grad_norm": 5.459770202636719, "learning_rate": 8.81116682402012e-06, "loss": 0.9922, "step": 693 }, { "epoch": 0.041442732592857995, "grad_norm": 3.3929593563079834, "learning_rate": 8.813109233964513e-06, "loss": 1.0207, "step": 694 }, { "epoch": 0.04150244834587364, "grad_norm": 2.0100908279418945, "learning_rate": 8.815048847061368e-06, "loss": 0.9793, "step": 695 }, { "epoch": 0.041562164098889284, "grad_norm": 2.911569356918335, "learning_rate": 8.816985671353384e-06, "loss": 0.9548, "step": 696 }, { "epoch": 0.04162187985190493, "grad_norm": 3.2982544898986816, "learning_rate": 8.818919714848617e-06, "loss": 0.9896, "step": 697 }, { "epoch": 0.04168159560492058, "grad_norm": 3.996865749359131, "learning_rate": 8.820850985520685e-06, "loss": 1.0034, "step": 698 }, { "epoch": 0.041741311357936225, "grad_norm": 2.4150424003601074, "learning_rate": 8.822779491308954e-06, "loss": 0.9903, "step": 699 }, { "epoch": 0.04180102711095187, "grad_norm": 3.057051420211792, "learning_rate": 8.824705240118747e-06, "loss": 0.9799, "step": 700 }, { "epoch": 0.041860742863967515, "grad_norm": 3.4571197032928467, "learning_rate": 8.826628239821524e-06, "loss": 0.9754, "step": 701 }, { "epoch": 0.04192045861698316, "grad_norm": 2.081845760345459, "learning_rate": 8.828548498255085e-06, "loss": 1.0089, "step": 702 }, { "epoch": 0.041980174369998804, "grad_norm": 3.626558542251587, "learning_rate": 8.830466023223761e-06, "loss": 1.0401, "step": 703 }, { "epoch": 0.04203989012301445, "grad_norm": 7.9053144454956055, "learning_rate": 8.832380822498598e-06, "loss": 1.033, "step": 704 }, { "epoch": 0.042099605876030094, "grad_norm": 2.606050491333008, "learning_rate": 8.834292903817556e-06, "loss": 0.9615, "step": 705 }, { "epoch": 0.042159321629045746, "grad_norm": 3.3900890350341797, "learning_rate": 8.836202274885684e-06, "loss": 1.0368, "step": 706 }, { "epoch": 0.04221903738206139, "grad_norm": 3.885801076889038, "learning_rate": 8.838108943375318e-06, "loss": 1.0392, "step": 707 }, { "epoch": 0.042278753135077035, "grad_norm": 2.7427780628204346, "learning_rate": 8.840012916926257e-06, "loss": 0.9565, "step": 708 }, { "epoch": 0.04233846888809268, "grad_norm": 2.598067045211792, "learning_rate": 8.841914203145959e-06, "loss": 0.9514, "step": 709 }, { "epoch": 0.042398184641108325, "grad_norm": 2.253462314605713, "learning_rate": 8.843812809609705e-06, "loss": 1.001, "step": 710 }, { "epoch": 0.04245790039412397, "grad_norm": 2.0500874519348145, "learning_rate": 8.845708743860796e-06, "loss": 0.9589, "step": 711 }, { "epoch": 0.042517616147139614, "grad_norm": 8.783397674560547, "learning_rate": 8.847602013410729e-06, "loss": 1.0065, "step": 712 }, { "epoch": 0.04257733190015526, "grad_norm": 1.9075433015823364, "learning_rate": 8.849492625739366e-06, "loss": 0.9609, "step": 713 }, { "epoch": 0.042637047653170904, "grad_norm": 3.118518829345703, "learning_rate": 8.851380588295126e-06, "loss": 0.9744, "step": 714 }, { "epoch": 0.042696763406186555, "grad_norm": 2.952989339828491, "learning_rate": 8.853265908495152e-06, "loss": 0.9762, "step": 715 }, { "epoch": 0.0427564791592022, "grad_norm": 4.32663631439209, "learning_rate": 8.855148593725487e-06, "loss": 0.9755, "step": 716 }, { "epoch": 0.042816194912217845, "grad_norm": 2.047314167022705, "learning_rate": 8.857028651341247e-06, "loss": 1.0147, "step": 717 }, { "epoch": 0.04287591066523349, "grad_norm": 3.9560232162475586, "learning_rate": 8.858906088666798e-06, "loss": 0.978, "step": 718 }, { "epoch": 0.042935626418249134, "grad_norm": 2.98012113571167, "learning_rate": 8.860780912995925e-06, "loss": 1.0278, "step": 719 }, { "epoch": 0.04299534217126478, "grad_norm": 3.4598562717437744, "learning_rate": 8.862653131591993e-06, "loss": 0.965, "step": 720 }, { "epoch": 0.043055057924280424, "grad_norm": 6.046879291534424, "learning_rate": 8.864522751688137e-06, "loss": 0.9815, "step": 721 }, { "epoch": 0.04311477367729607, "grad_norm": 1.8771892786026, "learning_rate": 8.866389780487405e-06, "loss": 0.9812, "step": 722 }, { "epoch": 0.04317448943031171, "grad_norm": 2.472683906555176, "learning_rate": 8.868254225162942e-06, "loss": 0.9657, "step": 723 }, { "epoch": 0.043234205183327365, "grad_norm": 2.439958333969116, "learning_rate": 8.870116092858147e-06, "loss": 1.0301, "step": 724 }, { "epoch": 0.04329392093634301, "grad_norm": 2.3849401473999023, "learning_rate": 8.871975390686844e-06, "loss": 0.9574, "step": 725 }, { "epoch": 0.043353636689358654, "grad_norm": 2.2923457622528076, "learning_rate": 8.873832125733434e-06, "loss": 0.9771, "step": 726 }, { "epoch": 0.0434133524423743, "grad_norm": 4.130980014801025, "learning_rate": 8.875686305053067e-06, "loss": 0.9583, "step": 727 }, { "epoch": 0.043473068195389944, "grad_norm": 4.277248859405518, "learning_rate": 8.877537935671803e-06, "loss": 0.9606, "step": 728 }, { "epoch": 0.04353278394840559, "grad_norm": 3.733525514602661, "learning_rate": 8.879387024586762e-06, "loss": 0.9577, "step": 729 }, { "epoch": 0.04359249970142123, "grad_norm": 2.1589226722717285, "learning_rate": 8.881233578766294e-06, "loss": 0.981, "step": 730 }, { "epoch": 0.04365221545443688, "grad_norm": 2.1718368530273438, "learning_rate": 8.883077605150128e-06, "loss": 0.9603, "step": 731 }, { "epoch": 0.04371193120745252, "grad_norm": 2.904508352279663, "learning_rate": 8.884919110649536e-06, "loss": 0.9654, "step": 732 }, { "epoch": 0.043771646960468175, "grad_norm": 2.6464998722076416, "learning_rate": 8.88675810214748e-06, "loss": 0.9862, "step": 733 }, { "epoch": 0.04383136271348382, "grad_norm": 3.124312162399292, "learning_rate": 8.888594586498774e-06, "loss": 0.9796, "step": 734 }, { "epoch": 0.043891078466499464, "grad_norm": 4.365747928619385, "learning_rate": 8.890428570530233e-06, "loss": 1.028, "step": 735 }, { "epoch": 0.04395079421951511, "grad_norm": 2.943941354751587, "learning_rate": 8.892260061040826e-06, "loss": 0.9662, "step": 736 }, { "epoch": 0.044010509972530754, "grad_norm": 2.199309825897217, "learning_rate": 8.894089064801827e-06, "loss": 0.9512, "step": 737 }, { "epoch": 0.0440702257255464, "grad_norm": 2.6392436027526855, "learning_rate": 8.895915588556967e-06, "loss": 0.9957, "step": 738 }, { "epoch": 0.04412994147856204, "grad_norm": 2.6318507194519043, "learning_rate": 8.897739639022585e-06, "loss": 0.9736, "step": 739 }, { "epoch": 0.04418965723157769, "grad_norm": 2.9821219444274902, "learning_rate": 8.899561222887767e-06, "loss": 0.9952, "step": 740 }, { "epoch": 0.04424937298459333, "grad_norm": 2.2599985599517822, "learning_rate": 8.901380346814504e-06, "loss": 0.9991, "step": 741 }, { "epoch": 0.044309088737608984, "grad_norm": 2.719639778137207, "learning_rate": 8.90319701743783e-06, "loss": 0.9995, "step": 742 }, { "epoch": 0.04436880449062463, "grad_norm": 1.757688283920288, "learning_rate": 8.905011241365974e-06, "loss": 0.9601, "step": 743 }, { "epoch": 0.044428520243640274, "grad_norm": 3.33503794670105, "learning_rate": 8.906823025180498e-06, "loss": 1.0094, "step": 744 }, { "epoch": 0.04448823599665592, "grad_norm": 3.337151050567627, "learning_rate": 8.908632375436448e-06, "loss": 0.969, "step": 745 }, { "epoch": 0.04454795174967156, "grad_norm": 2.1429412364959717, "learning_rate": 8.910439298662486e-06, "loss": 1.0013, "step": 746 }, { "epoch": 0.04460766750268721, "grad_norm": 2.071488380432129, "learning_rate": 8.91224380136104e-06, "loss": 0.9494, "step": 747 }, { "epoch": 0.04466738325570285, "grad_norm": 2.697925329208374, "learning_rate": 8.91404589000844e-06, "loss": 0.9489, "step": 748 }, { "epoch": 0.0447270990087185, "grad_norm": 3.381443738937378, "learning_rate": 8.91584557105506e-06, "loss": 0.9884, "step": 749 }, { "epoch": 0.04478681476173414, "grad_norm": 1.9096513986587524, "learning_rate": 8.917642850925456e-06, "loss": 0.9971, "step": 750 }, { "epoch": 0.044846530514749794, "grad_norm": 3.4996845722198486, "learning_rate": 8.919437736018498e-06, "loss": 0.9617, "step": 751 }, { "epoch": 0.04490624626776544, "grad_norm": 1.9980181455612183, "learning_rate": 8.921230232707519e-06, "loss": 0.9806, "step": 752 }, { "epoch": 0.04496596202078108, "grad_norm": 4.404122352600098, "learning_rate": 8.923020347340436e-06, "loss": 0.9903, "step": 753 }, { "epoch": 0.04502567777379673, "grad_norm": 6.002746105194092, "learning_rate": 8.9248080862399e-06, "loss": 0.9765, "step": 754 }, { "epoch": 0.04508539352681237, "grad_norm": 2.2380974292755127, "learning_rate": 8.926593455703413e-06, "loss": 0.9757, "step": 755 }, { "epoch": 0.04514510927982802, "grad_norm": 2.2632992267608643, "learning_rate": 8.928376462003479e-06, "loss": 0.9615, "step": 756 }, { "epoch": 0.04520482503284366, "grad_norm": 2.0294721126556396, "learning_rate": 8.930157111387721e-06, "loss": 0.977, "step": 757 }, { "epoch": 0.04526454078585931, "grad_norm": 4.09246826171875, "learning_rate": 8.931935410079025e-06, "loss": 0.9342, "step": 758 }, { "epoch": 0.04532425653887495, "grad_norm": 3.2542624473571777, "learning_rate": 8.93371136427566e-06, "loss": 1.0074, "step": 759 }, { "epoch": 0.045383972291890604, "grad_norm": 2.487407684326172, "learning_rate": 8.935484980151413e-06, "loss": 0.9208, "step": 760 }, { "epoch": 0.04544368804490625, "grad_norm": 2.9981188774108887, "learning_rate": 8.937256263855714e-06, "loss": 0.978, "step": 761 }, { "epoch": 0.04550340379792189, "grad_norm": 1.8665274381637573, "learning_rate": 8.939025221513775e-06, "loss": 0.9759, "step": 762 }, { "epoch": 0.04556311955093754, "grad_norm": 8.25656509399414, "learning_rate": 8.9407918592267e-06, "loss": 0.9998, "step": 763 }, { "epoch": 0.04562283530395318, "grad_norm": 2.3517508506774902, "learning_rate": 8.942556183071624e-06, "loss": 0.9904, "step": 764 }, { "epoch": 0.04568255105696883, "grad_norm": 3.9162189960479736, "learning_rate": 8.944318199101834e-06, "loss": 1.003, "step": 765 }, { "epoch": 0.04574226680998447, "grad_norm": 4.084987163543701, "learning_rate": 8.946077913346897e-06, "loss": 0.9899, "step": 766 }, { "epoch": 0.04580198256300012, "grad_norm": 2.0034313201904297, "learning_rate": 8.947835331812773e-06, "loss": 0.9964, "step": 767 }, { "epoch": 0.04586169831601576, "grad_norm": 2.188553810119629, "learning_rate": 8.949590460481958e-06, "loss": 0.9809, "step": 768 }, { "epoch": 0.04592141406903141, "grad_norm": 2.402583599090576, "learning_rate": 8.95134330531358e-06, "loss": 0.969, "step": 769 }, { "epoch": 0.04598112982204706, "grad_norm": 2.268843650817871, "learning_rate": 8.953093872243545e-06, "loss": 0.9802, "step": 770 }, { "epoch": 0.0460408455750627, "grad_norm": 2.187793016433716, "learning_rate": 8.954842167184643e-06, "loss": 0.9699, "step": 771 }, { "epoch": 0.04610056132807835, "grad_norm": 3.4921305179595947, "learning_rate": 8.956588196026675e-06, "loss": 0.9668, "step": 772 }, { "epoch": 0.04616027708109399, "grad_norm": 2.011528491973877, "learning_rate": 8.958331964636564e-06, "loss": 0.9727, "step": 773 }, { "epoch": 0.04621999283410964, "grad_norm": 3.222602605819702, "learning_rate": 8.960073478858481e-06, "loss": 0.9824, "step": 774 }, { "epoch": 0.04627970858712528, "grad_norm": 5.3612895011901855, "learning_rate": 8.96181274451396e-06, "loss": 0.9954, "step": 775 }, { "epoch": 0.046339424340140926, "grad_norm": 2.246774435043335, "learning_rate": 8.963549767402015e-06, "loss": 1.013, "step": 776 }, { "epoch": 0.04639914009315658, "grad_norm": 3.0282864570617676, "learning_rate": 8.965284553299253e-06, "loss": 0.9601, "step": 777 }, { "epoch": 0.04645885584617222, "grad_norm": 2.042227268218994, "learning_rate": 8.967017107959996e-06, "loss": 0.9804, "step": 778 }, { "epoch": 0.04651857159918787, "grad_norm": 3.539534330368042, "learning_rate": 8.968747437116387e-06, "loss": 0.9738, "step": 779 }, { "epoch": 0.04657828735220351, "grad_norm": 3.364802360534668, "learning_rate": 8.970475546478512e-06, "loss": 0.9775, "step": 780 }, { "epoch": 0.04663800310521916, "grad_norm": 2.16859769821167, "learning_rate": 8.972201441734504e-06, "loss": 0.9885, "step": 781 }, { "epoch": 0.0466977188582348, "grad_norm": 3.95060133934021, "learning_rate": 8.973925128550667e-06, "loss": 0.9421, "step": 782 }, { "epoch": 0.04675743461125045, "grad_norm": 2.207834243774414, "learning_rate": 8.975646612571575e-06, "loss": 0.9423, "step": 783 }, { "epoch": 0.04681715036426609, "grad_norm": 2.652540922164917, "learning_rate": 8.977365899420194e-06, "loss": 0.9897, "step": 784 }, { "epoch": 0.046876866117281736, "grad_norm": 6.287714004516602, "learning_rate": 8.979082994697987e-06, "loss": 0.9857, "step": 785 }, { "epoch": 0.04693658187029739, "grad_norm": 2.984370470046997, "learning_rate": 8.98079790398502e-06, "loss": 0.9631, "step": 786 }, { "epoch": 0.04699629762331303, "grad_norm": 2.1319282054901123, "learning_rate": 8.982510632840077e-06, "loss": 0.9631, "step": 787 }, { "epoch": 0.04705601337632868, "grad_norm": 2.2190325260162354, "learning_rate": 8.984221186800765e-06, "loss": 0.9774, "step": 788 }, { "epoch": 0.04711572912934432, "grad_norm": 2.5911667346954346, "learning_rate": 8.985929571383619e-06, "loss": 0.9597, "step": 789 }, { "epoch": 0.04717544488235997, "grad_norm": 4.204863548278809, "learning_rate": 8.987635792084223e-06, "loss": 1.0124, "step": 790 }, { "epoch": 0.04723516063537561, "grad_norm": 2.413734197616577, "learning_rate": 8.989339854377291e-06, "loss": 1.0094, "step": 791 }, { "epoch": 0.047294876388391256, "grad_norm": 3.404919147491455, "learning_rate": 8.991041763716792e-06, "loss": 0.9348, "step": 792 }, { "epoch": 0.0473545921414069, "grad_norm": 1.928868055343628, "learning_rate": 8.992741525536052e-06, "loss": 0.9707, "step": 793 }, { "epoch": 0.047414307894422546, "grad_norm": 3.0790939331054688, "learning_rate": 8.994439145247854e-06, "loss": 0.9943, "step": 794 }, { "epoch": 0.0474740236474382, "grad_norm": 2.028738021850586, "learning_rate": 8.996134628244537e-06, "loss": 0.9832, "step": 795 }, { "epoch": 0.04753373940045384, "grad_norm": 2.4111809730529785, "learning_rate": 8.997827979898113e-06, "loss": 1.0124, "step": 796 }, { "epoch": 0.04759345515346949, "grad_norm": 3.1803500652313232, "learning_rate": 8.999519205560352e-06, "loss": 1.017, "step": 797 }, { "epoch": 0.04765317090648513, "grad_norm": 2.785914897918701, "learning_rate": 9.001208310562898e-06, "loss": 0.976, "step": 798 }, { "epoch": 0.047712886659500776, "grad_norm": 6.098819732666016, "learning_rate": 9.00289530021736e-06, "loss": 0.9968, "step": 799 }, { "epoch": 0.04777260241251642, "grad_norm": 2.123110294342041, "learning_rate": 9.004580179815417e-06, "loss": 0.9765, "step": 800 }, { "epoch": 0.04777260241251642, "eval_text_loss": 0.9918748140335083, "eval_text_runtime": 15.0767, "eval_text_samples_per_second": 265.311, "eval_text_steps_per_second": 0.531, "step": 800 }, { "epoch": 0.04777260241251642, "eval_image_loss": 0.7426975965499878, "eval_image_runtime": 5.0865, "eval_image_samples_per_second": 786.396, "eval_image_steps_per_second": 1.573, "step": 800 }, { "epoch": 0.04777260241251642, "eval_video_loss": 1.2256122827529907, "eval_video_runtime": 77.3609, "eval_video_samples_per_second": 51.706, "eval_video_steps_per_second": 0.103, "step": 800 }, { "epoch": 0.047832318165532066, "grad_norm": 3.616356611251831, "learning_rate": 9.006262954628923e-06, "loss": 1.0349, "step": 801 }, { "epoch": 0.04789203391854771, "grad_norm": 2.1511545181274414, "learning_rate": 9.007943629909987e-06, "loss": 0.942, "step": 802 }, { "epoch": 0.047951749671563355, "grad_norm": 1.7652862071990967, "learning_rate": 9.009622210891091e-06, "loss": 0.9616, "step": 803 }, { "epoch": 0.04801146542457901, "grad_norm": 2.1160237789154053, "learning_rate": 9.011298702785185e-06, "loss": 0.9924, "step": 804 }, { "epoch": 0.04807118117759465, "grad_norm": 3.158271551132202, "learning_rate": 9.012973110785771e-06, "loss": 0.9972, "step": 805 }, { "epoch": 0.0481308969306103, "grad_norm": 5.289779186248779, "learning_rate": 9.014645440067014e-06, "loss": 1.02, "step": 806 }, { "epoch": 0.04819061268362594, "grad_norm": 2.4078431129455566, "learning_rate": 9.016315695783834e-06, "loss": 0.9945, "step": 807 }, { "epoch": 0.048250328436641586, "grad_norm": 2.655348777770996, "learning_rate": 9.017983883071988e-06, "loss": 1.015, "step": 808 }, { "epoch": 0.04831004418965723, "grad_norm": 2.2565886974334717, "learning_rate": 9.01965000704819e-06, "loss": 0.9481, "step": 809 }, { "epoch": 0.048369759942672876, "grad_norm": 3.21986985206604, "learning_rate": 9.021314072810186e-06, "loss": 0.9568, "step": 810 }, { "epoch": 0.04842947569568852, "grad_norm": 2.2950737476348877, "learning_rate": 9.022976085436855e-06, "loss": 0.9561, "step": 811 }, { "epoch": 0.048489191448704165, "grad_norm": 2.152564287185669, "learning_rate": 9.024636049988292e-06, "loss": 0.9727, "step": 812 }, { "epoch": 0.04854890720171982, "grad_norm": 2.965430974960327, "learning_rate": 9.02629397150592e-06, "loss": 1.0125, "step": 813 }, { "epoch": 0.04860862295473546, "grad_norm": 2.394879102706909, "learning_rate": 9.027949855012566e-06, "loss": 0.971, "step": 814 }, { "epoch": 0.048668338707751106, "grad_norm": 2.601437568664551, "learning_rate": 9.029603705512553e-06, "loss": 0.9703, "step": 815 }, { "epoch": 0.04872805446076675, "grad_norm": 2.1320853233337402, "learning_rate": 9.031255527991797e-06, "loss": 0.9422, "step": 816 }, { "epoch": 0.048787770213782396, "grad_norm": 3.155949354171753, "learning_rate": 9.032905327417899e-06, "loss": 0.9894, "step": 817 }, { "epoch": 0.04884748596679804, "grad_norm": 2.1915736198425293, "learning_rate": 9.03455310874022e-06, "loss": 0.9684, "step": 818 }, { "epoch": 0.048907201719813685, "grad_norm": 2.811359167098999, "learning_rate": 9.036198876889996e-06, "loss": 0.9475, "step": 819 }, { "epoch": 0.04896691747282933, "grad_norm": 2.313154458999634, "learning_rate": 9.037842636780393e-06, "loss": 0.9551, "step": 820 }, { "epoch": 0.049026633225844975, "grad_norm": 3.359177350997925, "learning_rate": 9.039484393306625e-06, "loss": 0.9636, "step": 821 }, { "epoch": 0.049086348978860626, "grad_norm": 2.8656022548675537, "learning_rate": 9.041124151346025e-06, "loss": 0.9982, "step": 822 }, { "epoch": 0.04914606473187627, "grad_norm": 1.9728329181671143, "learning_rate": 9.04276191575814e-06, "loss": 0.9926, "step": 823 }, { "epoch": 0.049205780484891916, "grad_norm": 2.0464489459991455, "learning_rate": 9.044397691384808e-06, "loss": 0.965, "step": 824 }, { "epoch": 0.04926549623790756, "grad_norm": 3.998561382293701, "learning_rate": 9.046031483050253e-06, "loss": 0.9333, "step": 825 }, { "epoch": 0.049325211990923205, "grad_norm": 3.2072629928588867, "learning_rate": 9.047663295561166e-06, "loss": 1.048, "step": 826 }, { "epoch": 0.04938492774393885, "grad_norm": 3.4215681552886963, "learning_rate": 9.049293133706794e-06, "loss": 1.0231, "step": 827 }, { "epoch": 0.049444643496954495, "grad_norm": 2.75166916847229, "learning_rate": 9.050921002259017e-06, "loss": 0.9817, "step": 828 }, { "epoch": 0.04950435924997014, "grad_norm": 2.7486047744750977, "learning_rate": 9.05254690597244e-06, "loss": 1.0041, "step": 829 }, { "epoch": 0.049564075002985784, "grad_norm": 2.1625924110412598, "learning_rate": 9.054170849584463e-06, "loss": 0.9801, "step": 830 }, { "epoch": 0.049623790756001436, "grad_norm": 2.97399640083313, "learning_rate": 9.055792837815388e-06, "loss": 1.0048, "step": 831 }, { "epoch": 0.04968350650901708, "grad_norm": 1.958580732345581, "learning_rate": 9.057412875368475e-06, "loss": 0.9768, "step": 832 }, { "epoch": 0.049743222262032726, "grad_norm": 2.5164365768432617, "learning_rate": 9.059030966930037e-06, "loss": 0.9934, "step": 833 }, { "epoch": 0.04980293801504837, "grad_norm": 2.057504892349243, "learning_rate": 9.060647117169525e-06, "loss": 0.9953, "step": 834 }, { "epoch": 0.049862653768064015, "grad_norm": 3.194275140762329, "learning_rate": 9.062261330739598e-06, "loss": 0.9884, "step": 835 }, { "epoch": 0.04992236952107966, "grad_norm": 2.085387945175171, "learning_rate": 9.06387361227621e-06, "loss": 0.9921, "step": 836 }, { "epoch": 0.049982085274095304, "grad_norm": 2.2729063034057617, "learning_rate": 9.065483966398692e-06, "loss": 0.9683, "step": 837 }, { "epoch": 0.05004180102711095, "grad_norm": 2.6638357639312744, "learning_rate": 9.067092397709822e-06, "loss": 1.0187, "step": 838 }, { "epoch": 0.050101516780126594, "grad_norm": 2.924177408218384, "learning_rate": 9.068698910795918e-06, "loss": 0.961, "step": 839 }, { "epoch": 0.050161232533142246, "grad_norm": 3.4098663330078125, "learning_rate": 9.070303510226903e-06, "loss": 0.9844, "step": 840 }, { "epoch": 0.05022094828615789, "grad_norm": 2.297274112701416, "learning_rate": 9.071906200556389e-06, "loss": 0.9747, "step": 841 }, { "epoch": 0.050280664039173535, "grad_norm": 1.8416104316711426, "learning_rate": 9.073506986321754e-06, "loss": 0.9614, "step": 842 }, { "epoch": 0.05034037979218918, "grad_norm": 3.5174686908721924, "learning_rate": 9.075105872044222e-06, "loss": 0.9577, "step": 843 }, { "epoch": 0.050400095545204825, "grad_norm": 2.832805633544922, "learning_rate": 9.076702862228935e-06, "loss": 0.9786, "step": 844 }, { "epoch": 0.05045981129822047, "grad_norm": 1.9283287525177002, "learning_rate": 9.078297961365027e-06, "loss": 0.979, "step": 845 }, { "epoch": 0.050519527051236114, "grad_norm": 2.214344024658203, "learning_rate": 9.079891173925711e-06, "loss": 0.944, "step": 846 }, { "epoch": 0.05057924280425176, "grad_norm": 2.6381266117095947, "learning_rate": 9.081482504368344e-06, "loss": 0.976, "step": 847 }, { "epoch": 0.05063895855726741, "grad_norm": 2.173893451690674, "learning_rate": 9.083071957134501e-06, "loss": 0.9884, "step": 848 }, { "epoch": 0.050698674310283055, "grad_norm": 2.5070648193359375, "learning_rate": 9.084659536650059e-06, "loss": 0.9802, "step": 849 }, { "epoch": 0.0507583900632987, "grad_norm": 2.07700252532959, "learning_rate": 9.08624524732526e-06, "loss": 0.9324, "step": 850 }, { "epoch": 0.050818105816314345, "grad_norm": 2.351858139038086, "learning_rate": 9.087829093554793e-06, "loss": 0.934, "step": 851 }, { "epoch": 0.05087782156932999, "grad_norm": 2.4251272678375244, "learning_rate": 9.089411079717862e-06, "loss": 0.9374, "step": 852 }, { "epoch": 0.050937537322345634, "grad_norm": 3.9696948528289795, "learning_rate": 9.090991210178261e-06, "loss": 0.9912, "step": 853 }, { "epoch": 0.05099725307536128, "grad_norm": 3.208064317703247, "learning_rate": 9.092569489284447e-06, "loss": 0.9658, "step": 854 }, { "epoch": 0.051056968828376924, "grad_norm": 2.35335111618042, "learning_rate": 9.094145921369604e-06, "loss": 0.9182, "step": 855 }, { "epoch": 0.05111668458139257, "grad_norm": 3.317141532897949, "learning_rate": 9.09572051075173e-06, "loss": 0.9614, "step": 856 }, { "epoch": 0.05117640033440822, "grad_norm": 2.301354169845581, "learning_rate": 9.097293261733693e-06, "loss": 0.9245, "step": 857 }, { "epoch": 0.051236116087423865, "grad_norm": 5.578310489654541, "learning_rate": 9.098864178603309e-06, "loss": 0.9972, "step": 858 }, { "epoch": 0.05129583184043951, "grad_norm": 2.763871669769287, "learning_rate": 9.100433265633405e-06, "loss": 0.9804, "step": 859 }, { "epoch": 0.051355547593455154, "grad_norm": 3.083937644958496, "learning_rate": 9.102000527081905e-06, "loss": 0.9963, "step": 860 }, { "epoch": 0.0514152633464708, "grad_norm": 3.4188716411590576, "learning_rate": 9.103565967191877e-06, "loss": 0.9729, "step": 861 }, { "epoch": 0.051474979099486444, "grad_norm": 2.8453259468078613, "learning_rate": 9.105129590191622e-06, "loss": 0.9926, "step": 862 }, { "epoch": 0.05153469485250209, "grad_norm": 2.6667897701263428, "learning_rate": 9.106691400294729e-06, "loss": 0.9851, "step": 863 }, { "epoch": 0.05159441060551773, "grad_norm": 4.997043132781982, "learning_rate": 9.108251401700149e-06, "loss": 0.9605, "step": 864 }, { "epoch": 0.05165412635853338, "grad_norm": 1.9086600542068481, "learning_rate": 9.109809598592263e-06, "loss": 0.9187, "step": 865 }, { "epoch": 0.05171384211154903, "grad_norm": 3.7973368167877197, "learning_rate": 9.111365995140946e-06, "loss": 0.9651, "step": 866 }, { "epoch": 0.051773557864564675, "grad_norm": 2.395432949066162, "learning_rate": 9.112920595501638e-06, "loss": 1.012, "step": 867 }, { "epoch": 0.05183327361758032, "grad_norm": 4.500901222229004, "learning_rate": 9.114473403815409e-06, "loss": 0.9982, "step": 868 }, { "epoch": 0.051892989370595964, "grad_norm": 1.9366443157196045, "learning_rate": 9.11602442420902e-06, "loss": 0.9937, "step": 869 }, { "epoch": 0.05195270512361161, "grad_norm": 2.669516086578369, "learning_rate": 9.117573660795e-06, "loss": 1.0087, "step": 870 }, { "epoch": 0.052012420876627254, "grad_norm": 4.031538963317871, "learning_rate": 9.119121117671702e-06, "loss": 1.0118, "step": 871 }, { "epoch": 0.0520721366296429, "grad_norm": 2.5224735736846924, "learning_rate": 9.12066679892337e-06, "loss": 1.0041, "step": 872 }, { "epoch": 0.05213185238265854, "grad_norm": 2.074462652206421, "learning_rate": 9.122210708620208e-06, "loss": 0.9738, "step": 873 }, { "epoch": 0.05219156813567419, "grad_norm": 2.1799519062042236, "learning_rate": 9.123752850818435e-06, "loss": 1.0085, "step": 874 }, { "epoch": 0.05225128388868984, "grad_norm": 2.2464473247528076, "learning_rate": 9.125293229560365e-06, "loss": 0.9584, "step": 875 }, { "epoch": 0.052310999641705484, "grad_norm": 2.3859455585479736, "learning_rate": 9.12683184887445e-06, "loss": 0.9766, "step": 876 }, { "epoch": 0.05237071539472113, "grad_norm": 4.395201206207275, "learning_rate": 9.12836871277536e-06, "loss": 1.04, "step": 877 }, { "epoch": 0.052430431147736774, "grad_norm": 2.192148447036743, "learning_rate": 9.129903825264038e-06, "loss": 0.982, "step": 878 }, { "epoch": 0.05249014690075242, "grad_norm": 2.0292906761169434, "learning_rate": 9.131437190327763e-06, "loss": 0.9206, "step": 879 }, { "epoch": 0.05254986265376806, "grad_norm": 2.1952457427978516, "learning_rate": 9.132968811940216e-06, "loss": 0.9741, "step": 880 }, { "epoch": 0.05260957840678371, "grad_norm": 3.7224810123443604, "learning_rate": 9.134498694061538e-06, "loss": 0.9897, "step": 881 }, { "epoch": 0.05266929415979935, "grad_norm": 2.3715765476226807, "learning_rate": 9.136026840638389e-06, "loss": 0.9783, "step": 882 }, { "epoch": 0.052729009912815, "grad_norm": 1.8248710632324219, "learning_rate": 9.137553255604017e-06, "loss": 1.0117, "step": 883 }, { "epoch": 0.05278872566583065, "grad_norm": 2.5560638904571533, "learning_rate": 9.139077942878314e-06, "loss": 0.9696, "step": 884 }, { "epoch": 0.052848441418846294, "grad_norm": 3.2340540885925293, "learning_rate": 9.140600906367875e-06, "loss": 0.9744, "step": 885 }, { "epoch": 0.05290815717186194, "grad_norm": 2.9920694828033447, "learning_rate": 9.142122149966059e-06, "loss": 0.9907, "step": 886 }, { "epoch": 0.05296787292487758, "grad_norm": 2.5539073944091797, "learning_rate": 9.143641677553053e-06, "loss": 0.9758, "step": 887 }, { "epoch": 0.05302758867789323, "grad_norm": 2.1232733726501465, "learning_rate": 9.145159492995925e-06, "loss": 0.95, "step": 888 }, { "epoch": 0.05308730443090887, "grad_norm": 2.09468936920166, "learning_rate": 9.146675600148684e-06, "loss": 0.985, "step": 889 }, { "epoch": 0.05314702018392452, "grad_norm": 3.4349539279937744, "learning_rate": 9.148190002852347e-06, "loss": 0.961, "step": 890 }, { "epoch": 0.05320673593694016, "grad_norm": 1.8199384212493896, "learning_rate": 9.149702704934983e-06, "loss": 0.9306, "step": 891 }, { "epoch": 0.05326645168995581, "grad_norm": 2.9441959857940674, "learning_rate": 9.15121371021179e-06, "loss": 0.9671, "step": 892 }, { "epoch": 0.05332616744297146, "grad_norm": 4.624683856964111, "learning_rate": 9.152723022485129e-06, "loss": 1.0528, "step": 893 }, { "epoch": 0.053385883195987104, "grad_norm": 4.623998641967773, "learning_rate": 9.154230645544602e-06, "loss": 0.9996, "step": 894 }, { "epoch": 0.05344559894900275, "grad_norm": 2.069247007369995, "learning_rate": 9.155736583167105e-06, "loss": 0.9849, "step": 895 }, { "epoch": 0.05350531470201839, "grad_norm": 2.42966365814209, "learning_rate": 9.157240839116866e-06, "loss": 1.0011, "step": 896 }, { "epoch": 0.05356503045503404, "grad_norm": 3.405050039291382, "learning_rate": 9.158743417145534e-06, "loss": 0.9634, "step": 897 }, { "epoch": 0.05362474620804968, "grad_norm": 1.8413493633270264, "learning_rate": 9.160244320992208e-06, "loss": 0.9544, "step": 898 }, { "epoch": 0.05368446196106533, "grad_norm": 2.524807929992676, "learning_rate": 9.161743554383504e-06, "loss": 0.9938, "step": 899 }, { "epoch": 0.05374417771408097, "grad_norm": 3.704145908355713, "learning_rate": 9.163241121033611e-06, "loss": 0.965, "step": 900 }, { "epoch": 0.05380389346709662, "grad_norm": 2.3133115768432617, "learning_rate": 9.16473702464434e-06, "loss": 1.03, "step": 901 }, { "epoch": 0.05386360922011227, "grad_norm": 4.674420356750488, "learning_rate": 9.166231268905192e-06, "loss": 0.9805, "step": 902 }, { "epoch": 0.05392332497312791, "grad_norm": 4.967262268066406, "learning_rate": 9.16772385749339e-06, "loss": 1.0005, "step": 903 }, { "epoch": 0.05398304072614356, "grad_norm": 2.896496295928955, "learning_rate": 9.169214794073959e-06, "loss": 0.9793, "step": 904 }, { "epoch": 0.0540427564791592, "grad_norm": 2.849473714828491, "learning_rate": 9.170704082299765e-06, "loss": 1.0338, "step": 905 }, { "epoch": 0.05410247223217485, "grad_norm": 2.533398389816284, "learning_rate": 9.17219172581157e-06, "loss": 0.9807, "step": 906 }, { "epoch": 0.05416218798519049, "grad_norm": 4.946715354919434, "learning_rate": 9.173677728238084e-06, "loss": 0.9928, "step": 907 }, { "epoch": 0.05422190373820614, "grad_norm": 3.1598801612854004, "learning_rate": 9.17516209319603e-06, "loss": 0.97, "step": 908 }, { "epoch": 0.05428161949122178, "grad_norm": 2.068877935409546, "learning_rate": 9.17664482429018e-06, "loss": 0.9561, "step": 909 }, { "epoch": 0.054341335244237426, "grad_norm": 2.9972646236419678, "learning_rate": 9.17812592511342e-06, "loss": 0.9951, "step": 910 }, { "epoch": 0.05440105099725308, "grad_norm": 2.568974018096924, "learning_rate": 9.179605399246796e-06, "loss": 0.9767, "step": 911 }, { "epoch": 0.05446076675026872, "grad_norm": 7.539276123046875, "learning_rate": 9.181083250259567e-06, "loss": 0.9755, "step": 912 }, { "epoch": 0.05452048250328437, "grad_norm": 2.7465507984161377, "learning_rate": 9.18255948170926e-06, "loss": 1.0123, "step": 913 }, { "epoch": 0.05458019825630001, "grad_norm": 3.5642623901367188, "learning_rate": 9.18403409714172e-06, "loss": 0.9553, "step": 914 }, { "epoch": 0.05463991400931566, "grad_norm": 2.296509265899658, "learning_rate": 9.185507100091154e-06, "loss": 0.9591, "step": 915 }, { "epoch": 0.0546996297623313, "grad_norm": 2.546438694000244, "learning_rate": 9.18697849408019e-06, "loss": 1.0009, "step": 916 }, { "epoch": 0.05475934551534695, "grad_norm": 2.4354963302612305, "learning_rate": 9.18844828261993e-06, "loss": 0.9609, "step": 917 }, { "epoch": 0.05481906126836259, "grad_norm": 2.387286901473999, "learning_rate": 9.18991646920999e-06, "loss": 0.9616, "step": 918 }, { "epoch": 0.05487877702137824, "grad_norm": 2.728529930114746, "learning_rate": 9.191383057338559e-06, "loss": 0.9754, "step": 919 }, { "epoch": 0.05493849277439389, "grad_norm": 3.6697330474853516, "learning_rate": 9.192848050482443e-06, "loss": 0.9601, "step": 920 }, { "epoch": 0.05499820852740953, "grad_norm": 2.3625621795654297, "learning_rate": 9.194311452107116e-06, "loss": 0.9838, "step": 921 }, { "epoch": 0.05505792428042518, "grad_norm": 3.0533807277679443, "learning_rate": 9.195773265666775e-06, "loss": 0.9744, "step": 922 }, { "epoch": 0.05511764003344082, "grad_norm": 2.1969521045684814, "learning_rate": 9.197233494604377e-06, "loss": 0.9646, "step": 923 }, { "epoch": 0.05517735578645647, "grad_norm": 2.848646640777588, "learning_rate": 9.198692142351702e-06, "loss": 0.9001, "step": 924 }, { "epoch": 0.05523707153947211, "grad_norm": 2.5796420574188232, "learning_rate": 9.200149212329385e-06, "loss": 0.9624, "step": 925 }, { "epoch": 0.055296787292487756, "grad_norm": 2.343834638595581, "learning_rate": 9.20160470794698e-06, "loss": 0.967, "step": 926 }, { "epoch": 0.0553565030455034, "grad_norm": 2.8377902507781982, "learning_rate": 9.203058632603001e-06, "loss": 0.9648, "step": 927 }, { "epoch": 0.05541621879851905, "grad_norm": 2.9302010536193848, "learning_rate": 9.204510989684963e-06, "loss": 1.0102, "step": 928 }, { "epoch": 0.0554759345515347, "grad_norm": 2.203904628753662, "learning_rate": 9.205961782569442e-06, "loss": 0.9666, "step": 929 }, { "epoch": 0.05553565030455034, "grad_norm": 3.515131950378418, "learning_rate": 9.207411014622116e-06, "loss": 1.0236, "step": 930 }, { "epoch": 0.05559536605756599, "grad_norm": 3.3738596439361572, "learning_rate": 9.208858689197806e-06, "loss": 0.95, "step": 931 }, { "epoch": 0.05565508181058163, "grad_norm": 2.943185806274414, "learning_rate": 9.210304809640535e-06, "loss": 0.9961, "step": 932 }, { "epoch": 0.055714797563597276, "grad_norm": 4.301390647888184, "learning_rate": 9.211749379283565e-06, "loss": 0.9697, "step": 933 }, { "epoch": 0.05577451331661292, "grad_norm": 5.43228816986084, "learning_rate": 9.213192401449444e-06, "loss": 1.003, "step": 934 }, { "epoch": 0.055834229069628566, "grad_norm": 2.9424331188201904, "learning_rate": 9.214633879450057e-06, "loss": 0.9755, "step": 935 }, { "epoch": 0.05589394482264421, "grad_norm": 2.892453670501709, "learning_rate": 9.216073816586666e-06, "loss": 1.0006, "step": 936 }, { "epoch": 0.05595366057565986, "grad_norm": 2.7144880294799805, "learning_rate": 9.21751221614996e-06, "loss": 0.9881, "step": 937 }, { "epoch": 0.05601337632867551, "grad_norm": 2.4153707027435303, "learning_rate": 9.218949081420095e-06, "loss": 0.942, "step": 938 }, { "epoch": 0.05607309208169115, "grad_norm": 8.549108505249023, "learning_rate": 9.220384415666743e-06, "loss": 0.9946, "step": 939 }, { "epoch": 0.0561328078347068, "grad_norm": 2.3430347442626953, "learning_rate": 9.221818222149135e-06, "loss": 0.9818, "step": 940 }, { "epoch": 0.05619252358772244, "grad_norm": 2.596958875656128, "learning_rate": 9.223250504116107e-06, "loss": 0.991, "step": 941 }, { "epoch": 0.056252239340738086, "grad_norm": 2.517502546310425, "learning_rate": 9.224681264806145e-06, "loss": 0.9776, "step": 942 }, { "epoch": 0.05631195509375373, "grad_norm": 2.168461561203003, "learning_rate": 9.226110507447417e-06, "loss": 0.9623, "step": 943 }, { "epoch": 0.056371670846769376, "grad_norm": 6.143514156341553, "learning_rate": 9.227538235257838e-06, "loss": 0.9793, "step": 944 }, { "epoch": 0.05643138659978502, "grad_norm": 3.9101693630218506, "learning_rate": 9.228964451445097e-06, "loss": 0.9579, "step": 945 }, { "epoch": 0.05649110235280067, "grad_norm": 2.199650764465332, "learning_rate": 9.230389159206702e-06, "loss": 0.9521, "step": 946 }, { "epoch": 0.05655081810581632, "grad_norm": 3.0232934951782227, "learning_rate": 9.231812361730035e-06, "loss": 0.9931, "step": 947 }, { "epoch": 0.05661053385883196, "grad_norm": 1.9615318775177002, "learning_rate": 9.233234062192377e-06, "loss": 0.9678, "step": 948 }, { "epoch": 0.056670249611847606, "grad_norm": 2.2160985469818115, "learning_rate": 9.234654263760966e-06, "loss": 0.9616, "step": 949 }, { "epoch": 0.05672996536486325, "grad_norm": 2.2886760234832764, "learning_rate": 9.236072969593029e-06, "loss": 0.9521, "step": 950 }, { "epoch": 0.056789681117878896, "grad_norm": 4.6697211265563965, "learning_rate": 9.23749018283583e-06, "loss": 1.0052, "step": 951 }, { "epoch": 0.05684939687089454, "grad_norm": 3.350963592529297, "learning_rate": 9.238905906626707e-06, "loss": 0.908, "step": 952 }, { "epoch": 0.056909112623910185, "grad_norm": 3.233809232711792, "learning_rate": 9.240320144093122e-06, "loss": 0.9673, "step": 953 }, { "epoch": 0.05696882837692583, "grad_norm": 2.655092477798462, "learning_rate": 9.241732898352694e-06, "loss": 0.9764, "step": 954 }, { "epoch": 0.05702854412994148, "grad_norm": 2.6069819927215576, "learning_rate": 9.24314417251324e-06, "loss": 0.9877, "step": 955 }, { "epoch": 0.057088259882957126, "grad_norm": 2.0803377628326416, "learning_rate": 9.244553969672828e-06, "loss": 0.9425, "step": 956 }, { "epoch": 0.05714797563597277, "grad_norm": 3.8826026916503906, "learning_rate": 9.245962292919797e-06, "loss": 0.9882, "step": 957 }, { "epoch": 0.057207691388988416, "grad_norm": 3.885923385620117, "learning_rate": 9.247369145332823e-06, "loss": 0.9723, "step": 958 }, { "epoch": 0.05726740714200406, "grad_norm": 2.299009323120117, "learning_rate": 9.248774529980937e-06, "loss": 0.9901, "step": 959 }, { "epoch": 0.057327122895019705, "grad_norm": 2.7019717693328857, "learning_rate": 9.250178449923574e-06, "loss": 0.9571, "step": 960 }, { "epoch": 0.05738683864803535, "grad_norm": 2.444538116455078, "learning_rate": 9.25158090821062e-06, "loss": 0.9526, "step": 961 }, { "epoch": 0.057446554401050995, "grad_norm": 2.318263053894043, "learning_rate": 9.25298190788244e-06, "loss": 0.9236, "step": 962 }, { "epoch": 0.05750627015406664, "grad_norm": 2.79718017578125, "learning_rate": 9.254381451969923e-06, "loss": 0.9306, "step": 963 }, { "epoch": 0.05756598590708229, "grad_norm": 2.4152958393096924, "learning_rate": 9.255779543494523e-06, "loss": 0.9696, "step": 964 }, { "epoch": 0.057625701660097936, "grad_norm": 2.4447038173675537, "learning_rate": 9.257176185468294e-06, "loss": 0.9692, "step": 965 }, { "epoch": 0.05768541741311358, "grad_norm": 2.2582359313964844, "learning_rate": 9.258571380893927e-06, "loss": 0.9494, "step": 966 }, { "epoch": 0.057745133166129226, "grad_norm": 5.206615447998047, "learning_rate": 9.259965132764803e-06, "loss": 0.9563, "step": 967 }, { "epoch": 0.05780484891914487, "grad_norm": 2.540576219558716, "learning_rate": 9.261357444065013e-06, "loss": 0.9337, "step": 968 }, { "epoch": 0.057864564672160515, "grad_norm": 2.253744602203369, "learning_rate": 9.26274831776941e-06, "loss": 0.9746, "step": 969 }, { "epoch": 0.05792428042517616, "grad_norm": 3.2657837867736816, "learning_rate": 9.264137756843631e-06, "loss": 1.0305, "step": 970 }, { "epoch": 0.057983996178191805, "grad_norm": 3.5801992416381836, "learning_rate": 9.26552576424416e-06, "loss": 0.9893, "step": 971 }, { "epoch": 0.05804371193120745, "grad_norm": 4.485790729522705, "learning_rate": 9.266912342918343e-06, "loss": 0.9449, "step": 972 }, { "epoch": 0.0581034276842231, "grad_norm": 2.3532185554504395, "learning_rate": 9.268297495804433e-06, "loss": 0.939, "step": 973 }, { "epoch": 0.058163143437238746, "grad_norm": 2.0581278800964355, "learning_rate": 9.269681225831633e-06, "loss": 0.9557, "step": 974 }, { "epoch": 0.05822285919025439, "grad_norm": 2.517045736312866, "learning_rate": 9.271063535920128e-06, "loss": 0.9645, "step": 975 }, { "epoch": 0.058282574943270035, "grad_norm": 2.936697006225586, "learning_rate": 9.272444428981115e-06, "loss": 0.9553, "step": 976 }, { "epoch": 0.05834229069628568, "grad_norm": 3.7221314907073975, "learning_rate": 9.27382390791686e-06, "loss": 0.9816, "step": 977 }, { "epoch": 0.058402006449301325, "grad_norm": 2.6447224617004395, "learning_rate": 9.275201975620709e-06, "loss": 0.9774, "step": 978 }, { "epoch": 0.05846172220231697, "grad_norm": 2.3030996322631836, "learning_rate": 9.276578634977144e-06, "loss": 0.9773, "step": 979 }, { "epoch": 0.058521437955332614, "grad_norm": 3.007167100906372, "learning_rate": 9.277953888861812e-06, "loss": 0.983, "step": 980 }, { "epoch": 0.05858115370834826, "grad_norm": 2.5043656826019287, "learning_rate": 9.279327740141563e-06, "loss": 0.9915, "step": 981 }, { "epoch": 0.05864086946136391, "grad_norm": 2.861379861831665, "learning_rate": 9.280700191674478e-06, "loss": 0.9742, "step": 982 }, { "epoch": 0.058700585214379555, "grad_norm": 2.3815808296203613, "learning_rate": 9.282071246309918e-06, "loss": 0.9932, "step": 983 }, { "epoch": 0.0587603009673952, "grad_norm": 2.627455472946167, "learning_rate": 9.283440906888548e-06, "loss": 1.0154, "step": 984 }, { "epoch": 0.058820016720410845, "grad_norm": 4.27944803237915, "learning_rate": 9.284809176242382e-06, "loss": 0.9594, "step": 985 }, { "epoch": 0.05887973247342649, "grad_norm": 2.441603183746338, "learning_rate": 9.286176057194804e-06, "loss": 0.9598, "step": 986 }, { "epoch": 0.058939448226442134, "grad_norm": 2.736619234085083, "learning_rate": 9.287541552560621e-06, "loss": 0.9407, "step": 987 }, { "epoch": 0.05899916397945778, "grad_norm": 3.21451473236084, "learning_rate": 9.288905665146085e-06, "loss": 0.9933, "step": 988 }, { "epoch": 0.059058879732473424, "grad_norm": 3.4498307704925537, "learning_rate": 9.290268397748929e-06, "loss": 0.9737, "step": 989 }, { "epoch": 0.05911859548548907, "grad_norm": 3.4469363689422607, "learning_rate": 9.291629753158409e-06, "loss": 0.9482, "step": 990 }, { "epoch": 0.05917831123850472, "grad_norm": 2.8487467765808105, "learning_rate": 9.292989734155328e-06, "loss": 0.9618, "step": 991 }, { "epoch": 0.059238026991520365, "grad_norm": 3.1442790031433105, "learning_rate": 9.294348343512079e-06, "loss": 1.023, "step": 992 }, { "epoch": 0.05929774274453601, "grad_norm": 2.486665964126587, "learning_rate": 9.295705583992673e-06, "loss": 0.9729, "step": 993 }, { "epoch": 0.059357458497551654, "grad_norm": 2.6895034313201904, "learning_rate": 9.297061458352771e-06, "loss": 0.9984, "step": 994 }, { "epoch": 0.0594171742505673, "grad_norm": 3.5460500717163086, "learning_rate": 9.29841596933973e-06, "loss": 0.9642, "step": 995 }, { "epoch": 0.059476890003582944, "grad_norm": 3.013460397720337, "learning_rate": 9.29976911969262e-06, "loss": 0.988, "step": 996 }, { "epoch": 0.05953660575659859, "grad_norm": 2.554011821746826, "learning_rate": 9.301120912142269e-06, "loss": 0.9755, "step": 997 }, { "epoch": 0.05959632150961423, "grad_norm": 2.583444833755493, "learning_rate": 9.302471349411289e-06, "loss": 0.9702, "step": 998 }, { "epoch": 0.059656037262629885, "grad_norm": 3.254690647125244, "learning_rate": 9.303820434214117e-06, "loss": 0.962, "step": 999 }, { "epoch": 0.05971575301564553, "grad_norm": 2.560316324234009, "learning_rate": 9.305168169257035e-06, "loss": 0.967, "step": 1000 }, { "epoch": 0.05971575301564553, "eval_text_loss": 0.9835836291313171, "eval_text_runtime": 15.1407, "eval_text_samples_per_second": 264.189, "eval_text_steps_per_second": 0.528, "step": 1000 }, { "epoch": 0.05971575301564553, "eval_image_loss": 0.7296063303947449, "eval_image_runtime": 5.195, "eval_image_samples_per_second": 769.964, "eval_image_steps_per_second": 1.54, "step": 1000 }, { "epoch": 0.05971575301564553, "eval_video_loss": 1.218184232711792, "eval_video_runtime": 77.7872, "eval_video_samples_per_second": 51.422, "eval_video_steps_per_second": 0.103, "step": 1000 }, { "epoch": 0.059775468768661175, "grad_norm": 2.1005232334136963, "learning_rate": 9.306514557238217e-06, "loss": 1.0175, "step": 1001 }, { "epoch": 0.05983518452167682, "grad_norm": 2.503899097442627, "learning_rate": 9.307859600847753e-06, "loss": 0.9514, "step": 1002 }, { "epoch": 0.059894900274692464, "grad_norm": 2.018846035003662, "learning_rate": 9.30920330276768e-06, "loss": 0.9516, "step": 1003 }, { "epoch": 0.05995461602770811, "grad_norm": 2.3107056617736816, "learning_rate": 9.310545665672017e-06, "loss": 0.9357, "step": 1004 }, { "epoch": 0.060014331780723754, "grad_norm": 4.258817672729492, "learning_rate": 9.311886692226802e-06, "loss": 0.9727, "step": 1005 }, { "epoch": 0.0600740475337394, "grad_norm": 4.144606113433838, "learning_rate": 9.313226385090114e-06, "loss": 0.9863, "step": 1006 }, { "epoch": 0.06013376328675504, "grad_norm": 4.0232930183410645, "learning_rate": 9.314564746912112e-06, "loss": 0.9859, "step": 1007 }, { "epoch": 0.060193479039770695, "grad_norm": 2.5723342895507812, "learning_rate": 9.31590178033506e-06, "loss": 0.9673, "step": 1008 }, { "epoch": 0.06025319479278634, "grad_norm": 5.582655906677246, "learning_rate": 9.317237487993364e-06, "loss": 0.9654, "step": 1009 }, { "epoch": 0.060312910545801984, "grad_norm": 2.7916078567504883, "learning_rate": 9.318571872513604e-06, "loss": 0.9996, "step": 1010 }, { "epoch": 0.06037262629881763, "grad_norm": 2.6950786113739014, "learning_rate": 9.31990493651456e-06, "loss": 1.0098, "step": 1011 }, { "epoch": 0.060432342051833274, "grad_norm": 2.5218801498413086, "learning_rate": 9.32123668260724e-06, "loss": 0.947, "step": 1012 }, { "epoch": 0.06049205780484892, "grad_norm": 2.1128244400024414, "learning_rate": 9.322567113394924e-06, "loss": 0.9675, "step": 1013 }, { "epoch": 0.06055177355786456, "grad_norm": 1.9818438291549683, "learning_rate": 9.323896231473184e-06, "loss": 0.9566, "step": 1014 }, { "epoch": 0.06061148931088021, "grad_norm": 1.8199084997177124, "learning_rate": 9.32522403942991e-06, "loss": 1.0056, "step": 1015 }, { "epoch": 0.06067120506389585, "grad_norm": 3.8921666145324707, "learning_rate": 9.326550539845354e-06, "loss": 0.9786, "step": 1016 }, { "epoch": 0.060730920816911504, "grad_norm": 1.7926279306411743, "learning_rate": 9.327875735292153e-06, "loss": 0.9735, "step": 1017 }, { "epoch": 0.06079063656992715, "grad_norm": 2.8028054237365723, "learning_rate": 9.329199628335353e-06, "loss": 0.9696, "step": 1018 }, { "epoch": 0.060850352322942794, "grad_norm": 2.897024393081665, "learning_rate": 9.330522221532452e-06, "loss": 0.9899, "step": 1019 }, { "epoch": 0.06091006807595844, "grad_norm": 1.8804093599319458, "learning_rate": 9.331843517433414e-06, "loss": 0.9805, "step": 1020 }, { "epoch": 0.06096978382897408, "grad_norm": 2.1401400566101074, "learning_rate": 9.333163518580715e-06, "loss": 0.977, "step": 1021 }, { "epoch": 0.06102949958198973, "grad_norm": 2.2136714458465576, "learning_rate": 9.33448222750936e-06, "loss": 1.0049, "step": 1022 }, { "epoch": 0.06108921533500537, "grad_norm": 2.0679850578308105, "learning_rate": 9.335799646746913e-06, "loss": 0.9575, "step": 1023 }, { "epoch": 0.06114893108802102, "grad_norm": 3.215132474899292, "learning_rate": 9.337115778813537e-06, "loss": 1.0055, "step": 1024 }, { "epoch": 0.06120864684103666, "grad_norm": 2.1186797618865967, "learning_rate": 9.338430626222011e-06, "loss": 0.9673, "step": 1025 }, { "epoch": 0.061268362594052314, "grad_norm": 1.7923433780670166, "learning_rate": 9.339744191477761e-06, "loss": 0.931, "step": 1026 }, { "epoch": 0.06132807834706796, "grad_norm": 2.378812551498413, "learning_rate": 9.341056477078894e-06, "loss": 0.9384, "step": 1027 }, { "epoch": 0.061387794100083604, "grad_norm": 3.0522408485412598, "learning_rate": 9.342367485516224e-06, "loss": 0.9845, "step": 1028 }, { "epoch": 0.06144750985309925, "grad_norm": 2.58148193359375, "learning_rate": 9.343677219273298e-06, "loss": 0.955, "step": 1029 }, { "epoch": 0.06150722560611489, "grad_norm": 2.758772611618042, "learning_rate": 9.344985680826425e-06, "loss": 0.9844, "step": 1030 }, { "epoch": 0.06156694135913054, "grad_norm": 2.4246418476104736, "learning_rate": 9.346292872644707e-06, "loss": 0.9611, "step": 1031 }, { "epoch": 0.06162665711214618, "grad_norm": 2.1325526237487793, "learning_rate": 9.347598797190062e-06, "loss": 0.9676, "step": 1032 }, { "epoch": 0.06168637286516183, "grad_norm": 3.032697916030884, "learning_rate": 9.348903456917257e-06, "loss": 0.9764, "step": 1033 }, { "epoch": 0.06174608861817747, "grad_norm": 2.1676504611968994, "learning_rate": 9.350206854273934e-06, "loss": 0.9737, "step": 1034 }, { "epoch": 0.061805804371193124, "grad_norm": 3.2664146423339844, "learning_rate": 9.351508991700635e-06, "loss": 0.9889, "step": 1035 }, { "epoch": 0.06186552012420877, "grad_norm": 3.168438673019409, "learning_rate": 9.352809871630834e-06, "loss": 0.9724, "step": 1036 }, { "epoch": 0.06192523587722441, "grad_norm": 4.161407470703125, "learning_rate": 9.354109496490958e-06, "loss": 0.9901, "step": 1037 }, { "epoch": 0.06198495163024006, "grad_norm": 3.3749570846557617, "learning_rate": 9.355407868700419e-06, "loss": 0.945, "step": 1038 }, { "epoch": 0.0620446673832557, "grad_norm": 4.303521633148193, "learning_rate": 9.356704990671643e-06, "loss": 0.9633, "step": 1039 }, { "epoch": 0.06210438313627135, "grad_norm": 2.084390163421631, "learning_rate": 9.358000864810091e-06, "loss": 0.9808, "step": 1040 }, { "epoch": 0.06216409888928699, "grad_norm": 2.1855363845825195, "learning_rate": 9.359295493514287e-06, "loss": 0.9618, "step": 1041 }, { "epoch": 0.06222381464230264, "grad_norm": 3.697097063064575, "learning_rate": 9.36058887917585e-06, "loss": 0.986, "step": 1042 }, { "epoch": 0.06228353039531828, "grad_norm": 2.511913537979126, "learning_rate": 9.361881024179513e-06, "loss": 0.9782, "step": 1043 }, { "epoch": 0.06234324614833393, "grad_norm": 2.337378978729248, "learning_rate": 9.363171930903156e-06, "loss": 0.9723, "step": 1044 }, { "epoch": 0.06240296190134958, "grad_norm": 2.230921983718872, "learning_rate": 9.364461601717826e-06, "loss": 0.9357, "step": 1045 }, { "epoch": 0.06246267765436522, "grad_norm": 3.423898935317993, "learning_rate": 9.365750038987772e-06, "loss": 0.9874, "step": 1046 }, { "epoch": 0.06252239340738086, "grad_norm": 4.744316577911377, "learning_rate": 9.367037245070458e-06, "loss": 0.9758, "step": 1047 }, { "epoch": 0.06258210916039651, "grad_norm": 2.691706895828247, "learning_rate": 9.368323222316602e-06, "loss": 0.9453, "step": 1048 }, { "epoch": 0.06264182491341216, "grad_norm": 2.79390811920166, "learning_rate": 9.369607973070191e-06, "loss": 0.9531, "step": 1049 }, { "epoch": 0.0627015406664278, "grad_norm": 2.840904474258423, "learning_rate": 9.370891499668521e-06, "loss": 0.9941, "step": 1050 }, { "epoch": 0.06276125641944345, "grad_norm": 3.9460623264312744, "learning_rate": 9.372173804442202e-06, "loss": 0.9554, "step": 1051 }, { "epoch": 0.06282097217245909, "grad_norm": 3.0594096183776855, "learning_rate": 9.373454889715202e-06, "loss": 0.9825, "step": 1052 }, { "epoch": 0.06288068792547474, "grad_norm": 2.8587844371795654, "learning_rate": 9.374734757804858e-06, "loss": 0.9721, "step": 1053 }, { "epoch": 0.06294040367849038, "grad_norm": 2.7650721073150635, "learning_rate": 9.37601341102192e-06, "loss": 0.9808, "step": 1054 }, { "epoch": 0.06300011943150603, "grad_norm": 2.1911871433258057, "learning_rate": 9.377290851670551e-06, "loss": 0.9546, "step": 1055 }, { "epoch": 0.06305983518452167, "grad_norm": 2.4674158096313477, "learning_rate": 9.378567082048372e-06, "loss": 0.963, "step": 1056 }, { "epoch": 0.06311955093753732, "grad_norm": 3.4365992546081543, "learning_rate": 9.37984210444648e-06, "loss": 0.951, "step": 1057 }, { "epoch": 0.06317926669055297, "grad_norm": 1.9474444389343262, "learning_rate": 9.381115921149467e-06, "loss": 0.9738, "step": 1058 }, { "epoch": 0.06323898244356861, "grad_norm": 2.2331740856170654, "learning_rate": 9.382388534435457e-06, "loss": 0.9807, "step": 1059 }, { "epoch": 0.06329869819658426, "grad_norm": 6.952436447143555, "learning_rate": 9.383659946576118e-06, "loss": 0.9626, "step": 1060 }, { "epoch": 0.0633584139495999, "grad_norm": 2.0244126319885254, "learning_rate": 9.384930159836695e-06, "loss": 0.9456, "step": 1061 }, { "epoch": 0.06341812970261555, "grad_norm": 6.349938869476318, "learning_rate": 9.38619917647603e-06, "loss": 0.9667, "step": 1062 }, { "epoch": 0.06347784545563119, "grad_norm": 3.8817758560180664, "learning_rate": 9.387466998746589e-06, "loss": 0.9424, "step": 1063 }, { "epoch": 0.06353756120864684, "grad_norm": 4.646816253662109, "learning_rate": 9.388733628894478e-06, "loss": 0.9569, "step": 1064 }, { "epoch": 0.06359727696166248, "grad_norm": 2.3907835483551025, "learning_rate": 9.389999069159478e-06, "loss": 0.9911, "step": 1065 }, { "epoch": 0.06365699271467813, "grad_norm": 1.8192123174667358, "learning_rate": 9.391263321775065e-06, "loss": 0.9524, "step": 1066 }, { "epoch": 0.06371670846769378, "grad_norm": 2.6690618991851807, "learning_rate": 9.392526388968429e-06, "loss": 1.0051, "step": 1067 }, { "epoch": 0.06377642422070942, "grad_norm": 1.7947925329208374, "learning_rate": 9.393788272960503e-06, "loss": 0.9283, "step": 1068 }, { "epoch": 0.06383613997372507, "grad_norm": 1.8954335451126099, "learning_rate": 9.39504897596598e-06, "loss": 0.9718, "step": 1069 }, { "epoch": 0.06389585572674071, "grad_norm": 2.1652534008026123, "learning_rate": 9.396308500193349e-06, "loss": 0.9511, "step": 1070 }, { "epoch": 0.06395557147975636, "grad_norm": 2.3966338634490967, "learning_rate": 9.3975668478449e-06, "loss": 0.9875, "step": 1071 }, { "epoch": 0.064015287232772, "grad_norm": 2.2697372436523438, "learning_rate": 9.398824021116766e-06, "loss": 0.9582, "step": 1072 }, { "epoch": 0.06407500298578765, "grad_norm": 2.6585984230041504, "learning_rate": 9.40008002219893e-06, "loss": 0.9985, "step": 1073 }, { "epoch": 0.06413471873880329, "grad_norm": 2.8431143760681152, "learning_rate": 9.40133485327526e-06, "loss": 0.9611, "step": 1074 }, { "epoch": 0.06419443449181894, "grad_norm": 4.019023418426514, "learning_rate": 9.402588516523521e-06, "loss": 1.0068, "step": 1075 }, { "epoch": 0.06425415024483459, "grad_norm": 2.127415895462036, "learning_rate": 9.403841014115413e-06, "loss": 0.9889, "step": 1076 }, { "epoch": 0.06431386599785023, "grad_norm": 2.136289358139038, "learning_rate": 9.405092348216572e-06, "loss": 0.9678, "step": 1077 }, { "epoch": 0.06437358175086588, "grad_norm": 3.600966215133667, "learning_rate": 9.406342520986611e-06, "loss": 1.0022, "step": 1078 }, { "epoch": 0.06443329750388152, "grad_norm": 3.710991621017456, "learning_rate": 9.407591534579135e-06, "loss": 0.997, "step": 1079 }, { "epoch": 0.06449301325689717, "grad_norm": 3.721651554107666, "learning_rate": 9.408839391141767e-06, "loss": 0.976, "step": 1080 }, { "epoch": 0.06455272900991281, "grad_norm": 4.03542423248291, "learning_rate": 9.41008609281616e-06, "loss": 0.9436, "step": 1081 }, { "epoch": 0.06461244476292846, "grad_norm": 3.5513017177581787, "learning_rate": 9.41133164173803e-06, "loss": 0.9492, "step": 1082 }, { "epoch": 0.0646721605159441, "grad_norm": 2.309396982192993, "learning_rate": 9.412576040037179e-06, "loss": 0.9605, "step": 1083 }, { "epoch": 0.06473187626895975, "grad_norm": 2.8992881774902344, "learning_rate": 9.413819289837502e-06, "loss": 0.9962, "step": 1084 }, { "epoch": 0.0647915920219754, "grad_norm": 4.777055263519287, "learning_rate": 9.415061393257025e-06, "loss": 0.973, "step": 1085 }, { "epoch": 0.06485130777499104, "grad_norm": 2.892045497894287, "learning_rate": 9.416302352407921e-06, "loss": 0.9186, "step": 1086 }, { "epoch": 0.06491102352800669, "grad_norm": 2.82893705368042, "learning_rate": 9.41754216939653e-06, "loss": 0.9728, "step": 1087 }, { "epoch": 0.06497073928102233, "grad_norm": 2.756657838821411, "learning_rate": 9.418780846323378e-06, "loss": 0.9553, "step": 1088 }, { "epoch": 0.06503045503403798, "grad_norm": 2.6727397441864014, "learning_rate": 9.420018385283208e-06, "loss": 0.9863, "step": 1089 }, { "epoch": 0.06509017078705362, "grad_norm": 3.1403372287750244, "learning_rate": 9.421254788364987e-06, "loss": 0.9498, "step": 1090 }, { "epoch": 0.06514988654006927, "grad_norm": 4.6719560623168945, "learning_rate": 9.422490057651943e-06, "loss": 0.9857, "step": 1091 }, { "epoch": 0.06520960229308491, "grad_norm": 2.6617469787597656, "learning_rate": 9.423724195221577e-06, "loss": 1.0053, "step": 1092 }, { "epoch": 0.06526931804610056, "grad_norm": 4.123587131500244, "learning_rate": 9.424957203145678e-06, "loss": 0.9639, "step": 1093 }, { "epoch": 0.06532903379911621, "grad_norm": 2.8629937171936035, "learning_rate": 9.426189083490358e-06, "loss": 0.949, "step": 1094 }, { "epoch": 0.06538874955213185, "grad_norm": 2.487098455429077, "learning_rate": 9.427419838316068e-06, "loss": 0.9875, "step": 1095 }, { "epoch": 0.0654484653051475, "grad_norm": 1.9413772821426392, "learning_rate": 9.428649469677608e-06, "loss": 0.9466, "step": 1096 }, { "epoch": 0.06550818105816314, "grad_norm": 2.819869041442871, "learning_rate": 9.42987797962416e-06, "loss": 1.0146, "step": 1097 }, { "epoch": 0.06556789681117879, "grad_norm": 2.025890350341797, "learning_rate": 9.43110537019931e-06, "loss": 0.9296, "step": 1098 }, { "epoch": 0.06562761256419443, "grad_norm": 3.716519832611084, "learning_rate": 9.432331643441054e-06, "loss": 0.9318, "step": 1099 }, { "epoch": 0.06568732831721008, "grad_norm": 3.722999095916748, "learning_rate": 9.433556801381834e-06, "loss": 0.9666, "step": 1100 }, { "epoch": 0.06574704407022572, "grad_norm": 3.413104772567749, "learning_rate": 9.434780846048546e-06, "loss": 1.0416, "step": 1101 }, { "epoch": 0.06580675982324137, "grad_norm": 1.9150253534317017, "learning_rate": 9.436003779462573e-06, "loss": 0.9589, "step": 1102 }, { "epoch": 0.06586647557625702, "grad_norm": 1.9611150026321411, "learning_rate": 9.437225603639792e-06, "loss": 0.9447, "step": 1103 }, { "epoch": 0.06592619132927266, "grad_norm": 2.446607828140259, "learning_rate": 9.438446320590598e-06, "loss": 0.9506, "step": 1104 }, { "epoch": 0.06598590708228831, "grad_norm": 3.363103151321411, "learning_rate": 9.439665932319932e-06, "loss": 0.9571, "step": 1105 }, { "epoch": 0.06604562283530395, "grad_norm": 2.3319904804229736, "learning_rate": 9.440884440827289e-06, "loss": 0.9771, "step": 1106 }, { "epoch": 0.0661053385883196, "grad_norm": 2.781712770462036, "learning_rate": 9.442101848106741e-06, "loss": 0.9497, "step": 1107 }, { "epoch": 0.06616505434133524, "grad_norm": 2.5180697441101074, "learning_rate": 9.443318156146969e-06, "loss": 0.9566, "step": 1108 }, { "epoch": 0.06622477009435089, "grad_norm": 2.6790785789489746, "learning_rate": 9.444533366931259e-06, "loss": 0.9771, "step": 1109 }, { "epoch": 0.06628448584736654, "grad_norm": 2.8391947746276855, "learning_rate": 9.445747482437541e-06, "loss": 0.961, "step": 1110 }, { "epoch": 0.06634420160038218, "grad_norm": 1.7666058540344238, "learning_rate": 9.446960504638403e-06, "loss": 0.9203, "step": 1111 }, { "epoch": 0.06640391735339783, "grad_norm": 1.9752806425094604, "learning_rate": 9.448172435501104e-06, "loss": 0.9818, "step": 1112 }, { "epoch": 0.06646363310641347, "grad_norm": 2.83854341506958, "learning_rate": 9.449383276987604e-06, "loss": 0.9683, "step": 1113 }, { "epoch": 0.06652334885942912, "grad_norm": 2.8276381492614746, "learning_rate": 9.45059303105457e-06, "loss": 1.011, "step": 1114 }, { "epoch": 0.06658306461244476, "grad_norm": 3.0155065059661865, "learning_rate": 9.451801699653408e-06, "loss": 0.9647, "step": 1115 }, { "epoch": 0.06664278036546041, "grad_norm": 2.882449150085449, "learning_rate": 9.453009284730271e-06, "loss": 0.9779, "step": 1116 }, { "epoch": 0.06670249611847605, "grad_norm": 2.2014620304107666, "learning_rate": 9.45421578822609e-06, "loss": 0.9361, "step": 1117 }, { "epoch": 0.0667622118714917, "grad_norm": 2.9190099239349365, "learning_rate": 9.455421212076577e-06, "loss": 0.9349, "step": 1118 }, { "epoch": 0.06682192762450735, "grad_norm": 2.6115691661834717, "learning_rate": 9.45662555821226e-06, "loss": 0.9591, "step": 1119 }, { "epoch": 0.06688164337752299, "grad_norm": 2.306164026260376, "learning_rate": 9.457828828558484e-06, "loss": 0.9696, "step": 1120 }, { "epoch": 0.06694135913053864, "grad_norm": 3.153188943862915, "learning_rate": 9.459031025035448e-06, "loss": 0.9393, "step": 1121 }, { "epoch": 0.06700107488355428, "grad_norm": 2.2387585639953613, "learning_rate": 9.460232149558213e-06, "loss": 0.9438, "step": 1122 }, { "epoch": 0.06706079063656993, "grad_norm": 3.039811372756958, "learning_rate": 9.461432204036716e-06, "loss": 0.9915, "step": 1123 }, { "epoch": 0.06712050638958557, "grad_norm": 2.04219126701355, "learning_rate": 9.462631190375803e-06, "loss": 0.9945, "step": 1124 }, { "epoch": 0.06718022214260122, "grad_norm": 4.285290718078613, "learning_rate": 9.46382911047523e-06, "loss": 0.9614, "step": 1125 }, { "epoch": 0.06723993789561686, "grad_norm": 2.0672550201416016, "learning_rate": 9.46502596622969e-06, "loss": 0.98, "step": 1126 }, { "epoch": 0.06729965364863251, "grad_norm": 4.556860446929932, "learning_rate": 9.466221759528838e-06, "loss": 0.9985, "step": 1127 }, { "epoch": 0.06735936940164816, "grad_norm": 2.6482763290405273, "learning_rate": 9.467416492257292e-06, "loss": 0.9724, "step": 1128 }, { "epoch": 0.0674190851546638, "grad_norm": 2.8595056533813477, "learning_rate": 9.468610166294665e-06, "loss": 0.9669, "step": 1129 }, { "epoch": 0.06747880090767945, "grad_norm": 3.2715330123901367, "learning_rate": 9.469802783515577e-06, "loss": 0.9833, "step": 1130 }, { "epoch": 0.06753851666069509, "grad_norm": 2.7202155590057373, "learning_rate": 9.470994345789674e-06, "loss": 0.9972, "step": 1131 }, { "epoch": 0.06759823241371074, "grad_norm": 3.087395191192627, "learning_rate": 9.472184854981638e-06, "loss": 0.9486, "step": 1132 }, { "epoch": 0.06765794816672638, "grad_norm": 2.1499922275543213, "learning_rate": 9.473374312951224e-06, "loss": 0.9544, "step": 1133 }, { "epoch": 0.06771766391974203, "grad_norm": 2.6875622272491455, "learning_rate": 9.474562721553253e-06, "loss": 0.9874, "step": 1134 }, { "epoch": 0.06777737967275767, "grad_norm": 3.9413719177246094, "learning_rate": 9.475750082637649e-06, "loss": 0.9257, "step": 1135 }, { "epoch": 0.06783709542577332, "grad_norm": 2.390629768371582, "learning_rate": 9.476936398049443e-06, "loss": 0.9667, "step": 1136 }, { "epoch": 0.06789681117878897, "grad_norm": 2.374980926513672, "learning_rate": 9.478121669628799e-06, "loss": 0.9654, "step": 1137 }, { "epoch": 0.06795652693180461, "grad_norm": 2.215596914291382, "learning_rate": 9.47930589921103e-06, "loss": 1.0035, "step": 1138 }, { "epoch": 0.06801624268482026, "grad_norm": 3.586768865585327, "learning_rate": 9.480489088626606e-06, "loss": 0.9534, "step": 1139 }, { "epoch": 0.0680759584378359, "grad_norm": 2.954447031021118, "learning_rate": 9.481671239701186e-06, "loss": 0.9836, "step": 1140 }, { "epoch": 0.06813567419085155, "grad_norm": 2.5950074195861816, "learning_rate": 9.482852354255618e-06, "loss": 0.9615, "step": 1141 }, { "epoch": 0.06819538994386719, "grad_norm": 2.3255083560943604, "learning_rate": 9.484032434105973e-06, "loss": 0.9723, "step": 1142 }, { "epoch": 0.06825510569688284, "grad_norm": 8.921185493469238, "learning_rate": 9.485211481063547e-06, "loss": 0.9627, "step": 1143 }, { "epoch": 0.06831482144989848, "grad_norm": 3.414954662322998, "learning_rate": 9.486389496934888e-06, "loss": 0.9792, "step": 1144 }, { "epoch": 0.06837453720291413, "grad_norm": 17.272199630737305, "learning_rate": 9.487566483521808e-06, "loss": 0.9615, "step": 1145 }, { "epoch": 0.06843425295592978, "grad_norm": 2.7765424251556396, "learning_rate": 9.488742442621398e-06, "loss": 0.9824, "step": 1146 }, { "epoch": 0.06849396870894542, "grad_norm": 1.9309196472167969, "learning_rate": 9.489917376026047e-06, "loss": 0.9368, "step": 1147 }, { "epoch": 0.06855368446196107, "grad_norm": 3.329533576965332, "learning_rate": 9.491091285523458e-06, "loss": 0.9822, "step": 1148 }, { "epoch": 0.06861340021497671, "grad_norm": 3.0051920413970947, "learning_rate": 9.49226417289667e-06, "loss": 0.9309, "step": 1149 }, { "epoch": 0.06867311596799236, "grad_norm": 3.2343554496765137, "learning_rate": 9.49343603992406e-06, "loss": 0.9549, "step": 1150 }, { "epoch": 0.068732831721008, "grad_norm": 3.1721136569976807, "learning_rate": 9.494606888379373e-06, "loss": 0.9783, "step": 1151 }, { "epoch": 0.06879254747402365, "grad_norm": 3.4508135318756104, "learning_rate": 9.495776720031732e-06, "loss": 0.9806, "step": 1152 }, { "epoch": 0.06885226322703929, "grad_norm": 2.309682607650757, "learning_rate": 9.49694553664565e-06, "loss": 0.945, "step": 1153 }, { "epoch": 0.06891197898005494, "grad_norm": 3.0415403842926025, "learning_rate": 9.498113339981061e-06, "loss": 0.9391, "step": 1154 }, { "epoch": 0.06897169473307059, "grad_norm": 2.3755128383636475, "learning_rate": 9.499280131793318e-06, "loss": 0.9606, "step": 1155 }, { "epoch": 0.06903141048608623, "grad_norm": 2.5644538402557373, "learning_rate": 9.50044591383322e-06, "loss": 0.9823, "step": 1156 }, { "epoch": 0.06909112623910188, "grad_norm": 2.004380702972412, "learning_rate": 9.50161068784702e-06, "loss": 0.9566, "step": 1157 }, { "epoch": 0.06915084199211752, "grad_norm": 3.648146629333496, "learning_rate": 9.502774455576448e-06, "loss": 0.9557, "step": 1158 }, { "epoch": 0.06921055774513317, "grad_norm": 2.86470890045166, "learning_rate": 9.503937218758727e-06, "loss": 0.9984, "step": 1159 }, { "epoch": 0.06927027349814881, "grad_norm": 2.2162599563598633, "learning_rate": 9.50509897912658e-06, "loss": 0.9468, "step": 1160 }, { "epoch": 0.06932998925116446, "grad_norm": 2.3708624839782715, "learning_rate": 9.506259738408253e-06, "loss": 0.9548, "step": 1161 }, { "epoch": 0.0693897050041801, "grad_norm": 2.1326825618743896, "learning_rate": 9.50741949832753e-06, "loss": 0.9449, "step": 1162 }, { "epoch": 0.06944942075719575, "grad_norm": 2.212064027786255, "learning_rate": 9.50857826060374e-06, "loss": 0.9113, "step": 1163 }, { "epoch": 0.0695091365102114, "grad_norm": 4.713578224182129, "learning_rate": 9.509736026951787e-06, "loss": 0.983, "step": 1164 }, { "epoch": 0.06956885226322704, "grad_norm": 3.1190826892852783, "learning_rate": 9.510892799082152e-06, "loss": 0.9786, "step": 1165 }, { "epoch": 0.06962856801624269, "grad_norm": 2.0034894943237305, "learning_rate": 9.512048578700915e-06, "loss": 0.9649, "step": 1166 }, { "epoch": 0.06968828376925833, "grad_norm": 2.4721806049346924, "learning_rate": 9.51320336750977e-06, "loss": 0.9513, "step": 1167 }, { "epoch": 0.06974799952227398, "grad_norm": 3.1895346641540527, "learning_rate": 9.51435716720603e-06, "loss": 0.9373, "step": 1168 }, { "epoch": 0.06980771527528962, "grad_norm": 2.9333534240722656, "learning_rate": 9.515509979482663e-06, "loss": 0.9357, "step": 1169 }, { "epoch": 0.06986743102830527, "grad_norm": 2.8181467056274414, "learning_rate": 9.516661806028284e-06, "loss": 0.9468, "step": 1170 }, { "epoch": 0.0699271467813209, "grad_norm": 3.645833969116211, "learning_rate": 9.517812648527185e-06, "loss": 0.9633, "step": 1171 }, { "epoch": 0.06998686253433656, "grad_norm": 2.0714943408966064, "learning_rate": 9.518962508659342e-06, "loss": 0.9431, "step": 1172 }, { "epoch": 0.07004657828735221, "grad_norm": 2.2169291973114014, "learning_rate": 9.520111388100441e-06, "loss": 0.9548, "step": 1173 }, { "epoch": 0.07010629404036785, "grad_norm": 2.5583465099334717, "learning_rate": 9.521259288521868e-06, "loss": 0.9747, "step": 1174 }, { "epoch": 0.0701660097933835, "grad_norm": 2.4342422485351562, "learning_rate": 9.522406211590753e-06, "loss": 0.9725, "step": 1175 }, { "epoch": 0.07022572554639914, "grad_norm": 2.735177755355835, "learning_rate": 9.523552158969968e-06, "loss": 0.958, "step": 1176 }, { "epoch": 0.07028544129941479, "grad_norm": 2.0259077548980713, "learning_rate": 9.524697132318146e-06, "loss": 0.9589, "step": 1177 }, { "epoch": 0.07034515705243043, "grad_norm": 3.6213021278381348, "learning_rate": 9.525841133289689e-06, "loss": 0.9799, "step": 1178 }, { "epoch": 0.07040487280544608, "grad_norm": 2.6598713397979736, "learning_rate": 9.526984163534794e-06, "loss": 0.9864, "step": 1179 }, { "epoch": 0.07046458855846172, "grad_norm": 2.2285656929016113, "learning_rate": 9.528126224699456e-06, "loss": 0.8964, "step": 1180 }, { "epoch": 0.07052430431147737, "grad_norm": 3.351475715637207, "learning_rate": 9.529267318425487e-06, "loss": 1.0162, "step": 1181 }, { "epoch": 0.07058402006449302, "grad_norm": 2.783613681793213, "learning_rate": 9.530407446350537e-06, "loss": 0.9394, "step": 1182 }, { "epoch": 0.07064373581750866, "grad_norm": 3.104907989501953, "learning_rate": 9.531546610108094e-06, "loss": 1.0084, "step": 1183 }, { "epoch": 0.07070345157052431, "grad_norm": 4.324728965759277, "learning_rate": 9.532684811327504e-06, "loss": 0.9483, "step": 1184 }, { "epoch": 0.07076316732353995, "grad_norm": 2.971238613128662, "learning_rate": 9.533822051633995e-06, "loss": 1.0203, "step": 1185 }, { "epoch": 0.0708228830765556, "grad_norm": 1.9041610956192017, "learning_rate": 9.534958332648676e-06, "loss": 0.9399, "step": 1186 }, { "epoch": 0.07088259882957124, "grad_norm": 2.7447807788848877, "learning_rate": 9.536093655988558e-06, "loss": 0.976, "step": 1187 }, { "epoch": 0.07094231458258689, "grad_norm": 2.9202492237091064, "learning_rate": 9.537228023266566e-06, "loss": 0.9744, "step": 1188 }, { "epoch": 0.07100203033560253, "grad_norm": 2.224200487136841, "learning_rate": 9.538361436091555e-06, "loss": 0.9824, "step": 1189 }, { "epoch": 0.07106174608861818, "grad_norm": 4.979360580444336, "learning_rate": 9.539493896068323e-06, "loss": 0.9724, "step": 1190 }, { "epoch": 0.07112146184163383, "grad_norm": 2.5934810638427734, "learning_rate": 9.540625404797626e-06, "loss": 0.9811, "step": 1191 }, { "epoch": 0.07118117759464947, "grad_norm": 2.1282567977905273, "learning_rate": 9.541755963876185e-06, "loss": 0.9498, "step": 1192 }, { "epoch": 0.07124089334766512, "grad_norm": 3.0290255546569824, "learning_rate": 9.542885574896703e-06, "loss": 0.9569, "step": 1193 }, { "epoch": 0.07130060910068076, "grad_norm": 5.1697096824646, "learning_rate": 9.544014239447887e-06, "loss": 0.9682, "step": 1194 }, { "epoch": 0.07136032485369641, "grad_norm": 1.7578938007354736, "learning_rate": 9.545141959114444e-06, "loss": 0.9894, "step": 1195 }, { "epoch": 0.07142004060671205, "grad_norm": 2.546992301940918, "learning_rate": 9.546268735477115e-06, "loss": 0.9752, "step": 1196 }, { "epoch": 0.0714797563597277, "grad_norm": 2.7565433979034424, "learning_rate": 9.54739457011267e-06, "loss": 0.957, "step": 1197 }, { "epoch": 0.07153947211274334, "grad_norm": 2.19114351272583, "learning_rate": 9.548519464593931e-06, "loss": 0.9302, "step": 1198 }, { "epoch": 0.07159918786575899, "grad_norm": 3.4704649448394775, "learning_rate": 9.549643420489784e-06, "loss": 1.0111, "step": 1199 }, { "epoch": 0.07165890361877464, "grad_norm": 1.9437570571899414, "learning_rate": 9.55076643936519e-06, "loss": 0.9476, "step": 1200 }, { "epoch": 0.07165890361877464, "eval_text_loss": 0.9819061160087585, "eval_text_runtime": 15.0951, "eval_text_samples_per_second": 264.986, "eval_text_steps_per_second": 0.53, "step": 1200 }, { "epoch": 0.07165890361877464, "eval_image_loss": 0.7252105474472046, "eval_image_runtime": 5.0988, "eval_image_samples_per_second": 784.493, "eval_image_steps_per_second": 1.569, "step": 1200 }, { "epoch": 0.07165890361877464, "eval_video_loss": 1.2129234075546265, "eval_video_runtime": 76.2036, "eval_video_samples_per_second": 52.491, "eval_video_steps_per_second": 0.105, "step": 1200 }, { "epoch": 0.07171861937179028, "grad_norm": 1.7504202127456665, "learning_rate": 9.551888522781201e-06, "loss": 0.993, "step": 1201 }, { "epoch": 0.07177833512480593, "grad_norm": 2.840583562850952, "learning_rate": 9.553009672294968e-06, "loss": 0.9944, "step": 1202 }, { "epoch": 0.07183805087782157, "grad_norm": 3.3196451663970947, "learning_rate": 9.554129889459759e-06, "loss": 0.979, "step": 1203 }, { "epoch": 0.07189776663083722, "grad_norm": 2.4458281993865967, "learning_rate": 9.555249175824972e-06, "loss": 0.9638, "step": 1204 }, { "epoch": 0.07195748238385286, "grad_norm": 2.9881503582000732, "learning_rate": 9.556367532936141e-06, "loss": 0.9586, "step": 1205 }, { "epoch": 0.07201719813686851, "grad_norm": 2.673741102218628, "learning_rate": 9.557484962334959e-06, "loss": 0.9812, "step": 1206 }, { "epoch": 0.07207691388988414, "grad_norm": 3.217869281768799, "learning_rate": 9.558601465559282e-06, "loss": 0.9886, "step": 1207 }, { "epoch": 0.0721366296428998, "grad_norm": 2.339102029800415, "learning_rate": 9.55971704414315e-06, "loss": 0.9407, "step": 1208 }, { "epoch": 0.07219634539591545, "grad_norm": 3.3140673637390137, "learning_rate": 9.560831699616788e-06, "loss": 0.948, "step": 1209 }, { "epoch": 0.07225606114893109, "grad_norm": 2.7913522720336914, "learning_rate": 9.561945433506631e-06, "loss": 0.9813, "step": 1210 }, { "epoch": 0.07231577690194674, "grad_norm": 1.9296302795410156, "learning_rate": 9.563058247335328e-06, "loss": 0.9737, "step": 1211 }, { "epoch": 0.07237549265496238, "grad_norm": 1.7748595476150513, "learning_rate": 9.56417014262176e-06, "loss": 0.9811, "step": 1212 }, { "epoch": 0.07243520840797803, "grad_norm": 2.0848169326782227, "learning_rate": 9.565281120881049e-06, "loss": 0.9893, "step": 1213 }, { "epoch": 0.07249492416099367, "grad_norm": 2.781397581100464, "learning_rate": 9.566391183624567e-06, "loss": 0.9096, "step": 1214 }, { "epoch": 0.07255463991400932, "grad_norm": 4.118064880371094, "learning_rate": 9.56750033235996e-06, "loss": 0.9584, "step": 1215 }, { "epoch": 0.07261435566702495, "grad_norm": 7.483325958251953, "learning_rate": 9.56860856859115e-06, "loss": 0.9369, "step": 1216 }, { "epoch": 0.0726740714200406, "grad_norm": 1.9821693897247314, "learning_rate": 9.569715893818345e-06, "loss": 1.0003, "step": 1217 }, { "epoch": 0.07273378717305626, "grad_norm": 2.665341854095459, "learning_rate": 9.570822309538065e-06, "loss": 0.9619, "step": 1218 }, { "epoch": 0.0727935029260719, "grad_norm": 3.239248037338257, "learning_rate": 9.571927817243142e-06, "loss": 0.9879, "step": 1219 }, { "epoch": 0.07285321867908755, "grad_norm": 1.8697590827941895, "learning_rate": 9.573032418422735e-06, "loss": 0.9971, "step": 1220 }, { "epoch": 0.07291293443210319, "grad_norm": 2.4502415657043457, "learning_rate": 9.574136114562338e-06, "loss": 0.9652, "step": 1221 }, { "epoch": 0.07297265018511884, "grad_norm": 2.756678581237793, "learning_rate": 9.57523890714381e-06, "loss": 0.9656, "step": 1222 }, { "epoch": 0.07303236593813447, "grad_norm": 2.4286272525787354, "learning_rate": 9.576340797645356e-06, "loss": 0.9628, "step": 1223 }, { "epoch": 0.07309208169115013, "grad_norm": 2.767171621322632, "learning_rate": 9.577441787541571e-06, "loss": 0.9831, "step": 1224 }, { "epoch": 0.07315179744416576, "grad_norm": 2.603325605392456, "learning_rate": 9.57854187830343e-06, "loss": 0.9891, "step": 1225 }, { "epoch": 0.07321151319718142, "grad_norm": 3.4222538471221924, "learning_rate": 9.57964107139831e-06, "loss": 0.9579, "step": 1226 }, { "epoch": 0.07327122895019707, "grad_norm": 2.0610246658325195, "learning_rate": 9.580739368289995e-06, "loss": 0.9564, "step": 1227 }, { "epoch": 0.0733309447032127, "grad_norm": 3.0585718154907227, "learning_rate": 9.581836770438698e-06, "loss": 0.9292, "step": 1228 }, { "epoch": 0.07339066045622836, "grad_norm": 3.1834142208099365, "learning_rate": 9.582933279301057e-06, "loss": 0.9729, "step": 1229 }, { "epoch": 0.073450376209244, "grad_norm": 2.21299409866333, "learning_rate": 9.584028896330167e-06, "loss": 0.9589, "step": 1230 }, { "epoch": 0.07351009196225965, "grad_norm": 2.9309301376342773, "learning_rate": 9.585123622975569e-06, "loss": 0.969, "step": 1231 }, { "epoch": 0.07356980771527528, "grad_norm": 3.9605605602264404, "learning_rate": 9.586217460683281e-06, "loss": 0.9441, "step": 1232 }, { "epoch": 0.07362952346829094, "grad_norm": 2.200788736343384, "learning_rate": 9.587310410895799e-06, "loss": 0.9443, "step": 1233 }, { "epoch": 0.07368923922130657, "grad_norm": 2.2636358737945557, "learning_rate": 9.58840247505211e-06, "loss": 0.9615, "step": 1234 }, { "epoch": 0.07374895497432223, "grad_norm": 2.523216485977173, "learning_rate": 9.589493654587703e-06, "loss": 0.962, "step": 1235 }, { "epoch": 0.07380867072733788, "grad_norm": 2.5649654865264893, "learning_rate": 9.59058395093458e-06, "loss": 0.981, "step": 1236 }, { "epoch": 0.07386838648035352, "grad_norm": 2.058803081512451, "learning_rate": 9.591673365521277e-06, "loss": 0.9661, "step": 1237 }, { "epoch": 0.07392810223336917, "grad_norm": 3.058034896850586, "learning_rate": 9.59276189977286e-06, "loss": 0.9374, "step": 1238 }, { "epoch": 0.0739878179863848, "grad_norm": 5.109102249145508, "learning_rate": 9.59384955511094e-06, "loss": 0.9518, "step": 1239 }, { "epoch": 0.07404753373940046, "grad_norm": 2.0134904384613037, "learning_rate": 9.594936332953697e-06, "loss": 0.9608, "step": 1240 }, { "epoch": 0.0741072494924161, "grad_norm": 2.6604297161102295, "learning_rate": 9.59602223471587e-06, "loss": 0.9848, "step": 1241 }, { "epoch": 0.07416696524543175, "grad_norm": 2.692873477935791, "learning_rate": 9.597107261808791e-06, "loss": 0.9704, "step": 1242 }, { "epoch": 0.07422668099844738, "grad_norm": 2.211272716522217, "learning_rate": 9.598191415640376e-06, "loss": 0.9579, "step": 1243 }, { "epoch": 0.07428639675146304, "grad_norm": 1.8533573150634766, "learning_rate": 9.599274697615145e-06, "loss": 0.9226, "step": 1244 }, { "epoch": 0.07434611250447869, "grad_norm": 3.860978841781616, "learning_rate": 9.600357109134237e-06, "loss": 0.9761, "step": 1245 }, { "epoch": 0.07440582825749432, "grad_norm": 3.0274410247802734, "learning_rate": 9.601438651595412e-06, "loss": 0.9664, "step": 1246 }, { "epoch": 0.07446554401050998, "grad_norm": 2.609147548675537, "learning_rate": 9.602519326393067e-06, "loss": 1.013, "step": 1247 }, { "epoch": 0.07452525976352561, "grad_norm": 2.4946916103363037, "learning_rate": 9.603599134918248e-06, "loss": 0.9263, "step": 1248 }, { "epoch": 0.07458497551654127, "grad_norm": 2.430584192276001, "learning_rate": 9.604678078558653e-06, "loss": 0.9206, "step": 1249 }, { "epoch": 0.0746446912695569, "grad_norm": 3.5350654125213623, "learning_rate": 9.605756158698653e-06, "loss": 1.0058, "step": 1250 }, { "epoch": 0.07470440702257256, "grad_norm": 5.004161357879639, "learning_rate": 9.606833376719297e-06, "loss": 0.9478, "step": 1251 }, { "epoch": 0.07476412277558821, "grad_norm": 3.390319585800171, "learning_rate": 9.607909733998323e-06, "loss": 0.9624, "step": 1252 }, { "epoch": 0.07482383852860384, "grad_norm": 2.368764638900757, "learning_rate": 9.60898523191017e-06, "loss": 0.9832, "step": 1253 }, { "epoch": 0.0748835542816195, "grad_norm": 2.1472439765930176, "learning_rate": 9.610059871825984e-06, "loss": 0.9555, "step": 1254 }, { "epoch": 0.07494327003463513, "grad_norm": 1.989707112312317, "learning_rate": 9.611133655113634e-06, "loss": 0.9435, "step": 1255 }, { "epoch": 0.07500298578765079, "grad_norm": 2.3260810375213623, "learning_rate": 9.612206583137724e-06, "loss": 0.9868, "step": 1256 }, { "epoch": 0.07506270154066642, "grad_norm": 1.734052062034607, "learning_rate": 9.613278657259596e-06, "loss": 0.9464, "step": 1257 }, { "epoch": 0.07512241729368208, "grad_norm": 3.661897897720337, "learning_rate": 9.614349878837345e-06, "loss": 0.9605, "step": 1258 }, { "epoch": 0.07518213304669771, "grad_norm": 2.1949026584625244, "learning_rate": 9.615420249225829e-06, "loss": 0.9662, "step": 1259 }, { "epoch": 0.07524184879971337, "grad_norm": 2.844566822052002, "learning_rate": 9.616489769776676e-06, "loss": 0.9841, "step": 1260 }, { "epoch": 0.07530156455272902, "grad_norm": 2.33731746673584, "learning_rate": 9.617558441838305e-06, "loss": 0.968, "step": 1261 }, { "epoch": 0.07536128030574465, "grad_norm": 2.4315743446350098, "learning_rate": 9.618626266755918e-06, "loss": 0.954, "step": 1262 }, { "epoch": 0.0754209960587603, "grad_norm": 3.2193570137023926, "learning_rate": 9.619693245871528e-06, "loss": 0.9798, "step": 1263 }, { "epoch": 0.07548071181177594, "grad_norm": 2.4502527713775635, "learning_rate": 9.620759380523958e-06, "loss": 0.9684, "step": 1264 }, { "epoch": 0.0755404275647916, "grad_norm": 3.0498225688934326, "learning_rate": 9.621824672048858e-06, "loss": 0.9726, "step": 1265 }, { "epoch": 0.07560014331780723, "grad_norm": 2.289609670639038, "learning_rate": 9.622889121778707e-06, "loss": 0.9829, "step": 1266 }, { "epoch": 0.07565985907082289, "grad_norm": 2.2678122520446777, "learning_rate": 9.62395273104283e-06, "loss": 1.0012, "step": 1267 }, { "epoch": 0.07571957482383852, "grad_norm": 2.402386426925659, "learning_rate": 9.625015501167409e-06, "loss": 0.9699, "step": 1268 }, { "epoch": 0.07577929057685417, "grad_norm": 3.6514394283294678, "learning_rate": 9.626077433475485e-06, "loss": 0.99, "step": 1269 }, { "epoch": 0.07583900632986983, "grad_norm": 2.9940366744995117, "learning_rate": 9.627138529286972e-06, "loss": 0.9762, "step": 1270 }, { "epoch": 0.07589872208288546, "grad_norm": 2.0592305660247803, "learning_rate": 9.628198789918671e-06, "loss": 0.9434, "step": 1271 }, { "epoch": 0.07595843783590112, "grad_norm": 2.9039437770843506, "learning_rate": 9.629258216684273e-06, "loss": 0.9632, "step": 1272 }, { "epoch": 0.07601815358891675, "grad_norm": 2.7572684288024902, "learning_rate": 9.630316810894377e-06, "loss": 0.9971, "step": 1273 }, { "epoch": 0.0760778693419324, "grad_norm": 2.73563289642334, "learning_rate": 9.631374573856486e-06, "loss": 0.9554, "step": 1274 }, { "epoch": 0.07613758509494804, "grad_norm": 1.9654511213302612, "learning_rate": 9.632431506875032e-06, "loss": 0.9885, "step": 1275 }, { "epoch": 0.0761973008479637, "grad_norm": 1.9141981601715088, "learning_rate": 9.633487611251378e-06, "loss": 0.9788, "step": 1276 }, { "epoch": 0.07625701660097933, "grad_norm": 6.0679545402526855, "learning_rate": 9.634542888283828e-06, "loss": 0.9644, "step": 1277 }, { "epoch": 0.07631673235399498, "grad_norm": 2.294905185699463, "learning_rate": 9.635597339267636e-06, "loss": 0.9538, "step": 1278 }, { "epoch": 0.07637644810701064, "grad_norm": 2.459679365158081, "learning_rate": 9.636650965495017e-06, "loss": 0.9897, "step": 1279 }, { "epoch": 0.07643616386002627, "grad_norm": 6.330216884613037, "learning_rate": 9.637703768255154e-06, "loss": 0.9815, "step": 1280 }, { "epoch": 0.07649587961304193, "grad_norm": 3.115389823913574, "learning_rate": 9.638755748834219e-06, "loss": 0.944, "step": 1281 }, { "epoch": 0.07655559536605756, "grad_norm": 3.3824586868286133, "learning_rate": 9.639806908515362e-06, "loss": 0.9716, "step": 1282 }, { "epoch": 0.07661531111907322, "grad_norm": 2.0273776054382324, "learning_rate": 9.640857248578737e-06, "loss": 0.956, "step": 1283 }, { "epoch": 0.07667502687208885, "grad_norm": 6.044827938079834, "learning_rate": 9.641906770301505e-06, "loss": 0.9143, "step": 1284 }, { "epoch": 0.0767347426251045, "grad_norm": 3.347360610961914, "learning_rate": 9.642955474957843e-06, "loss": 0.9655, "step": 1285 }, { "epoch": 0.07679445837812014, "grad_norm": 2.7151761054992676, "learning_rate": 9.644003363818955e-06, "loss": 0.9657, "step": 1286 }, { "epoch": 0.0768541741311358, "grad_norm": 2.7561581134796143, "learning_rate": 9.645050438153081e-06, "loss": 0.9839, "step": 1287 }, { "epoch": 0.07691388988415145, "grad_norm": 2.6501240730285645, "learning_rate": 9.64609669922551e-06, "loss": 0.9588, "step": 1288 }, { "epoch": 0.07697360563716708, "grad_norm": 2.6371917724609375, "learning_rate": 9.647142148298574e-06, "loss": 0.9405, "step": 1289 }, { "epoch": 0.07703332139018274, "grad_norm": 2.561523914337158, "learning_rate": 9.648186786631679e-06, "loss": 0.9428, "step": 1290 }, { "epoch": 0.07709303714319837, "grad_norm": 2.3926165103912354, "learning_rate": 9.649230615481298e-06, "loss": 0.9453, "step": 1291 }, { "epoch": 0.07715275289621402, "grad_norm": 4.400655746459961, "learning_rate": 9.65027363610099e-06, "loss": 0.9672, "step": 1292 }, { "epoch": 0.07721246864922966, "grad_norm": 3.7244441509246826, "learning_rate": 9.651315849741396e-06, "loss": 0.9589, "step": 1293 }, { "epoch": 0.07727218440224531, "grad_norm": 2.355419397354126, "learning_rate": 9.652357257650265e-06, "loss": 0.9794, "step": 1294 }, { "epoch": 0.07733190015526095, "grad_norm": 1.9768890142440796, "learning_rate": 9.65339786107245e-06, "loss": 0.992, "step": 1295 }, { "epoch": 0.0773916159082766, "grad_norm": 1.896944522857666, "learning_rate": 9.654437661249923e-06, "loss": 0.9916, "step": 1296 }, { "epoch": 0.07745133166129226, "grad_norm": 2.8787479400634766, "learning_rate": 9.65547665942178e-06, "loss": 0.9599, "step": 1297 }, { "epoch": 0.0775110474143079, "grad_norm": 3.812262535095215, "learning_rate": 9.656514856824253e-06, "loss": 0.9761, "step": 1298 }, { "epoch": 0.07757076316732354, "grad_norm": 3.5554864406585693, "learning_rate": 9.65755225469072e-06, "loss": 0.9583, "step": 1299 }, { "epoch": 0.07763047892033918, "grad_norm": 1.6872810125350952, "learning_rate": 9.658588854251707e-06, "loss": 0.9678, "step": 1300 }, { "epoch": 0.07769019467335483, "grad_norm": 3.591663122177124, "learning_rate": 9.659624656734909e-06, "loss": 0.9511, "step": 1301 }, { "epoch": 0.07774991042637047, "grad_norm": 2.8604371547698975, "learning_rate": 9.660659663365181e-06, "loss": 0.9509, "step": 1302 }, { "epoch": 0.07780962617938612, "grad_norm": 2.001769781112671, "learning_rate": 9.661693875364566e-06, "loss": 0.959, "step": 1303 }, { "epoch": 0.07786934193240176, "grad_norm": 2.9214017391204834, "learning_rate": 9.66272729395229e-06, "loss": 1.0071, "step": 1304 }, { "epoch": 0.07792905768541741, "grad_norm": 2.1384241580963135, "learning_rate": 9.663759920344774e-06, "loss": 0.9525, "step": 1305 }, { "epoch": 0.07798877343843306, "grad_norm": 2.591402769088745, "learning_rate": 9.664791755755647e-06, "loss": 1.0114, "step": 1306 }, { "epoch": 0.0780484891914487, "grad_norm": 2.1884055137634277, "learning_rate": 9.665822801395749e-06, "loss": 0.9577, "step": 1307 }, { "epoch": 0.07810820494446435, "grad_norm": 1.8413934707641602, "learning_rate": 9.666853058473143e-06, "loss": 0.9432, "step": 1308 }, { "epoch": 0.07816792069747999, "grad_norm": 2.3535549640655518, "learning_rate": 9.667882528193124e-06, "loss": 0.9692, "step": 1309 }, { "epoch": 0.07822763645049564, "grad_norm": 2.7139079570770264, "learning_rate": 9.668911211758218e-06, "loss": 0.9457, "step": 1310 }, { "epoch": 0.07828735220351128, "grad_norm": 2.8807451725006104, "learning_rate": 9.669939110368209e-06, "loss": 1.0077, "step": 1311 }, { "epoch": 0.07834706795652693, "grad_norm": 3.232738494873047, "learning_rate": 9.670966225220128e-06, "loss": 0.9651, "step": 1312 }, { "epoch": 0.07840678370954257, "grad_norm": 6.091331481933594, "learning_rate": 9.671992557508278e-06, "loss": 0.9873, "step": 1313 }, { "epoch": 0.07846649946255822, "grad_norm": 2.2407443523406982, "learning_rate": 9.673018108424223e-06, "loss": 0.988, "step": 1314 }, { "epoch": 0.07852621521557387, "grad_norm": 2.2764015197753906, "learning_rate": 9.674042879156818e-06, "loss": 1.0004, "step": 1315 }, { "epoch": 0.07858593096858951, "grad_norm": 2.0341997146606445, "learning_rate": 9.675066870892202e-06, "loss": 0.9726, "step": 1316 }, { "epoch": 0.07864564672160516, "grad_norm": 2.174426555633545, "learning_rate": 9.67609008481381e-06, "loss": 0.9709, "step": 1317 }, { "epoch": 0.0787053624746208, "grad_norm": 3.2900047302246094, "learning_rate": 9.677112522102388e-06, "loss": 0.9219, "step": 1318 }, { "epoch": 0.07876507822763645, "grad_norm": 2.367582082748413, "learning_rate": 9.678134183935989e-06, "loss": 1.009, "step": 1319 }, { "epoch": 0.07882479398065209, "grad_norm": 3.274064302444458, "learning_rate": 9.67915507148999e-06, "loss": 0.9193, "step": 1320 }, { "epoch": 0.07888450973366774, "grad_norm": 3.1118617057800293, "learning_rate": 9.680175185937099e-06, "loss": 0.9499, "step": 1321 }, { "epoch": 0.07894422548668338, "grad_norm": 1.936721920967102, "learning_rate": 9.681194528447359e-06, "loss": 0.9641, "step": 1322 }, { "epoch": 0.07900394123969903, "grad_norm": 1.9704736471176147, "learning_rate": 9.682213100188162e-06, "loss": 0.9516, "step": 1323 }, { "epoch": 0.07906365699271468, "grad_norm": 3.0600194931030273, "learning_rate": 9.683230902324252e-06, "loss": 0.9388, "step": 1324 }, { "epoch": 0.07912337274573032, "grad_norm": 2.792177438735962, "learning_rate": 9.684247936017736e-06, "loss": 0.9873, "step": 1325 }, { "epoch": 0.07918308849874597, "grad_norm": 2.0896148681640625, "learning_rate": 9.685264202428088e-06, "loss": 0.9342, "step": 1326 }, { "epoch": 0.07924280425176161, "grad_norm": 2.6381633281707764, "learning_rate": 9.686279702712162e-06, "loss": 0.9683, "step": 1327 }, { "epoch": 0.07930252000477726, "grad_norm": 1.8988012075424194, "learning_rate": 9.6872944380242e-06, "loss": 0.9702, "step": 1328 }, { "epoch": 0.0793622357577929, "grad_norm": 5.1088786125183105, "learning_rate": 9.688308409515832e-06, "loss": 0.9716, "step": 1329 }, { "epoch": 0.07942195151080855, "grad_norm": 3.1697134971618652, "learning_rate": 9.689321618336094e-06, "loss": 0.9825, "step": 1330 }, { "epoch": 0.07948166726382419, "grad_norm": 1.8853338956832886, "learning_rate": 9.69033406563143e-06, "loss": 0.9827, "step": 1331 }, { "epoch": 0.07954138301683984, "grad_norm": 3.7154979705810547, "learning_rate": 9.691345752545698e-06, "loss": 0.9836, "step": 1332 }, { "epoch": 0.0796010987698555, "grad_norm": 2.180603504180908, "learning_rate": 9.692356680220185e-06, "loss": 1.0039, "step": 1333 }, { "epoch": 0.07966081452287113, "grad_norm": 3.163388967514038, "learning_rate": 9.693366849793604e-06, "loss": 0.9216, "step": 1334 }, { "epoch": 0.07972053027588678, "grad_norm": 2.0247278213500977, "learning_rate": 9.694376262402119e-06, "loss": 0.9648, "step": 1335 }, { "epoch": 0.07978024602890242, "grad_norm": 3.0099995136260986, "learning_rate": 9.695384919179333e-06, "loss": 0.9568, "step": 1336 }, { "epoch": 0.07983996178191807, "grad_norm": 2.780451774597168, "learning_rate": 9.696392821256306e-06, "loss": 0.9188, "step": 1337 }, { "epoch": 0.07989967753493371, "grad_norm": 3.488459587097168, "learning_rate": 9.697399969761562e-06, "loss": 0.9515, "step": 1338 }, { "epoch": 0.07995939328794936, "grad_norm": 2.7956104278564453, "learning_rate": 9.698406365821097e-06, "loss": 0.9907, "step": 1339 }, { "epoch": 0.080019109040965, "grad_norm": 6.071786403656006, "learning_rate": 9.699412010558383e-06, "loss": 1.0022, "step": 1340 }, { "epoch": 0.08007882479398065, "grad_norm": 2.6170830726623535, "learning_rate": 9.700416905094376e-06, "loss": 0.9618, "step": 1341 }, { "epoch": 0.0801385405469963, "grad_norm": 6.465951442718506, "learning_rate": 9.701421050547532e-06, "loss": 0.9373, "step": 1342 }, { "epoch": 0.08019825630001194, "grad_norm": 3.046771764755249, "learning_rate": 9.7024244480338e-06, "loss": 0.9716, "step": 1343 }, { "epoch": 0.0802579720530276, "grad_norm": 2.261993169784546, "learning_rate": 9.70342709866664e-06, "loss": 0.9583, "step": 1344 }, { "epoch": 0.08031768780604323, "grad_norm": 2.833782434463501, "learning_rate": 9.70442900355703e-06, "loss": 0.9759, "step": 1345 }, { "epoch": 0.08037740355905888, "grad_norm": 2.665900945663452, "learning_rate": 9.705430163813467e-06, "loss": 0.9543, "step": 1346 }, { "epoch": 0.08043711931207452, "grad_norm": 2.780339479446411, "learning_rate": 9.706430580541982e-06, "loss": 0.9173, "step": 1347 }, { "epoch": 0.08049683506509017, "grad_norm": 2.053317070007324, "learning_rate": 9.70743025484614e-06, "loss": 0.9808, "step": 1348 }, { "epoch": 0.08055655081810581, "grad_norm": 2.986788511276245, "learning_rate": 9.708429187827052e-06, "loss": 0.9462, "step": 1349 }, { "epoch": 0.08061626657112146, "grad_norm": 1.7435604333877563, "learning_rate": 9.709427380583385e-06, "loss": 0.9505, "step": 1350 }, { "epoch": 0.08067598232413711, "grad_norm": 1.6291263103485107, "learning_rate": 9.710424834211359e-06, "loss": 0.9546, "step": 1351 }, { "epoch": 0.08073569807715275, "grad_norm": 3.449828863143921, "learning_rate": 9.711421549804763e-06, "loss": 0.9905, "step": 1352 }, { "epoch": 0.0807954138301684, "grad_norm": 1.95784330368042, "learning_rate": 9.712417528454964e-06, "loss": 0.988, "step": 1353 }, { "epoch": 0.08085512958318404, "grad_norm": 2.723999500274658, "learning_rate": 9.713412771250905e-06, "loss": 0.9484, "step": 1354 }, { "epoch": 0.08091484533619969, "grad_norm": 2.4634158611297607, "learning_rate": 9.714407279279118e-06, "loss": 0.9519, "step": 1355 }, { "epoch": 0.08097456108921533, "grad_norm": 2.921862840652466, "learning_rate": 9.715401053623733e-06, "loss": 0.9323, "step": 1356 }, { "epoch": 0.08103427684223098, "grad_norm": 2.012878656387329, "learning_rate": 9.71639409536648e-06, "loss": 0.9392, "step": 1357 }, { "epoch": 0.08109399259524662, "grad_norm": 2.604191303253174, "learning_rate": 9.717386405586697e-06, "loss": 0.962, "step": 1358 }, { "epoch": 0.08115370834826227, "grad_norm": 2.782999277114868, "learning_rate": 9.718377985361343e-06, "loss": 0.9731, "step": 1359 }, { "epoch": 0.08121342410127792, "grad_norm": 3.820164918899536, "learning_rate": 9.719368835764995e-06, "loss": 0.9969, "step": 1360 }, { "epoch": 0.08127313985429356, "grad_norm": 2.3762834072113037, "learning_rate": 9.720358957869866e-06, "loss": 0.9984, "step": 1361 }, { "epoch": 0.08133285560730921, "grad_norm": 2.543034076690674, "learning_rate": 9.721348352745803e-06, "loss": 0.9686, "step": 1362 }, { "epoch": 0.08139257136032485, "grad_norm": 3.714355945587158, "learning_rate": 9.7223370214603e-06, "loss": 0.9313, "step": 1363 }, { "epoch": 0.0814522871133405, "grad_norm": 1.8929760456085205, "learning_rate": 9.723324965078494e-06, "loss": 0.9622, "step": 1364 }, { "epoch": 0.08151200286635614, "grad_norm": 2.1126978397369385, "learning_rate": 9.724312184663193e-06, "loss": 0.963, "step": 1365 }, { "epoch": 0.08157171861937179, "grad_norm": 4.721487045288086, "learning_rate": 9.725298681274862e-06, "loss": 0.9887, "step": 1366 }, { "epoch": 0.08163143437238743, "grad_norm": 1.8918575048446655, "learning_rate": 9.726284455971639e-06, "loss": 0.9646, "step": 1367 }, { "epoch": 0.08169115012540308, "grad_norm": 2.3115761280059814, "learning_rate": 9.72726950980934e-06, "loss": 0.9682, "step": 1368 }, { "epoch": 0.08175086587841873, "grad_norm": 2.3109915256500244, "learning_rate": 9.72825384384147e-06, "loss": 0.9992, "step": 1369 }, { "epoch": 0.08181058163143437, "grad_norm": 2.048529624938965, "learning_rate": 9.729237459119225e-06, "loss": 0.9535, "step": 1370 }, { "epoch": 0.08187029738445002, "grad_norm": 2.3737595081329346, "learning_rate": 9.730220356691493e-06, "loss": 0.9864, "step": 1371 }, { "epoch": 0.08193001313746566, "grad_norm": 2.674347162246704, "learning_rate": 9.731202537604878e-06, "loss": 0.9749, "step": 1372 }, { "epoch": 0.08198972889048131, "grad_norm": 2.806394100189209, "learning_rate": 9.732184002903693e-06, "loss": 0.9903, "step": 1373 }, { "epoch": 0.08204944464349695, "grad_norm": 2.41339373588562, "learning_rate": 9.733164753629964e-06, "loss": 0.9672, "step": 1374 }, { "epoch": 0.0821091603965126, "grad_norm": 2.907996892929077, "learning_rate": 9.73414479082345e-06, "loss": 0.9862, "step": 1375 }, { "epoch": 0.08216887614952824, "grad_norm": 2.5154905319213867, "learning_rate": 9.735124115521642e-06, "loss": 0.9221, "step": 1376 }, { "epoch": 0.08222859190254389, "grad_norm": 6.01469612121582, "learning_rate": 9.736102728759764e-06, "loss": 0.9579, "step": 1377 }, { "epoch": 0.08228830765555954, "grad_norm": 5.16275691986084, "learning_rate": 9.737080631570792e-06, "loss": 0.9396, "step": 1378 }, { "epoch": 0.08234802340857518, "grad_norm": 2.996187686920166, "learning_rate": 9.738057824985447e-06, "loss": 0.9903, "step": 1379 }, { "epoch": 0.08240773916159083, "grad_norm": 2.5079941749572754, "learning_rate": 9.739034310032215e-06, "loss": 0.95, "step": 1380 }, { "epoch": 0.08246745491460647, "grad_norm": 2.361856460571289, "learning_rate": 9.740010087737347e-06, "loss": 0.9782, "step": 1381 }, { "epoch": 0.08252717066762212, "grad_norm": 2.2173538208007812, "learning_rate": 9.740985159124859e-06, "loss": 0.9447, "step": 1382 }, { "epoch": 0.08258688642063776, "grad_norm": 2.757033109664917, "learning_rate": 9.741959525216549e-06, "loss": 0.9368, "step": 1383 }, { "epoch": 0.08264660217365341, "grad_norm": 2.8848166465759277, "learning_rate": 9.742933187032e-06, "loss": 0.9846, "step": 1384 }, { "epoch": 0.08270631792666905, "grad_norm": 2.187671184539795, "learning_rate": 9.743906145588585e-06, "loss": 0.9723, "step": 1385 }, { "epoch": 0.0827660336796847, "grad_norm": 3.0697362422943115, "learning_rate": 9.744878401901476e-06, "loss": 0.9545, "step": 1386 }, { "epoch": 0.08282574943270035, "grad_norm": 1.89518404006958, "learning_rate": 9.745849956983641e-06, "loss": 0.9575, "step": 1387 }, { "epoch": 0.08288546518571599, "grad_norm": 1.871745228767395, "learning_rate": 9.746820811845868e-06, "loss": 0.951, "step": 1388 }, { "epoch": 0.08294518093873164, "grad_norm": 3.0624101161956787, "learning_rate": 9.747790967496754e-06, "loss": 1.0087, "step": 1389 }, { "epoch": 0.08300489669174728, "grad_norm": 2.378063917160034, "learning_rate": 9.74876042494272e-06, "loss": 0.9428, "step": 1390 }, { "epoch": 0.08306461244476293, "grad_norm": 2.803385019302368, "learning_rate": 9.749729185188021e-06, "loss": 0.9432, "step": 1391 }, { "epoch": 0.08312432819777857, "grad_norm": 4.492353916168213, "learning_rate": 9.750697249234737e-06, "loss": 0.9392, "step": 1392 }, { "epoch": 0.08318404395079422, "grad_norm": 2.3023273944854736, "learning_rate": 9.751664618082796e-06, "loss": 1.0094, "step": 1393 }, { "epoch": 0.08324375970380986, "grad_norm": 8.269031524658203, "learning_rate": 9.75263129272997e-06, "loss": 1.0186, "step": 1394 }, { "epoch": 0.08330347545682551, "grad_norm": 2.526397705078125, "learning_rate": 9.75359727417189e-06, "loss": 0.9339, "step": 1395 }, { "epoch": 0.08336319120984116, "grad_norm": 2.3106698989868164, "learning_rate": 9.754562563402038e-06, "loss": 0.9616, "step": 1396 }, { "epoch": 0.0834229069628568, "grad_norm": 2.208939790725708, "learning_rate": 9.75552716141177e-06, "loss": 0.9532, "step": 1397 }, { "epoch": 0.08348262271587245, "grad_norm": 2.660536527633667, "learning_rate": 9.756491069190309e-06, "loss": 0.9666, "step": 1398 }, { "epoch": 0.08354233846888809, "grad_norm": 2.834152936935425, "learning_rate": 9.757454287724758e-06, "loss": 0.9188, "step": 1399 }, { "epoch": 0.08360205422190374, "grad_norm": 2.4724888801574707, "learning_rate": 9.7584168180001e-06, "loss": 0.9373, "step": 1400 }, { "epoch": 0.08360205422190374, "eval_text_loss": 0.9777473211288452, "eval_text_runtime": 15.0876, "eval_text_samples_per_second": 265.119, "eval_text_steps_per_second": 0.53, "step": 1400 }, { "epoch": 0.08360205422190374, "eval_image_loss": 0.7244358062744141, "eval_image_runtime": 5.3023, "eval_image_samples_per_second": 754.392, "eval_image_steps_per_second": 1.509, "step": 1400 }, { "epoch": 0.08360205422190374, "eval_video_loss": 1.208478331565857, "eval_video_runtime": 76.8735, "eval_video_samples_per_second": 52.034, "eval_video_steps_per_second": 0.104, "step": 1400 }, { "epoch": 0.08366176997491938, "grad_norm": 4.621466636657715, "learning_rate": 9.759378660999216e-06, "loss": 0.9541, "step": 1401 }, { "epoch": 0.08372148572793503, "grad_norm": 3.603200674057007, "learning_rate": 9.760339817702878e-06, "loss": 0.9831, "step": 1402 }, { "epoch": 0.08378120148095068, "grad_norm": 2.813096284866333, "learning_rate": 9.761300289089757e-06, "loss": 1.0268, "step": 1403 }, { "epoch": 0.08384091723396632, "grad_norm": 1.943663239479065, "learning_rate": 9.76226007613644e-06, "loss": 0.9355, "step": 1404 }, { "epoch": 0.08390063298698197, "grad_norm": 2.8748421669006348, "learning_rate": 9.76321917981742e-06, "loss": 0.9372, "step": 1405 }, { "epoch": 0.08396034873999761, "grad_norm": 3.6190085411071777, "learning_rate": 9.764177601105116e-06, "loss": 1.0216, "step": 1406 }, { "epoch": 0.08402006449301326, "grad_norm": 2.744055986404419, "learning_rate": 9.765135340969869e-06, "loss": 0.9985, "step": 1407 }, { "epoch": 0.0840797802460289, "grad_norm": 2.1634881496429443, "learning_rate": 9.766092400379953e-06, "loss": 0.9568, "step": 1408 }, { "epoch": 0.08413949599904455, "grad_norm": 3.698323965072632, "learning_rate": 9.767048780301582e-06, "loss": 0.969, "step": 1409 }, { "epoch": 0.08419921175206019, "grad_norm": 3.6284749507904053, "learning_rate": 9.768004481698909e-06, "loss": 0.928, "step": 1410 }, { "epoch": 0.08425892750507584, "grad_norm": 3.5065126419067383, "learning_rate": 9.76895950553404e-06, "loss": 0.9378, "step": 1411 }, { "epoch": 0.08431864325809149, "grad_norm": 2.193082809448242, "learning_rate": 9.769913852767037e-06, "loss": 0.9994, "step": 1412 }, { "epoch": 0.08437835901110713, "grad_norm": 2.4416327476501465, "learning_rate": 9.770867524355918e-06, "loss": 0.9736, "step": 1413 }, { "epoch": 0.08443807476412278, "grad_norm": 2.0966718196868896, "learning_rate": 9.77182052125667e-06, "loss": 0.9831, "step": 1414 }, { "epoch": 0.08449779051713842, "grad_norm": 2.6244044303894043, "learning_rate": 9.772772844423256e-06, "loss": 0.9652, "step": 1415 }, { "epoch": 0.08455750627015407, "grad_norm": 2.1613481044769287, "learning_rate": 9.773724494807611e-06, "loss": 0.9669, "step": 1416 }, { "epoch": 0.08461722202316971, "grad_norm": 2.979182243347168, "learning_rate": 9.774675473359661e-06, "loss": 0.9918, "step": 1417 }, { "epoch": 0.08467693777618536, "grad_norm": 4.702205657958984, "learning_rate": 9.775625781027312e-06, "loss": 0.9638, "step": 1418 }, { "epoch": 0.084736653529201, "grad_norm": 4.871085166931152, "learning_rate": 9.776575418756476e-06, "loss": 0.9437, "step": 1419 }, { "epoch": 0.08479636928221665, "grad_norm": 2.3034374713897705, "learning_rate": 9.77752438749106e-06, "loss": 0.9962, "step": 1420 }, { "epoch": 0.0848560850352323, "grad_norm": 2.344256639480591, "learning_rate": 9.778472688172975e-06, "loss": 0.9595, "step": 1421 }, { "epoch": 0.08491580078824794, "grad_norm": 2.78743839263916, "learning_rate": 9.779420321742151e-06, "loss": 0.9554, "step": 1422 }, { "epoch": 0.08497551654126359, "grad_norm": 2.814948797225952, "learning_rate": 9.780367289136531e-06, "loss": 0.9738, "step": 1423 }, { "epoch": 0.08503523229427923, "grad_norm": 2.1726746559143066, "learning_rate": 9.781313591292084e-06, "loss": 0.9825, "step": 1424 }, { "epoch": 0.08509494804729488, "grad_norm": 3.1828246116638184, "learning_rate": 9.782259229142803e-06, "loss": 1.001, "step": 1425 }, { "epoch": 0.08515466380031052, "grad_norm": 4.365872859954834, "learning_rate": 9.783204203620721e-06, "loss": 0.9668, "step": 1426 }, { "epoch": 0.08521437955332617, "grad_norm": 2.1307902336120605, "learning_rate": 9.784148515655908e-06, "loss": 0.9155, "step": 1427 }, { "epoch": 0.08527409530634181, "grad_norm": 2.689927816390991, "learning_rate": 9.78509216617648e-06, "loss": 0.9611, "step": 1428 }, { "epoch": 0.08533381105935746, "grad_norm": 2.643554449081421, "learning_rate": 9.786035156108606e-06, "loss": 0.9692, "step": 1429 }, { "epoch": 0.08539352681237311, "grad_norm": 3.106555700302124, "learning_rate": 9.786977486376505e-06, "loss": 0.9929, "step": 1430 }, { "epoch": 0.08545324256538875, "grad_norm": 1.9750502109527588, "learning_rate": 9.787919157902466e-06, "loss": 0.9464, "step": 1431 }, { "epoch": 0.0855129583184044, "grad_norm": 2.4903080463409424, "learning_rate": 9.78886017160684e-06, "loss": 0.9535, "step": 1432 }, { "epoch": 0.08557267407142004, "grad_norm": 2.1122665405273438, "learning_rate": 9.78980052840805e-06, "loss": 0.9254, "step": 1433 }, { "epoch": 0.08563238982443569, "grad_norm": 3.453652858734131, "learning_rate": 9.790740229222601e-06, "loss": 0.9734, "step": 1434 }, { "epoch": 0.08569210557745133, "grad_norm": 1.9280155897140503, "learning_rate": 9.791679274965076e-06, "loss": 0.9847, "step": 1435 }, { "epoch": 0.08575182133046698, "grad_norm": 4.878944396972656, "learning_rate": 9.792617666548153e-06, "loss": 0.9713, "step": 1436 }, { "epoch": 0.08581153708348262, "grad_norm": 3.2250924110412598, "learning_rate": 9.793555404882597e-06, "loss": 0.9514, "step": 1437 }, { "epoch": 0.08587125283649827, "grad_norm": 2.940849781036377, "learning_rate": 9.794492490877278e-06, "loss": 0.9906, "step": 1438 }, { "epoch": 0.08593096858951392, "grad_norm": 2.577451467514038, "learning_rate": 9.795428925439166e-06, "loss": 0.9695, "step": 1439 }, { "epoch": 0.08599068434252956, "grad_norm": 2.399200201034546, "learning_rate": 9.796364709473348e-06, "loss": 0.9583, "step": 1440 }, { "epoch": 0.08605040009554521, "grad_norm": 2.125394344329834, "learning_rate": 9.797299843883015e-06, "loss": 0.964, "step": 1441 }, { "epoch": 0.08611011584856085, "grad_norm": 2.0220651626586914, "learning_rate": 9.798234329569492e-06, "loss": 0.9716, "step": 1442 }, { "epoch": 0.0861698316015765, "grad_norm": 2.55076265335083, "learning_rate": 9.799168167432213e-06, "loss": 0.9385, "step": 1443 }, { "epoch": 0.08622954735459214, "grad_norm": 3.1206226348876953, "learning_rate": 9.800101358368758e-06, "loss": 0.9725, "step": 1444 }, { "epoch": 0.08628926310760779, "grad_norm": 2.393075466156006, "learning_rate": 9.801033903274837e-06, "loss": 0.9683, "step": 1445 }, { "epoch": 0.08634897886062343, "grad_norm": 2.1837406158447266, "learning_rate": 9.801965803044297e-06, "loss": 0.9809, "step": 1446 }, { "epoch": 0.08640869461363908, "grad_norm": 1.917853593826294, "learning_rate": 9.802897058569136e-06, "loss": 0.9499, "step": 1447 }, { "epoch": 0.08646841036665473, "grad_norm": 1.7798811197280884, "learning_rate": 9.803827670739502e-06, "loss": 0.9325, "step": 1448 }, { "epoch": 0.08652812611967037, "grad_norm": 3.2383217811584473, "learning_rate": 9.8047576404437e-06, "loss": 0.9711, "step": 1449 }, { "epoch": 0.08658784187268602, "grad_norm": 2.7541418075561523, "learning_rate": 9.805686968568197e-06, "loss": 0.9439, "step": 1450 }, { "epoch": 0.08664755762570166, "grad_norm": 2.509387731552124, "learning_rate": 9.806615655997625e-06, "loss": 0.9871, "step": 1451 }, { "epoch": 0.08670727337871731, "grad_norm": 2.068223714828491, "learning_rate": 9.807543703614787e-06, "loss": 0.9521, "step": 1452 }, { "epoch": 0.08676698913173295, "grad_norm": 3.6366991996765137, "learning_rate": 9.808471112300665e-06, "loss": 0.9843, "step": 1453 }, { "epoch": 0.0868267048847486, "grad_norm": 2.524073600769043, "learning_rate": 9.809397882934422e-06, "loss": 0.9367, "step": 1454 }, { "epoch": 0.08688642063776424, "grad_norm": 3.627765655517578, "learning_rate": 9.810324016393405e-06, "loss": 0.9993, "step": 1455 }, { "epoch": 0.08694613639077989, "grad_norm": 2.5602664947509766, "learning_rate": 9.811249513553158e-06, "loss": 0.9436, "step": 1456 }, { "epoch": 0.08700585214379554, "grad_norm": 6.343139171600342, "learning_rate": 9.812174375287413e-06, "loss": 0.9438, "step": 1457 }, { "epoch": 0.08706556789681118, "grad_norm": 1.8804597854614258, "learning_rate": 9.813098602468117e-06, "loss": 0.9727, "step": 1458 }, { "epoch": 0.08712528364982683, "grad_norm": 2.429849147796631, "learning_rate": 9.814022195965409e-06, "loss": 0.9785, "step": 1459 }, { "epoch": 0.08718499940284247, "grad_norm": 4.27019739151001, "learning_rate": 9.814945156647649e-06, "loss": 0.9554, "step": 1460 }, { "epoch": 0.08724471515585812, "grad_norm": 4.860029220581055, "learning_rate": 9.815867485381407e-06, "loss": 0.9982, "step": 1461 }, { "epoch": 0.08730443090887376, "grad_norm": 3.299024820327759, "learning_rate": 9.816789183031483e-06, "loss": 0.9274, "step": 1462 }, { "epoch": 0.08736414666188941, "grad_norm": 1.9643961191177368, "learning_rate": 9.817710250460892e-06, "loss": 0.9269, "step": 1463 }, { "epoch": 0.08742386241490505, "grad_norm": 2.158740997314453, "learning_rate": 9.818630688530889e-06, "loss": 0.9799, "step": 1464 }, { "epoch": 0.0874835781679207, "grad_norm": 1.869265079498291, "learning_rate": 9.81955049810096e-06, "loss": 0.9411, "step": 1465 }, { "epoch": 0.08754329392093635, "grad_norm": 3.7069287300109863, "learning_rate": 9.820469680028833e-06, "loss": 0.9629, "step": 1466 }, { "epoch": 0.08760300967395199, "grad_norm": 2.488513231277466, "learning_rate": 9.821388235170481e-06, "loss": 0.9728, "step": 1467 }, { "epoch": 0.08766272542696764, "grad_norm": 2.9100146293640137, "learning_rate": 9.822306164380127e-06, "loss": 0.9447, "step": 1468 }, { "epoch": 0.08772244117998328, "grad_norm": 2.905174970626831, "learning_rate": 9.82322346851025e-06, "loss": 0.9618, "step": 1469 }, { "epoch": 0.08778215693299893, "grad_norm": 2.588675022125244, "learning_rate": 9.824140148411586e-06, "loss": 0.9618, "step": 1470 }, { "epoch": 0.08784187268601457, "grad_norm": 3.9076876640319824, "learning_rate": 9.82505620493314e-06, "loss": 0.9442, "step": 1471 }, { "epoch": 0.08790158843903022, "grad_norm": 2.0193283557891846, "learning_rate": 9.825971638922178e-06, "loss": 0.9817, "step": 1472 }, { "epoch": 0.08796130419204586, "grad_norm": 2.1344704627990723, "learning_rate": 9.826886451224252e-06, "loss": 0.9468, "step": 1473 }, { "epoch": 0.08802101994506151, "grad_norm": 2.3984732627868652, "learning_rate": 9.82780064268318e-06, "loss": 0.9817, "step": 1474 }, { "epoch": 0.08808073569807716, "grad_norm": 2.5915701389312744, "learning_rate": 9.828714214141072e-06, "loss": 1.0191, "step": 1475 }, { "epoch": 0.0881404514510928, "grad_norm": 2.5258257389068604, "learning_rate": 9.829627166438322e-06, "loss": 0.9385, "step": 1476 }, { "epoch": 0.08820016720410845, "grad_norm": 4.097917556762695, "learning_rate": 9.830539500413618e-06, "loss": 0.985, "step": 1477 }, { "epoch": 0.08825988295712409, "grad_norm": 2.3058481216430664, "learning_rate": 9.831451216903938e-06, "loss": 0.9562, "step": 1478 }, { "epoch": 0.08831959871013974, "grad_norm": 2.3073675632476807, "learning_rate": 9.832362316744578e-06, "loss": 0.9707, "step": 1479 }, { "epoch": 0.08837931446315538, "grad_norm": 3.066882848739624, "learning_rate": 9.833272800769122e-06, "loss": 0.9816, "step": 1480 }, { "epoch": 0.08843903021617103, "grad_norm": 2.7750861644744873, "learning_rate": 9.834182669809476e-06, "loss": 0.9249, "step": 1481 }, { "epoch": 0.08849874596918667, "grad_norm": 2.8156819343566895, "learning_rate": 9.835091924695857e-06, "loss": 0.981, "step": 1482 }, { "epoch": 0.08855846172220232, "grad_norm": 2.24360990524292, "learning_rate": 9.836000566256806e-06, "loss": 0.9474, "step": 1483 }, { "epoch": 0.08861817747521797, "grad_norm": 3.3079299926757812, "learning_rate": 9.836908595319183e-06, "loss": 0.9385, "step": 1484 }, { "epoch": 0.0886778932282336, "grad_norm": 2.4319067001342773, "learning_rate": 9.837816012708182e-06, "loss": 0.952, "step": 1485 }, { "epoch": 0.08873760898124926, "grad_norm": 1.7982598543167114, "learning_rate": 9.838722819247327e-06, "loss": 0.9703, "step": 1486 }, { "epoch": 0.0887973247342649, "grad_norm": 2.5783329010009766, "learning_rate": 9.839629015758481e-06, "loss": 0.941, "step": 1487 }, { "epoch": 0.08885704048728055, "grad_norm": 2.078429937362671, "learning_rate": 9.840534603061852e-06, "loss": 0.9053, "step": 1488 }, { "epoch": 0.08891675624029619, "grad_norm": 3.001239776611328, "learning_rate": 9.841439581975991e-06, "loss": 0.9715, "step": 1489 }, { "epoch": 0.08897647199331184, "grad_norm": 3.05568265914917, "learning_rate": 9.842343953317803e-06, "loss": 0.9753, "step": 1490 }, { "epoch": 0.08903618774632747, "grad_norm": 3.068586587905884, "learning_rate": 9.843247717902543e-06, "loss": 0.9746, "step": 1491 }, { "epoch": 0.08909590349934313, "grad_norm": 1.90028977394104, "learning_rate": 9.844150876543839e-06, "loss": 0.9574, "step": 1492 }, { "epoch": 0.08915561925235878, "grad_norm": 2.7398927211761475, "learning_rate": 9.84505343005367e-06, "loss": 0.946, "step": 1493 }, { "epoch": 0.08921533500537442, "grad_norm": 2.370006799697876, "learning_rate": 9.845955379242394e-06, "loss": 0.9401, "step": 1494 }, { "epoch": 0.08927505075839007, "grad_norm": 2.1624224185943604, "learning_rate": 9.846856724918732e-06, "loss": 0.9504, "step": 1495 }, { "epoch": 0.0893347665114057, "grad_norm": 2.3359110355377197, "learning_rate": 9.847757467889792e-06, "loss": 0.9452, "step": 1496 }, { "epoch": 0.08939448226442136, "grad_norm": 2.318613290786743, "learning_rate": 9.848657608961065e-06, "loss": 0.96, "step": 1497 }, { "epoch": 0.089454198017437, "grad_norm": 2.3833014965057373, "learning_rate": 9.849557148936414e-06, "loss": 0.9562, "step": 1498 }, { "epoch": 0.08951391377045265, "grad_norm": 4.694254398345947, "learning_rate": 9.85045608861811e-06, "loss": 0.9414, "step": 1499 }, { "epoch": 0.08957362952346828, "grad_norm": 2.2495198249816895, "learning_rate": 9.851354428806809e-06, "loss": 0.9369, "step": 1500 }, { "epoch": 0.08963334527648394, "grad_norm": 2.6034834384918213, "learning_rate": 9.852252170301569e-06, "loss": 0.932, "step": 1501 }, { "epoch": 0.08969306102949959, "grad_norm": 1.9428685903549194, "learning_rate": 9.85314931389985e-06, "loss": 0.9351, "step": 1502 }, { "epoch": 0.08975277678251523, "grad_norm": 2.1887452602386475, "learning_rate": 9.854045860397527e-06, "loss": 0.9525, "step": 1503 }, { "epoch": 0.08981249253553088, "grad_norm": 2.16143536567688, "learning_rate": 9.854941810588874e-06, "loss": 0.9751, "step": 1504 }, { "epoch": 0.08987220828854652, "grad_norm": 2.4875011444091797, "learning_rate": 9.855837165266588e-06, "loss": 0.9806, "step": 1505 }, { "epoch": 0.08993192404156217, "grad_norm": 2.342034339904785, "learning_rate": 9.856731925221791e-06, "loss": 0.9296, "step": 1506 }, { "epoch": 0.0899916397945778, "grad_norm": 2.2162108421325684, "learning_rate": 9.857626091244022e-06, "loss": 0.9757, "step": 1507 }, { "epoch": 0.09005135554759346, "grad_norm": 2.2469513416290283, "learning_rate": 9.858519664121254e-06, "loss": 0.9786, "step": 1508 }, { "epoch": 0.0901110713006091, "grad_norm": 4.260598182678223, "learning_rate": 9.859412644639888e-06, "loss": 0.944, "step": 1509 }, { "epoch": 0.09017078705362475, "grad_norm": 2.077228307723999, "learning_rate": 9.860305033584768e-06, "loss": 0.9461, "step": 1510 }, { "epoch": 0.0902305028066404, "grad_norm": 3.6964781284332275, "learning_rate": 9.861196831739173e-06, "loss": 0.9367, "step": 1511 }, { "epoch": 0.09029021855965604, "grad_norm": 2.3934452533721924, "learning_rate": 9.862088039884832e-06, "loss": 0.9701, "step": 1512 }, { "epoch": 0.09034993431267169, "grad_norm": 1.919048547744751, "learning_rate": 9.862978658801923e-06, "loss": 0.9641, "step": 1513 }, { "epoch": 0.09040965006568732, "grad_norm": 2.2776570320129395, "learning_rate": 9.863868689269076e-06, "loss": 0.9132, "step": 1514 }, { "epoch": 0.09046936581870298, "grad_norm": 2.2559680938720703, "learning_rate": 9.864758132063378e-06, "loss": 0.9768, "step": 1515 }, { "epoch": 0.09052908157171861, "grad_norm": 1.8752321004867554, "learning_rate": 9.86564698796038e-06, "loss": 0.982, "step": 1516 }, { "epoch": 0.09058879732473427, "grad_norm": 4.1071085929870605, "learning_rate": 9.866535257734096e-06, "loss": 0.9723, "step": 1517 }, { "epoch": 0.0906485130777499, "grad_norm": 2.327677011489868, "learning_rate": 9.867422942157014e-06, "loss": 0.9233, "step": 1518 }, { "epoch": 0.09070822883076556, "grad_norm": 2.5840530395507812, "learning_rate": 9.86831004200009e-06, "loss": 0.9523, "step": 1519 }, { "epoch": 0.09076794458378121, "grad_norm": 2.409491539001465, "learning_rate": 9.869196558032766e-06, "loss": 1.0072, "step": 1520 }, { "epoch": 0.09082766033679684, "grad_norm": 2.735231876373291, "learning_rate": 9.870082491022956e-06, "loss": 0.931, "step": 1521 }, { "epoch": 0.0908873760898125, "grad_norm": 1.9090169668197632, "learning_rate": 9.870967841737067e-06, "loss": 0.9797, "step": 1522 }, { "epoch": 0.09094709184282813, "grad_norm": 2.791599750518799, "learning_rate": 9.871852610939996e-06, "loss": 0.973, "step": 1523 }, { "epoch": 0.09100680759584379, "grad_norm": 3.1368656158447266, "learning_rate": 9.872736799395128e-06, "loss": 0.9376, "step": 1524 }, { "epoch": 0.09106652334885942, "grad_norm": 2.616997241973877, "learning_rate": 9.873620407864351e-06, "loss": 0.9887, "step": 1525 }, { "epoch": 0.09112623910187508, "grad_norm": 2.993422031402588, "learning_rate": 9.874503437108052e-06, "loss": 0.92, "step": 1526 }, { "epoch": 0.09118595485489071, "grad_norm": 1.9806057214736938, "learning_rate": 9.875385887885127e-06, "loss": 0.9226, "step": 1527 }, { "epoch": 0.09124567060790637, "grad_norm": 3.605461359024048, "learning_rate": 9.876267760952977e-06, "loss": 0.9761, "step": 1528 }, { "epoch": 0.09130538636092202, "grad_norm": 2.8935790061950684, "learning_rate": 9.87714905706752e-06, "loss": 0.9519, "step": 1529 }, { "epoch": 0.09136510211393765, "grad_norm": 2.3034706115722656, "learning_rate": 9.878029776983187e-06, "loss": 0.9574, "step": 1530 }, { "epoch": 0.0914248178669533, "grad_norm": 2.6143529415130615, "learning_rate": 9.87890992145294e-06, "loss": 0.9647, "step": 1531 }, { "epoch": 0.09148453361996894, "grad_norm": 2.4179883003234863, "learning_rate": 9.87978949122825e-06, "loss": 0.9643, "step": 1532 }, { "epoch": 0.0915442493729846, "grad_norm": 2.8739330768585205, "learning_rate": 9.880668487059133e-06, "loss": 0.9619, "step": 1533 }, { "epoch": 0.09160396512600023, "grad_norm": 2.164813756942749, "learning_rate": 9.881546909694128e-06, "loss": 0.9427, "step": 1534 }, { "epoch": 0.09166368087901589, "grad_norm": 6.641565799713135, "learning_rate": 9.882424759880314e-06, "loss": 0.9348, "step": 1535 }, { "epoch": 0.09172339663203152, "grad_norm": 2.077500343322754, "learning_rate": 9.883302038363311e-06, "loss": 0.9404, "step": 1536 }, { "epoch": 0.09178311238504717, "grad_norm": 2.0725409984588623, "learning_rate": 9.88417874588728e-06, "loss": 0.9723, "step": 1537 }, { "epoch": 0.09184282813806283, "grad_norm": 2.15701961517334, "learning_rate": 9.885054883194934e-06, "loss": 0.9785, "step": 1538 }, { "epoch": 0.09190254389107846, "grad_norm": 2.596451759338379, "learning_rate": 9.885930451027535e-06, "loss": 0.9721, "step": 1539 }, { "epoch": 0.09196225964409412, "grad_norm": 2.7619287967681885, "learning_rate": 9.8868054501249e-06, "loss": 1.0059, "step": 1540 }, { "epoch": 0.09202197539710975, "grad_norm": 2.3672189712524414, "learning_rate": 9.887679881225407e-06, "loss": 0.9716, "step": 1541 }, { "epoch": 0.0920816911501254, "grad_norm": 2.5070559978485107, "learning_rate": 9.888553745065998e-06, "loss": 0.9953, "step": 1542 }, { "epoch": 0.09214140690314104, "grad_norm": 2.4060745239257812, "learning_rate": 9.889427042382179e-06, "loss": 0.96, "step": 1543 }, { "epoch": 0.0922011226561567, "grad_norm": 3.506112575531006, "learning_rate": 9.89029977390803e-06, "loss": 0.9612, "step": 1544 }, { "epoch": 0.09226083840917235, "grad_norm": 2.3267204761505127, "learning_rate": 9.891171940376197e-06, "loss": 0.9668, "step": 1545 }, { "epoch": 0.09232055416218798, "grad_norm": 2.3345248699188232, "learning_rate": 9.892043542517917e-06, "loss": 1.0047, "step": 1546 }, { "epoch": 0.09238026991520364, "grad_norm": 2.52351975440979, "learning_rate": 9.892914581062997e-06, "loss": 0.9558, "step": 1547 }, { "epoch": 0.09243998566821927, "grad_norm": 2.420984983444214, "learning_rate": 9.893785056739836e-06, "loss": 0.9733, "step": 1548 }, { "epoch": 0.09249970142123493, "grad_norm": 2.9757089614868164, "learning_rate": 9.894654970275414e-06, "loss": 1.006, "step": 1549 }, { "epoch": 0.09255941717425056, "grad_norm": 2.9671225547790527, "learning_rate": 9.895524322395315e-06, "loss": 0.9879, "step": 1550 }, { "epoch": 0.09261913292726622, "grad_norm": 4.074149131774902, "learning_rate": 9.896393113823708e-06, "loss": 0.9334, "step": 1551 }, { "epoch": 0.09267884868028185, "grad_norm": 2.0462286472320557, "learning_rate": 9.897261345283368e-06, "loss": 0.9412, "step": 1552 }, { "epoch": 0.0927385644332975, "grad_norm": 2.3307697772979736, "learning_rate": 9.898129017495674e-06, "loss": 0.982, "step": 1553 }, { "epoch": 0.09279828018631316, "grad_norm": 2.280366897583008, "learning_rate": 9.898996131180608e-06, "loss": 0.9117, "step": 1554 }, { "epoch": 0.0928579959393288, "grad_norm": 2.0048673152923584, "learning_rate": 9.899862687056763e-06, "loss": 0.9419, "step": 1555 }, { "epoch": 0.09291771169234445, "grad_norm": 1.8325861692428589, "learning_rate": 9.900728685841349e-06, "loss": 0.9695, "step": 1556 }, { "epoch": 0.09297742744536008, "grad_norm": 2.346127986907959, "learning_rate": 9.901594128250192e-06, "loss": 0.9437, "step": 1557 }, { "epoch": 0.09303714319837574, "grad_norm": 2.6553282737731934, "learning_rate": 9.90245901499774e-06, "loss": 0.9241, "step": 1558 }, { "epoch": 0.09309685895139137, "grad_norm": 2.133387804031372, "learning_rate": 9.903323346797064e-06, "loss": 0.9635, "step": 1559 }, { "epoch": 0.09315657470440702, "grad_norm": 2.4592700004577637, "learning_rate": 9.904187124359865e-06, "loss": 0.9599, "step": 1560 }, { "epoch": 0.09321629045742266, "grad_norm": 1.9658846855163574, "learning_rate": 9.905050348396473e-06, "loss": 0.9487, "step": 1561 }, { "epoch": 0.09327600621043831, "grad_norm": 2.54309344291687, "learning_rate": 9.905913019615857e-06, "loss": 0.993, "step": 1562 }, { "epoch": 0.09333572196345397, "grad_norm": 2.6750311851501465, "learning_rate": 9.906775138725624e-06, "loss": 0.9689, "step": 1563 }, { "epoch": 0.0933954377164696, "grad_norm": 7.864263534545898, "learning_rate": 9.90763670643202e-06, "loss": 0.937, "step": 1564 }, { "epoch": 0.09345515346948526, "grad_norm": 2.7566616535186768, "learning_rate": 9.90849772343994e-06, "loss": 0.9974, "step": 1565 }, { "epoch": 0.0935148692225009, "grad_norm": 3.519085645675659, "learning_rate": 9.90935819045293e-06, "loss": 0.9528, "step": 1566 }, { "epoch": 0.09357458497551654, "grad_norm": 2.3062448501586914, "learning_rate": 9.910218108173181e-06, "loss": 1.0018, "step": 1567 }, { "epoch": 0.09363430072853218, "grad_norm": 3.0882298946380615, "learning_rate": 9.911077477301549e-06, "loss": 0.9297, "step": 1568 }, { "epoch": 0.09369401648154783, "grad_norm": 2.7672278881073, "learning_rate": 9.911936298537544e-06, "loss": 0.9229, "step": 1569 }, { "epoch": 0.09375373223456347, "grad_norm": 2.128671884536743, "learning_rate": 9.912794572579342e-06, "loss": 0.9438, "step": 1570 }, { "epoch": 0.09381344798757912, "grad_norm": 2.171874523162842, "learning_rate": 9.913652300123782e-06, "loss": 0.9625, "step": 1571 }, { "epoch": 0.09387316374059478, "grad_norm": 1.9667373895645142, "learning_rate": 9.914509481866374e-06, "loss": 0.9338, "step": 1572 }, { "epoch": 0.09393287949361041, "grad_norm": 1.994928002357483, "learning_rate": 9.915366118501304e-06, "loss": 0.959, "step": 1573 }, { "epoch": 0.09399259524662607, "grad_norm": 2.7410197257995605, "learning_rate": 9.91622221072143e-06, "loss": 0.9593, "step": 1574 }, { "epoch": 0.0940523109996417, "grad_norm": 6.188695907592773, "learning_rate": 9.917077759218295e-06, "loss": 0.9498, "step": 1575 }, { "epoch": 0.09411202675265735, "grad_norm": 2.717928647994995, "learning_rate": 9.917932764682118e-06, "loss": 0.9731, "step": 1576 }, { "epoch": 0.09417174250567299, "grad_norm": 2.676053762435913, "learning_rate": 9.918787227801812e-06, "loss": 0.9077, "step": 1577 }, { "epoch": 0.09423145825868864, "grad_norm": 3.6225385665893555, "learning_rate": 9.919641149264974e-06, "loss": 0.9533, "step": 1578 }, { "epoch": 0.09429117401170428, "grad_norm": 2.537158489227295, "learning_rate": 9.9204945297579e-06, "loss": 0.9373, "step": 1579 }, { "epoch": 0.09435088976471993, "grad_norm": 3.3584110736846924, "learning_rate": 9.921347369965576e-06, "loss": 0.972, "step": 1580 }, { "epoch": 0.09441060551773559, "grad_norm": 2.1724414825439453, "learning_rate": 9.922199670571693e-06, "loss": 0.9213, "step": 1581 }, { "epoch": 0.09447032127075122, "grad_norm": 2.463266372680664, "learning_rate": 9.923051432258642e-06, "loss": 0.9431, "step": 1582 }, { "epoch": 0.09453003702376687, "grad_norm": 2.505704641342163, "learning_rate": 9.923902655707524e-06, "loss": 0.9696, "step": 1583 }, { "epoch": 0.09458975277678251, "grad_norm": 2.4301462173461914, "learning_rate": 9.924753341598145e-06, "loss": 0.9629, "step": 1584 }, { "epoch": 0.09464946852979816, "grad_norm": 3.8737130165100098, "learning_rate": 9.925603490609027e-06, "loss": 0.9642, "step": 1585 }, { "epoch": 0.0947091842828138, "grad_norm": 2.1546173095703125, "learning_rate": 9.926453103417406e-06, "loss": 1.0034, "step": 1586 }, { "epoch": 0.09476890003582945, "grad_norm": 1.986375331878662, "learning_rate": 9.92730218069924e-06, "loss": 0.9516, "step": 1587 }, { "epoch": 0.09482861578884509, "grad_norm": 2.954131841659546, "learning_rate": 9.928150723129206e-06, "loss": 0.9683, "step": 1588 }, { "epoch": 0.09488833154186074, "grad_norm": 2.353050947189331, "learning_rate": 9.928998731380714e-06, "loss": 0.892, "step": 1589 }, { "epoch": 0.0949480472948764, "grad_norm": 2.327507972717285, "learning_rate": 9.929846206125891e-06, "loss": 0.9373, "step": 1590 }, { "epoch": 0.09500776304789203, "grad_norm": 3.3470618724823, "learning_rate": 9.930693148035608e-06, "loss": 0.9443, "step": 1591 }, { "epoch": 0.09506747880090768, "grad_norm": 2.6139063835144043, "learning_rate": 9.931539557779466e-06, "loss": 0.976, "step": 1592 }, { "epoch": 0.09512719455392332, "grad_norm": 1.9969176054000854, "learning_rate": 9.932385436025804e-06, "loss": 0.9691, "step": 1593 }, { "epoch": 0.09518691030693897, "grad_norm": 2.566249132156372, "learning_rate": 9.933230783441705e-06, "loss": 0.9748, "step": 1594 }, { "epoch": 0.09524662605995461, "grad_norm": 2.0351476669311523, "learning_rate": 9.934075600692997e-06, "loss": 0.9422, "step": 1595 }, { "epoch": 0.09530634181297026, "grad_norm": 3.747969627380371, "learning_rate": 9.934919888444252e-06, "loss": 0.9528, "step": 1596 }, { "epoch": 0.0953660575659859, "grad_norm": 2.591815233230591, "learning_rate": 9.935763647358797e-06, "loss": 0.9846, "step": 1597 }, { "epoch": 0.09542577331900155, "grad_norm": 2.360111951828003, "learning_rate": 9.936606878098713e-06, "loss": 0.9569, "step": 1598 }, { "epoch": 0.0954854890720172, "grad_norm": 3.7294669151306152, "learning_rate": 9.937449581324839e-06, "loss": 0.9365, "step": 1599 }, { "epoch": 0.09554520482503284, "grad_norm": 2.854501962661743, "learning_rate": 9.938291757696772e-06, "loss": 1.0064, "step": 1600 }, { "epoch": 0.09554520482503284, "eval_text_loss": 0.9748233556747437, "eval_text_runtime": 15.1339, "eval_text_samples_per_second": 264.308, "eval_text_steps_per_second": 0.529, "step": 1600 }, { "epoch": 0.09554520482503284, "eval_image_loss": 0.7212097644805908, "eval_image_runtime": 5.043, "eval_image_samples_per_second": 793.172, "eval_image_steps_per_second": 1.586, "step": 1600 }, { "epoch": 0.09554520482503284, "eval_video_loss": 1.2042174339294434, "eval_video_runtime": 76.5061, "eval_video_samples_per_second": 52.283, "eval_video_steps_per_second": 0.105, "step": 1600 }, { "epoch": 0.0956049205780485, "grad_norm": 2.4073193073272705, "learning_rate": 9.939133407872874e-06, "loss": 0.9618, "step": 1601 }, { "epoch": 0.09566463633106413, "grad_norm": 3.412125587463379, "learning_rate": 9.939974532510275e-06, "loss": 0.922, "step": 1602 }, { "epoch": 0.09572435208407978, "grad_norm": 5.060946464538574, "learning_rate": 9.940815132264873e-06, "loss": 0.9624, "step": 1603 }, { "epoch": 0.09578406783709542, "grad_norm": 3.268812417984009, "learning_rate": 9.94165520779134e-06, "loss": 0.9939, "step": 1604 }, { "epoch": 0.09584378359011107, "grad_norm": 3.056178569793701, "learning_rate": 9.942494759743123e-06, "loss": 0.9946, "step": 1605 }, { "epoch": 0.09590349934312671, "grad_norm": 2.98907208442688, "learning_rate": 9.943333788772446e-06, "loss": 0.972, "step": 1606 }, { "epoch": 0.09596321509614236, "grad_norm": 1.947657585144043, "learning_rate": 9.94417229553032e-06, "loss": 0.9582, "step": 1607 }, { "epoch": 0.09602293084915801, "grad_norm": 2.602945566177368, "learning_rate": 9.94501028066654e-06, "loss": 0.9445, "step": 1608 }, { "epoch": 0.09608264660217365, "grad_norm": 2.854067325592041, "learning_rate": 9.945847744829683e-06, "loss": 0.964, "step": 1609 }, { "epoch": 0.0961423623551893, "grad_norm": 3.042375326156616, "learning_rate": 9.946684688667126e-06, "loss": 0.9363, "step": 1610 }, { "epoch": 0.09620207810820494, "grad_norm": 2.0815114974975586, "learning_rate": 9.947521112825033e-06, "loss": 0.9147, "step": 1611 }, { "epoch": 0.0962617938612206, "grad_norm": 2.479557752609253, "learning_rate": 9.94835701794837e-06, "loss": 0.9954, "step": 1612 }, { "epoch": 0.09632150961423623, "grad_norm": 3.084824562072754, "learning_rate": 9.949192404680898e-06, "loss": 0.971, "step": 1613 }, { "epoch": 0.09638122536725188, "grad_norm": 5.076114177703857, "learning_rate": 9.950027273665187e-06, "loss": 0.9929, "step": 1614 }, { "epoch": 0.09644094112026752, "grad_norm": 2.962782144546509, "learning_rate": 9.950861625542607e-06, "loss": 0.9534, "step": 1615 }, { "epoch": 0.09650065687328317, "grad_norm": 2.9032175540924072, "learning_rate": 9.95169546095334e-06, "loss": 1.0295, "step": 1616 }, { "epoch": 0.09656037262629882, "grad_norm": 2.9695675373077393, "learning_rate": 9.952528780536385e-06, "loss": 0.9453, "step": 1617 }, { "epoch": 0.09662008837931446, "grad_norm": 1.9731545448303223, "learning_rate": 9.953361584929544e-06, "loss": 1.0001, "step": 1618 }, { "epoch": 0.09667980413233011, "grad_norm": 2.5635032653808594, "learning_rate": 9.954193874769447e-06, "loss": 0.9264, "step": 1619 }, { "epoch": 0.09673951988534575, "grad_norm": 2.4697256088256836, "learning_rate": 9.95502565069154e-06, "loss": 0.9697, "step": 1620 }, { "epoch": 0.0967992356383614, "grad_norm": 2.9319005012512207, "learning_rate": 9.955856913330096e-06, "loss": 0.9428, "step": 1621 }, { "epoch": 0.09685895139137704, "grad_norm": 2.046875476837158, "learning_rate": 9.956687663318208e-06, "loss": 0.9795, "step": 1622 }, { "epoch": 0.09691866714439269, "grad_norm": 2.583575487136841, "learning_rate": 9.957517901287804e-06, "loss": 1.0078, "step": 1623 }, { "epoch": 0.09697838289740833, "grad_norm": 2.7092466354370117, "learning_rate": 9.958347627869646e-06, "loss": 0.9865, "step": 1624 }, { "epoch": 0.09703809865042398, "grad_norm": 2.2585036754608154, "learning_rate": 9.959176843693326e-06, "loss": 0.9286, "step": 1625 }, { "epoch": 0.09709781440343963, "grad_norm": 1.8679158687591553, "learning_rate": 9.960005549387274e-06, "loss": 0.9629, "step": 1626 }, { "epoch": 0.09715753015645527, "grad_norm": 2.1835498809814453, "learning_rate": 9.960833745578768e-06, "loss": 0.9788, "step": 1627 }, { "epoch": 0.09721724590947092, "grad_norm": 1.9383740425109863, "learning_rate": 9.96166143289392e-06, "loss": 0.9653, "step": 1628 }, { "epoch": 0.09727696166248656, "grad_norm": 2.289262056350708, "learning_rate": 9.962488611957695e-06, "loss": 0.9442, "step": 1629 }, { "epoch": 0.09733667741550221, "grad_norm": 3.1148126125335693, "learning_rate": 9.963315283393906e-06, "loss": 0.9956, "step": 1630 }, { "epoch": 0.09739639316851785, "grad_norm": 2.7392935752868652, "learning_rate": 9.964141447825219e-06, "loss": 1.0099, "step": 1631 }, { "epoch": 0.0974561089215335, "grad_norm": 1.7516872882843018, "learning_rate": 9.964967105873152e-06, "loss": 0.9429, "step": 1632 }, { "epoch": 0.09751582467454914, "grad_norm": 2.804483652114868, "learning_rate": 9.965792258158083e-06, "loss": 0.9605, "step": 1633 }, { "epoch": 0.09757554042756479, "grad_norm": 3.019495964050293, "learning_rate": 9.966616905299252e-06, "loss": 0.935, "step": 1634 }, { "epoch": 0.09763525618058044, "grad_norm": 1.9671796560287476, "learning_rate": 9.96744104791476e-06, "loss": 0.9222, "step": 1635 }, { "epoch": 0.09769497193359608, "grad_norm": 2.343477487564087, "learning_rate": 9.968264686621576e-06, "loss": 0.9346, "step": 1636 }, { "epoch": 0.09775468768661173, "grad_norm": 1.9424734115600586, "learning_rate": 9.969087822035536e-06, "loss": 0.9994, "step": 1637 }, { "epoch": 0.09781440343962737, "grad_norm": 1.7673194408416748, "learning_rate": 9.96991045477135e-06, "loss": 1.008, "step": 1638 }, { "epoch": 0.09787411919264302, "grad_norm": 2.836904764175415, "learning_rate": 9.9707325854426e-06, "loss": 0.9586, "step": 1639 }, { "epoch": 0.09793383494565866, "grad_norm": 1.797542929649353, "learning_rate": 9.971554214661746e-06, "loss": 0.9242, "step": 1640 }, { "epoch": 0.09799355069867431, "grad_norm": 2.317730188369751, "learning_rate": 9.972375343040132e-06, "loss": 0.9724, "step": 1641 }, { "epoch": 0.09805326645168995, "grad_norm": 2.4522647857666016, "learning_rate": 9.973195971187978e-06, "loss": 0.9845, "step": 1642 }, { "epoch": 0.0981129822047056, "grad_norm": 2.4560070037841797, "learning_rate": 9.974016099714396e-06, "loss": 0.9289, "step": 1643 }, { "epoch": 0.09817269795772125, "grad_norm": 1.9157036542892456, "learning_rate": 9.97483572922738e-06, "loss": 0.9536, "step": 1644 }, { "epoch": 0.09823241371073689, "grad_norm": 5.302563190460205, "learning_rate": 9.975654860333818e-06, "loss": 0.8899, "step": 1645 }, { "epoch": 0.09829212946375254, "grad_norm": 2.1167633533477783, "learning_rate": 9.976473493639493e-06, "loss": 0.9573, "step": 1646 }, { "epoch": 0.09835184521676818, "grad_norm": 3.2055165767669678, "learning_rate": 9.977291629749083e-06, "loss": 0.9916, "step": 1647 }, { "epoch": 0.09841156096978383, "grad_norm": 2.0436606407165527, "learning_rate": 9.978109269266161e-06, "loss": 0.9856, "step": 1648 }, { "epoch": 0.09847127672279947, "grad_norm": 2.17315411567688, "learning_rate": 9.978926412793209e-06, "loss": 0.9483, "step": 1649 }, { "epoch": 0.09853099247581512, "grad_norm": 1.8838001489639282, "learning_rate": 9.979743060931608e-06, "loss": 0.9411, "step": 1650 }, { "epoch": 0.09859070822883076, "grad_norm": 3.2590017318725586, "learning_rate": 9.980559214281645e-06, "loss": 0.9938, "step": 1651 }, { "epoch": 0.09865042398184641, "grad_norm": 2.649745225906372, "learning_rate": 9.981374873442521e-06, "loss": 0.9574, "step": 1652 }, { "epoch": 0.09871013973486206, "grad_norm": 2.191652536392212, "learning_rate": 9.982190039012347e-06, "loss": 0.9757, "step": 1653 }, { "epoch": 0.0987698554878777, "grad_norm": 2.199453115463257, "learning_rate": 9.983004711588149e-06, "loss": 0.9397, "step": 1654 }, { "epoch": 0.09882957124089335, "grad_norm": 2.2526113986968994, "learning_rate": 9.98381889176587e-06, "loss": 0.9474, "step": 1655 }, { "epoch": 0.09888928699390899, "grad_norm": 2.806528091430664, "learning_rate": 9.984632580140372e-06, "loss": 0.9056, "step": 1656 }, { "epoch": 0.09894900274692464, "grad_norm": 2.2335331439971924, "learning_rate": 9.985445777305441e-06, "loss": 0.9289, "step": 1657 }, { "epoch": 0.09900871849994028, "grad_norm": 3.028223991394043, "learning_rate": 9.986258483853793e-06, "loss": 0.9949, "step": 1658 }, { "epoch": 0.09906843425295593, "grad_norm": 2.0525903701782227, "learning_rate": 9.98707070037706e-06, "loss": 0.9552, "step": 1659 }, { "epoch": 0.09912815000597157, "grad_norm": 2.1729397773742676, "learning_rate": 9.987882427465818e-06, "loss": 0.9232, "step": 1660 }, { "epoch": 0.09918786575898722, "grad_norm": 1.9373722076416016, "learning_rate": 9.988693665709565e-06, "loss": 0.9368, "step": 1661 }, { "epoch": 0.09924758151200287, "grad_norm": 1.601537823677063, "learning_rate": 9.989504415696742e-06, "loss": 0.9311, "step": 1662 }, { "epoch": 0.09930729726501851, "grad_norm": 2.478997230529785, "learning_rate": 9.990314678014724e-06, "loss": 0.9626, "step": 1663 }, { "epoch": 0.09936701301803416, "grad_norm": 2.0853562355041504, "learning_rate": 9.991124453249828e-06, "loss": 0.9595, "step": 1664 }, { "epoch": 0.0994267287710498, "grad_norm": 3.001095771789551, "learning_rate": 9.991933741987315e-06, "loss": 0.9442, "step": 1665 }, { "epoch": 0.09948644452406545, "grad_norm": 19.351362228393555, "learning_rate": 9.99274254481139e-06, "loss": 0.982, "step": 1666 }, { "epoch": 0.09954616027708109, "grad_norm": 1.8025662899017334, "learning_rate": 9.99355086230521e-06, "loss": 0.9054, "step": 1667 }, { "epoch": 0.09960587603009674, "grad_norm": 3.5720736980438232, "learning_rate": 9.994358695050878e-06, "loss": 0.9606, "step": 1668 }, { "epoch": 0.09966559178311238, "grad_norm": 2.4651107788085938, "learning_rate": 9.995166043629453e-06, "loss": 0.9557, "step": 1669 }, { "epoch": 0.09972530753612803, "grad_norm": 6.551342964172363, "learning_rate": 9.99597290862095e-06, "loss": 0.9578, "step": 1670 }, { "epoch": 0.09978502328914368, "grad_norm": 3.6278932094573975, "learning_rate": 9.996779290604343e-06, "loss": 0.9653, "step": 1671 }, { "epoch": 0.09984473904215932, "grad_norm": 2.2462997436523438, "learning_rate": 9.997585190157563e-06, "loss": 0.9779, "step": 1672 }, { "epoch": 0.09990445479517497, "grad_norm": 2.1442179679870605, "learning_rate": 9.99839060785751e-06, "loss": 0.9712, "step": 1673 }, { "epoch": 0.09996417054819061, "grad_norm": 1.8892767429351807, "learning_rate": 9.999195544280045e-06, "loss": 0.9576, "step": 1674 }, { "epoch": 0.10002388630120626, "grad_norm": 3.0572571754455566, "learning_rate": 9.999999999999999e-06, "loss": 0.9733, "step": 1675 }, { "epoch": 0.1000836020542219, "grad_norm": 4.368823528289795, "learning_rate": 1e-05, "loss": 0.9429, "step": 1676 }, { "epoch": 0.10014331780723755, "grad_norm": 2.4268417358398438, "learning_rate": 9.999336474022959e-06, "loss": 0.9922, "step": 1677 }, { "epoch": 0.10020303356025319, "grad_norm": 1.834030032157898, "learning_rate": 9.998672948045917e-06, "loss": 0.9403, "step": 1678 }, { "epoch": 0.10026274931326884, "grad_norm": 4.893860340118408, "learning_rate": 9.998009422068875e-06, "loss": 0.9948, "step": 1679 }, { "epoch": 0.10032246506628449, "grad_norm": 2.166015386581421, "learning_rate": 9.997345896091832e-06, "loss": 0.9871, "step": 1680 }, { "epoch": 0.10038218081930013, "grad_norm": 2.92741060256958, "learning_rate": 9.996682370114791e-06, "loss": 0.9496, "step": 1681 }, { "epoch": 0.10044189657231578, "grad_norm": 2.013141632080078, "learning_rate": 9.99601884413775e-06, "loss": 0.9247, "step": 1682 }, { "epoch": 0.10050161232533142, "grad_norm": 1.8509708642959595, "learning_rate": 9.995355318160706e-06, "loss": 0.9623, "step": 1683 }, { "epoch": 0.10056132807834707, "grad_norm": 2.1132688522338867, "learning_rate": 9.994691792183666e-06, "loss": 0.9141, "step": 1684 }, { "epoch": 0.10062104383136271, "grad_norm": 2.5320990085601807, "learning_rate": 9.994028266206622e-06, "loss": 0.9885, "step": 1685 }, { "epoch": 0.10068075958437836, "grad_norm": 2.4680306911468506, "learning_rate": 9.99336474022958e-06, "loss": 0.961, "step": 1686 }, { "epoch": 0.100740475337394, "grad_norm": 2.043250799179077, "learning_rate": 9.99270121425254e-06, "loss": 0.961, "step": 1687 }, { "epoch": 0.10080019109040965, "grad_norm": 2.890397310256958, "learning_rate": 9.992037688275496e-06, "loss": 0.9795, "step": 1688 }, { "epoch": 0.1008599068434253, "grad_norm": 2.3038113117218018, "learning_rate": 9.991374162298454e-06, "loss": 0.9394, "step": 1689 }, { "epoch": 0.10091962259644094, "grad_norm": 2.3695969581604004, "learning_rate": 9.990710636321413e-06, "loss": 0.9397, "step": 1690 }, { "epoch": 0.10097933834945659, "grad_norm": 1.9206695556640625, "learning_rate": 9.99004711034437e-06, "loss": 0.9957, "step": 1691 }, { "epoch": 0.10103905410247223, "grad_norm": 2.581817150115967, "learning_rate": 9.989383584367329e-06, "loss": 0.9455, "step": 1692 }, { "epoch": 0.10109876985548788, "grad_norm": 2.785466432571411, "learning_rate": 9.988720058390287e-06, "loss": 0.9788, "step": 1693 }, { "epoch": 0.10115848560850352, "grad_norm": 1.721388339996338, "learning_rate": 9.988056532413245e-06, "loss": 0.9081, "step": 1694 }, { "epoch": 0.10121820136151917, "grad_norm": 3.504908561706543, "learning_rate": 9.987393006436203e-06, "loss": 0.9647, "step": 1695 }, { "epoch": 0.10127791711453482, "grad_norm": 2.8126578330993652, "learning_rate": 9.986729480459161e-06, "loss": 0.9536, "step": 1696 }, { "epoch": 0.10133763286755046, "grad_norm": 2.36466646194458, "learning_rate": 9.98606595448212e-06, "loss": 0.9744, "step": 1697 }, { "epoch": 0.10139734862056611, "grad_norm": 1.9925328493118286, "learning_rate": 9.985402428505077e-06, "loss": 0.9786, "step": 1698 }, { "epoch": 0.10145706437358175, "grad_norm": 2.546348810195923, "learning_rate": 9.984738902528035e-06, "loss": 0.9864, "step": 1699 }, { "epoch": 0.1015167801265974, "grad_norm": 1.8998777866363525, "learning_rate": 9.984075376550992e-06, "loss": 0.9108, "step": 1700 }, { "epoch": 0.10157649587961304, "grad_norm": 2.093977212905884, "learning_rate": 9.983411850573952e-06, "loss": 0.9735, "step": 1701 }, { "epoch": 0.10163621163262869, "grad_norm": 2.2449352741241455, "learning_rate": 9.98274832459691e-06, "loss": 0.9318, "step": 1702 }, { "epoch": 0.10169592738564433, "grad_norm": 2.0450193881988525, "learning_rate": 9.982084798619866e-06, "loss": 0.971, "step": 1703 }, { "epoch": 0.10175564313865998, "grad_norm": 2.8621928691864014, "learning_rate": 9.981421272642824e-06, "loss": 0.9631, "step": 1704 }, { "epoch": 0.10181535889167563, "grad_norm": 2.234243631362915, "learning_rate": 9.980757746665782e-06, "loss": 0.9941, "step": 1705 }, { "epoch": 0.10187507464469127, "grad_norm": 2.4098899364471436, "learning_rate": 9.98009422068874e-06, "loss": 0.9668, "step": 1706 }, { "epoch": 0.10193479039770692, "grad_norm": 2.8150036334991455, "learning_rate": 9.979430694711699e-06, "loss": 0.9652, "step": 1707 }, { "epoch": 0.10199450615072256, "grad_norm": 2.31229829788208, "learning_rate": 9.978767168734657e-06, "loss": 0.9735, "step": 1708 }, { "epoch": 0.10205422190373821, "grad_norm": 2.5191314220428467, "learning_rate": 9.978103642757615e-06, "loss": 0.9503, "step": 1709 }, { "epoch": 0.10211393765675385, "grad_norm": 3.056422233581543, "learning_rate": 9.977440116780573e-06, "loss": 0.97, "step": 1710 }, { "epoch": 0.1021736534097695, "grad_norm": 2.113741397857666, "learning_rate": 9.976776590803531e-06, "loss": 0.9778, "step": 1711 }, { "epoch": 0.10223336916278514, "grad_norm": 1.9892958402633667, "learning_rate": 9.976113064826489e-06, "loss": 0.9946, "step": 1712 }, { "epoch": 0.10229308491580079, "grad_norm": 2.274740695953369, "learning_rate": 9.975449538849447e-06, "loss": 0.9617, "step": 1713 }, { "epoch": 0.10235280066881644, "grad_norm": 2.8041234016418457, "learning_rate": 9.974786012872405e-06, "loss": 0.9412, "step": 1714 }, { "epoch": 0.10241251642183208, "grad_norm": 2.571856737136841, "learning_rate": 9.974122486895362e-06, "loss": 0.9851, "step": 1715 }, { "epoch": 0.10247223217484773, "grad_norm": 5.845896244049072, "learning_rate": 9.973458960918321e-06, "loss": 0.9583, "step": 1716 }, { "epoch": 0.10253194792786337, "grad_norm": 2.6837875843048096, "learning_rate": 9.97279543494128e-06, "loss": 0.9561, "step": 1717 }, { "epoch": 0.10259166368087902, "grad_norm": 7.103476524353027, "learning_rate": 9.972131908964236e-06, "loss": 0.9218, "step": 1718 }, { "epoch": 0.10265137943389466, "grad_norm": 2.8963708877563477, "learning_rate": 9.971468382987196e-06, "loss": 0.9617, "step": 1719 }, { "epoch": 0.10271109518691031, "grad_norm": 2.2254176139831543, "learning_rate": 9.970804857010152e-06, "loss": 0.9408, "step": 1720 }, { "epoch": 0.10277081093992595, "grad_norm": 2.2854413986206055, "learning_rate": 9.97014133103311e-06, "loss": 0.9639, "step": 1721 }, { "epoch": 0.1028305266929416, "grad_norm": 2.257300615310669, "learning_rate": 9.96947780505607e-06, "loss": 1.0055, "step": 1722 }, { "epoch": 0.10289024244595725, "grad_norm": 2.736185073852539, "learning_rate": 9.968814279079026e-06, "loss": 0.9689, "step": 1723 }, { "epoch": 0.10294995819897289, "grad_norm": 4.317873001098633, "learning_rate": 9.968150753101985e-06, "loss": 0.9382, "step": 1724 }, { "epoch": 0.10300967395198854, "grad_norm": 6.0984063148498535, "learning_rate": 9.967487227124943e-06, "loss": 0.9661, "step": 1725 }, { "epoch": 0.10306938970500418, "grad_norm": 3.3418807983398438, "learning_rate": 9.9668237011479e-06, "loss": 0.9805, "step": 1726 }, { "epoch": 0.10312910545801983, "grad_norm": 2.314797878265381, "learning_rate": 9.966160175170859e-06, "loss": 0.9382, "step": 1727 }, { "epoch": 0.10318882121103547, "grad_norm": 3.5587990283966064, "learning_rate": 9.965496649193817e-06, "loss": 0.9776, "step": 1728 }, { "epoch": 0.10324853696405112, "grad_norm": 2.7581536769866943, "learning_rate": 9.964833123216775e-06, "loss": 0.975, "step": 1729 }, { "epoch": 0.10330825271706676, "grad_norm": 2.866307020187378, "learning_rate": 9.964169597239733e-06, "loss": 0.9674, "step": 1730 }, { "epoch": 0.10336796847008241, "grad_norm": 2.309119462966919, "learning_rate": 9.963506071262691e-06, "loss": 0.9301, "step": 1731 }, { "epoch": 0.10342768422309806, "grad_norm": 3.3773181438446045, "learning_rate": 9.96284254528565e-06, "loss": 0.9358, "step": 1732 }, { "epoch": 0.1034873999761137, "grad_norm": 1.7336453199386597, "learning_rate": 9.962179019308606e-06, "loss": 0.9475, "step": 1733 }, { "epoch": 0.10354711572912935, "grad_norm": 2.0573058128356934, "learning_rate": 9.961515493331566e-06, "loss": 0.9526, "step": 1734 }, { "epoch": 0.10360683148214499, "grad_norm": 3.0106027126312256, "learning_rate": 9.960851967354522e-06, "loss": 0.9456, "step": 1735 }, { "epoch": 0.10366654723516064, "grad_norm": 2.032872200012207, "learning_rate": 9.96018844137748e-06, "loss": 0.9275, "step": 1736 }, { "epoch": 0.10372626298817628, "grad_norm": 2.8432788848876953, "learning_rate": 9.95952491540044e-06, "loss": 0.9309, "step": 1737 }, { "epoch": 0.10378597874119193, "grad_norm": 2.788360595703125, "learning_rate": 9.958861389423396e-06, "loss": 0.9652, "step": 1738 }, { "epoch": 0.10384569449420757, "grad_norm": 2.109553575515747, "learning_rate": 9.958197863446354e-06, "loss": 0.9897, "step": 1739 }, { "epoch": 0.10390541024722322, "grad_norm": 1.988051414489746, "learning_rate": 9.957534337469312e-06, "loss": 0.9617, "step": 1740 }, { "epoch": 0.10396512600023887, "grad_norm": 2.555471420288086, "learning_rate": 9.95687081149227e-06, "loss": 0.928, "step": 1741 }, { "epoch": 0.10402484175325451, "grad_norm": 2.448058605194092, "learning_rate": 9.956207285515229e-06, "loss": 0.9441, "step": 1742 }, { "epoch": 0.10408455750627016, "grad_norm": 1.9578969478607178, "learning_rate": 9.955543759538187e-06, "loss": 0.9107, "step": 1743 }, { "epoch": 0.1041442732592858, "grad_norm": 2.9742181301116943, "learning_rate": 9.954880233561145e-06, "loss": 0.9169, "step": 1744 }, { "epoch": 0.10420398901230145, "grad_norm": 3.176879644393921, "learning_rate": 9.954216707584103e-06, "loss": 0.98, "step": 1745 }, { "epoch": 0.10426370476531709, "grad_norm": 3.040602684020996, "learning_rate": 9.953553181607061e-06, "loss": 0.9618, "step": 1746 }, { "epoch": 0.10432342051833274, "grad_norm": 6.078052997589111, "learning_rate": 9.95288965563002e-06, "loss": 0.962, "step": 1747 }, { "epoch": 0.10438313627134838, "grad_norm": 1.9096070528030396, "learning_rate": 9.952226129652977e-06, "loss": 0.9194, "step": 1748 }, { "epoch": 0.10444285202436403, "grad_norm": 2.1437599658966064, "learning_rate": 9.951562603675935e-06, "loss": 0.9786, "step": 1749 }, { "epoch": 0.10450256777737968, "grad_norm": 3.3527698516845703, "learning_rate": 9.950899077698892e-06, "loss": 0.9749, "step": 1750 }, { "epoch": 0.10456228353039532, "grad_norm": 2.5813560485839844, "learning_rate": 9.950235551721852e-06, "loss": 0.9885, "step": 1751 }, { "epoch": 0.10462199928341097, "grad_norm": 1.9816681146621704, "learning_rate": 9.94957202574481e-06, "loss": 0.965, "step": 1752 }, { "epoch": 0.1046817150364266, "grad_norm": 1.8262110948562622, "learning_rate": 9.948908499767766e-06, "loss": 0.9635, "step": 1753 }, { "epoch": 0.10474143078944226, "grad_norm": 1.9715797901153564, "learning_rate": 9.948244973790724e-06, "loss": 0.9622, "step": 1754 }, { "epoch": 0.1048011465424579, "grad_norm": 2.8856098651885986, "learning_rate": 9.947581447813682e-06, "loss": 0.9885, "step": 1755 }, { "epoch": 0.10486086229547355, "grad_norm": 2.2130167484283447, "learning_rate": 9.94691792183664e-06, "loss": 0.9665, "step": 1756 }, { "epoch": 0.10492057804848919, "grad_norm": 3.5822348594665527, "learning_rate": 9.946254395859598e-06, "loss": 0.9552, "step": 1757 }, { "epoch": 0.10498029380150484, "grad_norm": 2.8058531284332275, "learning_rate": 9.945590869882557e-06, "loss": 0.9292, "step": 1758 }, { "epoch": 0.10504000955452049, "grad_norm": 3.2134299278259277, "learning_rate": 9.944927343905515e-06, "loss": 0.9845, "step": 1759 }, { "epoch": 0.10509972530753613, "grad_norm": 1.8020225763320923, "learning_rate": 9.944263817928473e-06, "loss": 0.9019, "step": 1760 }, { "epoch": 0.10515944106055178, "grad_norm": 2.0034849643707275, "learning_rate": 9.943600291951431e-06, "loss": 0.9349, "step": 1761 }, { "epoch": 0.10521915681356742, "grad_norm": 2.3691084384918213, "learning_rate": 9.942936765974389e-06, "loss": 0.9508, "step": 1762 }, { "epoch": 0.10527887256658307, "grad_norm": 2.8820724487304688, "learning_rate": 9.942273239997347e-06, "loss": 0.9457, "step": 1763 }, { "epoch": 0.1053385883195987, "grad_norm": 3.842334032058716, "learning_rate": 9.941609714020305e-06, "loss": 1.0086, "step": 1764 }, { "epoch": 0.10539830407261436, "grad_norm": 1.7027997970581055, "learning_rate": 9.940946188043262e-06, "loss": 0.8922, "step": 1765 }, { "epoch": 0.10545801982563, "grad_norm": 2.397592306137085, "learning_rate": 9.940282662066221e-06, "loss": 0.9775, "step": 1766 }, { "epoch": 0.10551773557864565, "grad_norm": 2.1994612216949463, "learning_rate": 9.93961913608918e-06, "loss": 0.9454, "step": 1767 }, { "epoch": 0.1055774513316613, "grad_norm": 2.811350107192993, "learning_rate": 9.938955610112136e-06, "loss": 0.9499, "step": 1768 }, { "epoch": 0.10563716708467694, "grad_norm": 2.9583263397216797, "learning_rate": 9.938292084135096e-06, "loss": 0.957, "step": 1769 }, { "epoch": 0.10569688283769259, "grad_norm": 2.6507246494293213, "learning_rate": 9.937628558158052e-06, "loss": 0.9347, "step": 1770 }, { "epoch": 0.10575659859070823, "grad_norm": 2.6551737785339355, "learning_rate": 9.93696503218101e-06, "loss": 1.0021, "step": 1771 }, { "epoch": 0.10581631434372388, "grad_norm": 3.2473201751708984, "learning_rate": 9.93630150620397e-06, "loss": 0.9547, "step": 1772 }, { "epoch": 0.10587603009673952, "grad_norm": 2.006945848464966, "learning_rate": 9.935637980226926e-06, "loss": 0.9255, "step": 1773 }, { "epoch": 0.10593574584975517, "grad_norm": 2.6840317249298096, "learning_rate": 9.934974454249885e-06, "loss": 0.9612, "step": 1774 }, { "epoch": 0.1059954616027708, "grad_norm": 2.584773063659668, "learning_rate": 9.934310928272843e-06, "loss": 0.962, "step": 1775 }, { "epoch": 0.10605517735578646, "grad_norm": 2.246666193008423, "learning_rate": 9.9336474022958e-06, "loss": 0.9512, "step": 1776 }, { "epoch": 0.10611489310880211, "grad_norm": 2.3163950443267822, "learning_rate": 9.932983876318759e-06, "loss": 0.9463, "step": 1777 }, { "epoch": 0.10617460886181775, "grad_norm": 1.919481873512268, "learning_rate": 9.932320350341717e-06, "loss": 0.9707, "step": 1778 }, { "epoch": 0.1062343246148334, "grad_norm": 2.0123963356018066, "learning_rate": 9.931656824364675e-06, "loss": 0.9216, "step": 1779 }, { "epoch": 0.10629404036784904, "grad_norm": 2.0392038822174072, "learning_rate": 9.930993298387633e-06, "loss": 0.9335, "step": 1780 }, { "epoch": 0.10635375612086469, "grad_norm": 1.7452186346054077, "learning_rate": 9.930329772410591e-06, "loss": 0.9085, "step": 1781 }, { "epoch": 0.10641347187388032, "grad_norm": 2.7071049213409424, "learning_rate": 9.92966624643355e-06, "loss": 0.9455, "step": 1782 }, { "epoch": 0.10647318762689598, "grad_norm": 2.4573938846588135, "learning_rate": 9.929002720456506e-06, "loss": 0.9471, "step": 1783 }, { "epoch": 0.10653290337991161, "grad_norm": 3.380774974822998, "learning_rate": 9.928339194479466e-06, "loss": 0.9692, "step": 1784 }, { "epoch": 0.10659261913292727, "grad_norm": 2.619807481765747, "learning_rate": 9.927675668502422e-06, "loss": 0.9714, "step": 1785 }, { "epoch": 0.10665233488594292, "grad_norm": 3.585150957107544, "learning_rate": 9.92701214252538e-06, "loss": 0.9602, "step": 1786 }, { "epoch": 0.10671205063895856, "grad_norm": 4.044834613800049, "learning_rate": 9.92634861654834e-06, "loss": 1.0006, "step": 1787 }, { "epoch": 0.10677176639197421, "grad_norm": 3.391227960586548, "learning_rate": 9.925685090571296e-06, "loss": 0.9283, "step": 1788 }, { "epoch": 0.10683148214498984, "grad_norm": 2.619863748550415, "learning_rate": 9.925021564594254e-06, "loss": 0.9751, "step": 1789 }, { "epoch": 0.1068911978980055, "grad_norm": 2.1924490928649902, "learning_rate": 9.924358038617212e-06, "loss": 0.9645, "step": 1790 }, { "epoch": 0.10695091365102113, "grad_norm": 2.1198019981384277, "learning_rate": 9.92369451264017e-06, "loss": 0.9721, "step": 1791 }, { "epoch": 0.10701062940403679, "grad_norm": 2.1255218982696533, "learning_rate": 9.923030986663129e-06, "loss": 0.9531, "step": 1792 }, { "epoch": 0.10707034515705242, "grad_norm": 2.328720808029175, "learning_rate": 9.922367460686087e-06, "loss": 0.9619, "step": 1793 }, { "epoch": 0.10713006091006808, "grad_norm": 4.603243350982666, "learning_rate": 9.921703934709045e-06, "loss": 0.9977, "step": 1794 }, { "epoch": 0.10718977666308373, "grad_norm": 2.153879404067993, "learning_rate": 9.921040408732003e-06, "loss": 0.9496, "step": 1795 }, { "epoch": 0.10724949241609937, "grad_norm": 2.9979937076568604, "learning_rate": 9.920376882754961e-06, "loss": 0.9892, "step": 1796 }, { "epoch": 0.10730920816911502, "grad_norm": 2.2824809551239014, "learning_rate": 9.919713356777919e-06, "loss": 0.9631, "step": 1797 }, { "epoch": 0.10736892392213065, "grad_norm": 1.934565544128418, "learning_rate": 9.919049830800877e-06, "loss": 0.9733, "step": 1798 }, { "epoch": 0.1074286396751463, "grad_norm": 3.6738672256469727, "learning_rate": 9.918386304823835e-06, "loss": 0.9711, "step": 1799 }, { "epoch": 0.10748835542816194, "grad_norm": 2.010162115097046, "learning_rate": 9.917722778846792e-06, "loss": 1.0023, "step": 1800 }, { "epoch": 0.10748835542816194, "eval_text_loss": 0.9724955558776855, "eval_text_runtime": 15.7007, "eval_text_samples_per_second": 254.766, "eval_text_steps_per_second": 0.51, "step": 1800 }, { "epoch": 0.10748835542816194, "eval_image_loss": 0.7153726816177368, "eval_image_runtime": 5.5555, "eval_image_samples_per_second": 720.012, "eval_image_steps_per_second": 1.44, "step": 1800 }, { "epoch": 0.10748835542816194, "eval_video_loss": 1.198481559753418, "eval_video_runtime": 79.2624, "eval_video_samples_per_second": 50.465, "eval_video_steps_per_second": 0.101, "step": 1800 }, { "epoch": 0.1075480711811776, "grad_norm": 2.10638427734375, "learning_rate": 9.917059252869752e-06, "loss": 0.9832, "step": 1801 }, { "epoch": 0.10760778693419323, "grad_norm": 2.709418535232544, "learning_rate": 9.91639572689271e-06, "loss": 0.9708, "step": 1802 }, { "epoch": 0.10766750268720889, "grad_norm": 2.6116435527801514, "learning_rate": 9.915732200915666e-06, "loss": 0.94, "step": 1803 }, { "epoch": 0.10772721844022454, "grad_norm": 2.717661142349243, "learning_rate": 9.915068674938624e-06, "loss": 0.9721, "step": 1804 }, { "epoch": 0.10778693419324017, "grad_norm": 2.4000821113586426, "learning_rate": 9.914405148961582e-06, "loss": 0.9253, "step": 1805 }, { "epoch": 0.10784664994625583, "grad_norm": 2.391366720199585, "learning_rate": 9.91374162298454e-06, "loss": 0.9572, "step": 1806 }, { "epoch": 0.10790636569927146, "grad_norm": 2.2454774379730225, "learning_rate": 9.913078097007498e-06, "loss": 0.9508, "step": 1807 }, { "epoch": 0.10796608145228712, "grad_norm": 2.626335382461548, "learning_rate": 9.912414571030457e-06, "loss": 0.9677, "step": 1808 }, { "epoch": 0.10802579720530275, "grad_norm": 2.645608901977539, "learning_rate": 9.911751045053415e-06, "loss": 0.9637, "step": 1809 }, { "epoch": 0.1080855129583184, "grad_norm": 3.066579580307007, "learning_rate": 9.911087519076373e-06, "loss": 0.9433, "step": 1810 }, { "epoch": 0.10814522871133404, "grad_norm": 2.2861859798431396, "learning_rate": 9.91042399309933e-06, "loss": 0.9699, "step": 1811 }, { "epoch": 0.1082049444643497, "grad_norm": 2.041300058364868, "learning_rate": 9.909760467122289e-06, "loss": 0.9235, "step": 1812 }, { "epoch": 0.10826466021736535, "grad_norm": 2.0370213985443115, "learning_rate": 9.909096941145247e-06, "loss": 0.9376, "step": 1813 }, { "epoch": 0.10832437597038098, "grad_norm": 2.401902675628662, "learning_rate": 9.908433415168205e-06, "loss": 0.9599, "step": 1814 }, { "epoch": 0.10838409172339664, "grad_norm": 3.110536575317383, "learning_rate": 9.907769889191162e-06, "loss": 0.946, "step": 1815 }, { "epoch": 0.10844380747641227, "grad_norm": 3.0521247386932373, "learning_rate": 9.907106363214121e-06, "loss": 0.981, "step": 1816 }, { "epoch": 0.10850352322942793, "grad_norm": 1.883689284324646, "learning_rate": 9.90644283723708e-06, "loss": 0.9901, "step": 1817 }, { "epoch": 0.10856323898244356, "grad_norm": 1.9077306985855103, "learning_rate": 9.905779311260036e-06, "loss": 0.9292, "step": 1818 }, { "epoch": 0.10862295473545922, "grad_norm": 2.0376272201538086, "learning_rate": 9.905115785282996e-06, "loss": 0.9881, "step": 1819 }, { "epoch": 0.10868267048847485, "grad_norm": 2.6642236709594727, "learning_rate": 9.904452259305952e-06, "loss": 0.9195, "step": 1820 }, { "epoch": 0.1087423862414905, "grad_norm": 2.3209714889526367, "learning_rate": 9.90378873332891e-06, "loss": 0.9486, "step": 1821 }, { "epoch": 0.10880210199450616, "grad_norm": 2.079763412475586, "learning_rate": 9.90312520735187e-06, "loss": 0.9862, "step": 1822 }, { "epoch": 0.1088618177475218, "grad_norm": 2.559060573577881, "learning_rate": 9.902461681374826e-06, "loss": 0.9377, "step": 1823 }, { "epoch": 0.10892153350053745, "grad_norm": 2.915557384490967, "learning_rate": 9.901798155397784e-06, "loss": 0.9435, "step": 1824 }, { "epoch": 0.10898124925355308, "grad_norm": 4.027493953704834, "learning_rate": 9.901134629420743e-06, "loss": 0.9553, "step": 1825 }, { "epoch": 0.10904096500656874, "grad_norm": 2.012164831161499, "learning_rate": 9.9004711034437e-06, "loss": 0.9288, "step": 1826 }, { "epoch": 0.10910068075958437, "grad_norm": 2.3347649574279785, "learning_rate": 9.899807577466659e-06, "loss": 0.94, "step": 1827 }, { "epoch": 0.10916039651260002, "grad_norm": 2.393767833709717, "learning_rate": 9.899144051489617e-06, "loss": 0.9618, "step": 1828 }, { "epoch": 0.10922011226561566, "grad_norm": 2.1398863792419434, "learning_rate": 9.898480525512575e-06, "loss": 0.9714, "step": 1829 }, { "epoch": 0.10927982801863131, "grad_norm": 1.9780782461166382, "learning_rate": 9.897816999535533e-06, "loss": 0.8743, "step": 1830 }, { "epoch": 0.10933954377164697, "grad_norm": 2.1803884506225586, "learning_rate": 9.897153473558491e-06, "loss": 0.9292, "step": 1831 }, { "epoch": 0.1093992595246626, "grad_norm": 2.7002933025360107, "learning_rate": 9.89648994758145e-06, "loss": 0.9644, "step": 1832 }, { "epoch": 0.10945897527767826, "grad_norm": 2.54801607131958, "learning_rate": 9.895826421604406e-06, "loss": 0.955, "step": 1833 }, { "epoch": 0.1095186910306939, "grad_norm": 3.4302737712860107, "learning_rate": 9.895162895627365e-06, "loss": 0.9528, "step": 1834 }, { "epoch": 0.10957840678370954, "grad_norm": 2.904883861541748, "learning_rate": 9.894499369650322e-06, "loss": 0.9132, "step": 1835 }, { "epoch": 0.10963812253672518, "grad_norm": 9.59903335571289, "learning_rate": 9.89383584367328e-06, "loss": 0.9708, "step": 1836 }, { "epoch": 0.10969783828974083, "grad_norm": 2.229255437850952, "learning_rate": 9.89317231769624e-06, "loss": 0.9746, "step": 1837 }, { "epoch": 0.10975755404275649, "grad_norm": 7.220165252685547, "learning_rate": 9.892508791719196e-06, "loss": 0.9564, "step": 1838 }, { "epoch": 0.10981726979577212, "grad_norm": 2.1689164638519287, "learning_rate": 9.891845265742154e-06, "loss": 0.9616, "step": 1839 }, { "epoch": 0.10987698554878778, "grad_norm": 3.069720506668091, "learning_rate": 9.891181739765112e-06, "loss": 0.9455, "step": 1840 }, { "epoch": 0.10993670130180341, "grad_norm": 2.169830083847046, "learning_rate": 9.89051821378807e-06, "loss": 0.9639, "step": 1841 }, { "epoch": 0.10999641705481907, "grad_norm": 1.976776361465454, "learning_rate": 9.889854687811029e-06, "loss": 0.9269, "step": 1842 }, { "epoch": 0.1100561328078347, "grad_norm": 2.330679416656494, "learning_rate": 9.889191161833987e-06, "loss": 0.9807, "step": 1843 }, { "epoch": 0.11011584856085035, "grad_norm": 2.576253890991211, "learning_rate": 9.888527635856945e-06, "loss": 0.9772, "step": 1844 }, { "epoch": 0.11017556431386599, "grad_norm": 2.8548784255981445, "learning_rate": 9.887864109879903e-06, "loss": 0.9372, "step": 1845 }, { "epoch": 0.11023528006688164, "grad_norm": 2.4203011989593506, "learning_rate": 9.887200583902861e-06, "loss": 0.9562, "step": 1846 }, { "epoch": 0.1102949958198973, "grad_norm": 1.9335168600082397, "learning_rate": 9.886537057925819e-06, "loss": 0.9657, "step": 1847 }, { "epoch": 0.11035471157291293, "grad_norm": 4.4310479164123535, "learning_rate": 9.885873531948777e-06, "loss": 0.9638, "step": 1848 }, { "epoch": 0.11041442732592859, "grad_norm": 1.8487868309020996, "learning_rate": 9.885210005971735e-06, "loss": 0.9858, "step": 1849 }, { "epoch": 0.11047414307894422, "grad_norm": 4.140319347381592, "learning_rate": 9.884546479994692e-06, "loss": 0.9603, "step": 1850 }, { "epoch": 0.11053385883195987, "grad_norm": 2.3752408027648926, "learning_rate": 9.883882954017651e-06, "loss": 0.9562, "step": 1851 }, { "epoch": 0.11059357458497551, "grad_norm": 2.898974657058716, "learning_rate": 9.88321942804061e-06, "loss": 0.9582, "step": 1852 }, { "epoch": 0.11065329033799116, "grad_norm": 2.3866240978240967, "learning_rate": 9.882555902063566e-06, "loss": 0.9699, "step": 1853 }, { "epoch": 0.1107130060910068, "grad_norm": 2.7219338417053223, "learning_rate": 9.881892376086524e-06, "loss": 0.9876, "step": 1854 }, { "epoch": 0.11077272184402245, "grad_norm": 2.8474268913269043, "learning_rate": 9.881228850109482e-06, "loss": 0.961, "step": 1855 }, { "epoch": 0.1108324375970381, "grad_norm": 2.0984559059143066, "learning_rate": 9.88056532413244e-06, "loss": 0.9539, "step": 1856 }, { "epoch": 0.11089215335005374, "grad_norm": 1.982486605644226, "learning_rate": 9.879901798155398e-06, "loss": 0.9354, "step": 1857 }, { "epoch": 0.1109518691030694, "grad_norm": 1.9634655714035034, "learning_rate": 9.879238272178356e-06, "loss": 0.9454, "step": 1858 }, { "epoch": 0.11101158485608503, "grad_norm": 1.6608093976974487, "learning_rate": 9.878574746201315e-06, "loss": 0.9898, "step": 1859 }, { "epoch": 0.11107130060910068, "grad_norm": 5.871853828430176, "learning_rate": 9.877911220224273e-06, "loss": 0.9686, "step": 1860 }, { "epoch": 0.11113101636211632, "grad_norm": 2.799132823944092, "learning_rate": 9.87724769424723e-06, "loss": 0.9466, "step": 1861 }, { "epoch": 0.11119073211513197, "grad_norm": 2.597491979598999, "learning_rate": 9.876584168270189e-06, "loss": 0.9206, "step": 1862 }, { "epoch": 0.11125044786814761, "grad_norm": 2.405299186706543, "learning_rate": 9.875920642293147e-06, "loss": 0.9606, "step": 1863 }, { "epoch": 0.11131016362116326, "grad_norm": 2.324640989303589, "learning_rate": 9.875257116316105e-06, "loss": 0.954, "step": 1864 }, { "epoch": 0.11136987937417892, "grad_norm": 4.333029270172119, "learning_rate": 9.874593590339061e-06, "loss": 0.9511, "step": 1865 }, { "epoch": 0.11142959512719455, "grad_norm": 2.784792423248291, "learning_rate": 9.873930064362021e-06, "loss": 0.9845, "step": 1866 }, { "epoch": 0.1114893108802102, "grad_norm": 2.519042730331421, "learning_rate": 9.87326653838498e-06, "loss": 0.938, "step": 1867 }, { "epoch": 0.11154902663322584, "grad_norm": 2.380586624145508, "learning_rate": 9.872603012407936e-06, "loss": 0.9759, "step": 1868 }, { "epoch": 0.1116087423862415, "grad_norm": 2.7330245971679688, "learning_rate": 9.871939486430896e-06, "loss": 0.9479, "step": 1869 }, { "epoch": 0.11166845813925713, "grad_norm": 2.0197842121124268, "learning_rate": 9.871275960453852e-06, "loss": 0.9472, "step": 1870 }, { "epoch": 0.11172817389227278, "grad_norm": 2.4566309452056885, "learning_rate": 9.87061243447681e-06, "loss": 0.9566, "step": 1871 }, { "epoch": 0.11178788964528842, "grad_norm": 4.254244327545166, "learning_rate": 9.86994890849977e-06, "loss": 0.9359, "step": 1872 }, { "epoch": 0.11184760539830407, "grad_norm": 2.669933557510376, "learning_rate": 9.869285382522726e-06, "loss": 0.9413, "step": 1873 }, { "epoch": 0.11190732115131972, "grad_norm": 2.3318426609039307, "learning_rate": 9.868621856545684e-06, "loss": 0.957, "step": 1874 }, { "epoch": 0.11196703690433536, "grad_norm": 3.3313024044036865, "learning_rate": 9.867958330568642e-06, "loss": 0.9688, "step": 1875 }, { "epoch": 0.11202675265735101, "grad_norm": 1.9643701314926147, "learning_rate": 9.8672948045916e-06, "loss": 0.939, "step": 1876 }, { "epoch": 0.11208646841036665, "grad_norm": 2.2332611083984375, "learning_rate": 9.866631278614559e-06, "loss": 0.9774, "step": 1877 }, { "epoch": 0.1121461841633823, "grad_norm": 2.5034117698669434, "learning_rate": 9.865967752637517e-06, "loss": 0.9309, "step": 1878 }, { "epoch": 0.11220589991639794, "grad_norm": 1.932389736175537, "learning_rate": 9.865304226660475e-06, "loss": 0.958, "step": 1879 }, { "epoch": 0.1122656156694136, "grad_norm": 2.1817667484283447, "learning_rate": 9.864640700683433e-06, "loss": 0.9456, "step": 1880 }, { "epoch": 0.11232533142242923, "grad_norm": 1.6971477270126343, "learning_rate": 9.863977174706391e-06, "loss": 0.9726, "step": 1881 }, { "epoch": 0.11238504717544488, "grad_norm": 2.3516018390655518, "learning_rate": 9.86331364872935e-06, "loss": 0.9491, "step": 1882 }, { "epoch": 0.11244476292846053, "grad_norm": 2.6061649322509766, "learning_rate": 9.862650122752306e-06, "loss": 0.9404, "step": 1883 }, { "epoch": 0.11250447868147617, "grad_norm": 2.6635143756866455, "learning_rate": 9.861986596775265e-06, "loss": 0.9748, "step": 1884 }, { "epoch": 0.11256419443449182, "grad_norm": 1.942016839981079, "learning_rate": 9.861323070798222e-06, "loss": 0.9516, "step": 1885 }, { "epoch": 0.11262391018750746, "grad_norm": 2.165121078491211, "learning_rate": 9.86065954482118e-06, "loss": 0.9408, "step": 1886 }, { "epoch": 0.11268362594052311, "grad_norm": 2.250401496887207, "learning_rate": 9.85999601884414e-06, "loss": 0.9355, "step": 1887 }, { "epoch": 0.11274334169353875, "grad_norm": 2.1387250423431396, "learning_rate": 9.859332492867096e-06, "loss": 0.9257, "step": 1888 }, { "epoch": 0.1128030574465544, "grad_norm": 2.0219645500183105, "learning_rate": 9.858668966890054e-06, "loss": 0.9591, "step": 1889 }, { "epoch": 0.11286277319957004, "grad_norm": 2.808161735534668, "learning_rate": 9.858005440913012e-06, "loss": 0.9547, "step": 1890 }, { "epoch": 0.11292248895258569, "grad_norm": 1.8317586183547974, "learning_rate": 9.85734191493597e-06, "loss": 0.9626, "step": 1891 }, { "epoch": 0.11298220470560134, "grad_norm": 4.243472099304199, "learning_rate": 9.856678388958928e-06, "loss": 0.9593, "step": 1892 }, { "epoch": 0.11304192045861698, "grad_norm": 2.5729517936706543, "learning_rate": 9.856014862981887e-06, "loss": 0.9815, "step": 1893 }, { "epoch": 0.11310163621163263, "grad_norm": 2.0708110332489014, "learning_rate": 9.855351337004845e-06, "loss": 1.0076, "step": 1894 }, { "epoch": 0.11316135196464827, "grad_norm": 2.370821237564087, "learning_rate": 9.854687811027803e-06, "loss": 0.9586, "step": 1895 }, { "epoch": 0.11322106771766392, "grad_norm": 3.192310094833374, "learning_rate": 9.854024285050761e-06, "loss": 1.0012, "step": 1896 }, { "epoch": 0.11328078347067956, "grad_norm": 3.4966845512390137, "learning_rate": 9.853360759073719e-06, "loss": 0.933, "step": 1897 }, { "epoch": 0.11334049922369521, "grad_norm": 2.4825186729431152, "learning_rate": 9.852697233096677e-06, "loss": 0.9669, "step": 1898 }, { "epoch": 0.11340021497671085, "grad_norm": 2.4912712574005127, "learning_rate": 9.852033707119635e-06, "loss": 0.9707, "step": 1899 }, { "epoch": 0.1134599307297265, "grad_norm": 3.119382381439209, "learning_rate": 9.851370181142592e-06, "loss": 0.9146, "step": 1900 }, { "epoch": 0.11351964648274215, "grad_norm": 2.010324716567993, "learning_rate": 9.850706655165551e-06, "loss": 0.9499, "step": 1901 }, { "epoch": 0.11357936223575779, "grad_norm": 2.415587902069092, "learning_rate": 9.85004312918851e-06, "loss": 0.9261, "step": 1902 }, { "epoch": 0.11363907798877344, "grad_norm": 3.438488006591797, "learning_rate": 9.849379603211466e-06, "loss": 0.9573, "step": 1903 }, { "epoch": 0.11369879374178908, "grad_norm": 2.104966402053833, "learning_rate": 9.848716077234424e-06, "loss": 0.956, "step": 1904 }, { "epoch": 0.11375850949480473, "grad_norm": 1.9827605485916138, "learning_rate": 9.848052551257382e-06, "loss": 0.9683, "step": 1905 }, { "epoch": 0.11381822524782037, "grad_norm": 2.364044189453125, "learning_rate": 9.84738902528034e-06, "loss": 0.9503, "step": 1906 }, { "epoch": 0.11387794100083602, "grad_norm": 2.3690860271453857, "learning_rate": 9.846725499303298e-06, "loss": 0.9511, "step": 1907 }, { "epoch": 0.11393765675385166, "grad_norm": 1.9861074686050415, "learning_rate": 9.846061973326256e-06, "loss": 0.9432, "step": 1908 }, { "epoch": 0.11399737250686731, "grad_norm": 2.265547037124634, "learning_rate": 9.845398447349215e-06, "loss": 0.943, "step": 1909 }, { "epoch": 0.11405708825988296, "grad_norm": 1.6734541654586792, "learning_rate": 9.844734921372173e-06, "loss": 0.9592, "step": 1910 }, { "epoch": 0.1141168040128986, "grad_norm": 2.6340901851654053, "learning_rate": 9.84407139539513e-06, "loss": 0.9577, "step": 1911 }, { "epoch": 0.11417651976591425, "grad_norm": 3.5233242511749268, "learning_rate": 9.843407869418089e-06, "loss": 0.9763, "step": 1912 }, { "epoch": 0.11423623551892989, "grad_norm": 1.8477104902267456, "learning_rate": 9.842744343441047e-06, "loss": 0.9282, "step": 1913 }, { "epoch": 0.11429595127194554, "grad_norm": 3.5320045948028564, "learning_rate": 9.842080817464005e-06, "loss": 0.9462, "step": 1914 }, { "epoch": 0.11435566702496118, "grad_norm": 2.2089173793792725, "learning_rate": 9.841417291486961e-06, "loss": 0.9395, "step": 1915 }, { "epoch": 0.11441538277797683, "grad_norm": 2.7077579498291016, "learning_rate": 9.840753765509921e-06, "loss": 0.9748, "step": 1916 }, { "epoch": 0.11447509853099247, "grad_norm": 2.0621211528778076, "learning_rate": 9.84009023953288e-06, "loss": 0.951, "step": 1917 }, { "epoch": 0.11453481428400812, "grad_norm": 1.90308678150177, "learning_rate": 9.839426713555836e-06, "loss": 1.0033, "step": 1918 }, { "epoch": 0.11459453003702377, "grad_norm": 1.9491262435913086, "learning_rate": 9.838763187578796e-06, "loss": 0.9335, "step": 1919 }, { "epoch": 0.11465424579003941, "grad_norm": 2.00720477104187, "learning_rate": 9.838099661601752e-06, "loss": 0.9797, "step": 1920 }, { "epoch": 0.11471396154305506, "grad_norm": 2.108703851699829, "learning_rate": 9.83743613562471e-06, "loss": 0.9596, "step": 1921 }, { "epoch": 0.1147736772960707, "grad_norm": 1.825371503829956, "learning_rate": 9.83677260964767e-06, "loss": 0.9063, "step": 1922 }, { "epoch": 0.11483339304908635, "grad_norm": 2.6392672061920166, "learning_rate": 9.836109083670626e-06, "loss": 0.9434, "step": 1923 }, { "epoch": 0.11489310880210199, "grad_norm": 2.4090449810028076, "learning_rate": 9.835445557693584e-06, "loss": 0.9611, "step": 1924 }, { "epoch": 0.11495282455511764, "grad_norm": 2.4762678146362305, "learning_rate": 9.834782031716542e-06, "loss": 0.9115, "step": 1925 }, { "epoch": 0.11501254030813328, "grad_norm": 1.940429449081421, "learning_rate": 9.8341185057395e-06, "loss": 0.9131, "step": 1926 }, { "epoch": 0.11507225606114893, "grad_norm": 2.381274938583374, "learning_rate": 9.833454979762459e-06, "loss": 0.9714, "step": 1927 }, { "epoch": 0.11513197181416458, "grad_norm": 2.8062450885772705, "learning_rate": 9.832791453785417e-06, "loss": 0.9961, "step": 1928 }, { "epoch": 0.11519168756718022, "grad_norm": 1.9265730381011963, "learning_rate": 9.832127927808375e-06, "loss": 0.9262, "step": 1929 }, { "epoch": 0.11525140332019587, "grad_norm": 2.419003963470459, "learning_rate": 9.831464401831333e-06, "loss": 0.9498, "step": 1930 }, { "epoch": 0.11531111907321151, "grad_norm": 2.9860899448394775, "learning_rate": 9.830800875854291e-06, "loss": 0.9769, "step": 1931 }, { "epoch": 0.11537083482622716, "grad_norm": 2.1843695640563965, "learning_rate": 9.830137349877249e-06, "loss": 0.9606, "step": 1932 }, { "epoch": 0.1154305505792428, "grad_norm": 2.513758659362793, "learning_rate": 9.829473823900206e-06, "loss": 0.9236, "step": 1933 }, { "epoch": 0.11549026633225845, "grad_norm": 1.9593255519866943, "learning_rate": 9.828810297923165e-06, "loss": 0.9317, "step": 1934 }, { "epoch": 0.11554998208527409, "grad_norm": 2.263248920440674, "learning_rate": 9.828146771946122e-06, "loss": 0.945, "step": 1935 }, { "epoch": 0.11560969783828974, "grad_norm": 1.9412468671798706, "learning_rate": 9.82748324596908e-06, "loss": 0.9517, "step": 1936 }, { "epoch": 0.11566941359130539, "grad_norm": 2.7730677127838135, "learning_rate": 9.82681971999204e-06, "loss": 0.9462, "step": 1937 }, { "epoch": 0.11572912934432103, "grad_norm": 2.050442934036255, "learning_rate": 9.826156194014996e-06, "loss": 0.9048, "step": 1938 }, { "epoch": 0.11578884509733668, "grad_norm": 3.428734540939331, "learning_rate": 9.825492668037954e-06, "loss": 0.9427, "step": 1939 }, { "epoch": 0.11584856085035232, "grad_norm": 3.110895872116089, "learning_rate": 9.824829142060912e-06, "loss": 0.9682, "step": 1940 }, { "epoch": 0.11590827660336797, "grad_norm": 2.086308479309082, "learning_rate": 9.82416561608387e-06, "loss": 0.9274, "step": 1941 }, { "epoch": 0.11596799235638361, "grad_norm": 2.451181411743164, "learning_rate": 9.823502090106828e-06, "loss": 0.9327, "step": 1942 }, { "epoch": 0.11602770810939926, "grad_norm": 1.9063315391540527, "learning_rate": 9.822838564129787e-06, "loss": 0.999, "step": 1943 }, { "epoch": 0.1160874238624149, "grad_norm": 3.756960153579712, "learning_rate": 9.822175038152745e-06, "loss": 0.9675, "step": 1944 }, { "epoch": 0.11614713961543055, "grad_norm": 2.299571990966797, "learning_rate": 9.821511512175703e-06, "loss": 0.9368, "step": 1945 }, { "epoch": 0.1162068553684462, "grad_norm": 3.2139954566955566, "learning_rate": 9.82084798619866e-06, "loss": 0.9416, "step": 1946 }, { "epoch": 0.11626657112146184, "grad_norm": 2.2990729808807373, "learning_rate": 9.820184460221619e-06, "loss": 0.9367, "step": 1947 }, { "epoch": 0.11632628687447749, "grad_norm": 6.293529033660889, "learning_rate": 9.819520934244577e-06, "loss": 0.9392, "step": 1948 }, { "epoch": 0.11638600262749313, "grad_norm": 2.479614019393921, "learning_rate": 9.818857408267535e-06, "loss": 0.9419, "step": 1949 }, { "epoch": 0.11644571838050878, "grad_norm": 4.367955207824707, "learning_rate": 9.818193882290492e-06, "loss": 0.9789, "step": 1950 }, { "epoch": 0.11650543413352442, "grad_norm": 2.3617947101593018, "learning_rate": 9.817530356313451e-06, "loss": 0.9627, "step": 1951 }, { "epoch": 0.11656514988654007, "grad_norm": 1.7658040523529053, "learning_rate": 9.81686683033641e-06, "loss": 0.9226, "step": 1952 }, { "epoch": 0.11662486563955571, "grad_norm": 2.065690279006958, "learning_rate": 9.816203304359366e-06, "loss": 0.9771, "step": 1953 }, { "epoch": 0.11668458139257136, "grad_norm": 3.761719226837158, "learning_rate": 9.815539778382324e-06, "loss": 0.9344, "step": 1954 }, { "epoch": 0.11674429714558701, "grad_norm": 2.081486463546753, "learning_rate": 9.814876252405282e-06, "loss": 0.9674, "step": 1955 }, { "epoch": 0.11680401289860265, "grad_norm": 2.078737258911133, "learning_rate": 9.81421272642824e-06, "loss": 0.9715, "step": 1956 }, { "epoch": 0.1168637286516183, "grad_norm": 2.0288572311401367, "learning_rate": 9.813549200451198e-06, "loss": 0.9118, "step": 1957 }, { "epoch": 0.11692344440463394, "grad_norm": 3.611548662185669, "learning_rate": 9.812885674474156e-06, "loss": 0.9441, "step": 1958 }, { "epoch": 0.11698316015764959, "grad_norm": 5.154158115386963, "learning_rate": 9.812222148497114e-06, "loss": 0.9128, "step": 1959 }, { "epoch": 0.11704287591066523, "grad_norm": 2.4446558952331543, "learning_rate": 9.811558622520073e-06, "loss": 0.9907, "step": 1960 }, { "epoch": 0.11710259166368088, "grad_norm": 2.2798209190368652, "learning_rate": 9.81089509654303e-06, "loss": 0.9501, "step": 1961 }, { "epoch": 0.11716230741669652, "grad_norm": 3.1370432376861572, "learning_rate": 9.810231570565989e-06, "loss": 0.9423, "step": 1962 }, { "epoch": 0.11722202316971217, "grad_norm": 3.605175733566284, "learning_rate": 9.809568044588947e-06, "loss": 0.9761, "step": 1963 }, { "epoch": 0.11728173892272782, "grad_norm": 3.1189491748809814, "learning_rate": 9.808904518611905e-06, "loss": 0.9553, "step": 1964 }, { "epoch": 0.11734145467574346, "grad_norm": 2.1472699642181396, "learning_rate": 9.808240992634861e-06, "loss": 0.9499, "step": 1965 }, { "epoch": 0.11740117042875911, "grad_norm": 2.6482412815093994, "learning_rate": 9.807577466657821e-06, "loss": 0.9404, "step": 1966 }, { "epoch": 0.11746088618177475, "grad_norm": 2.522897958755493, "learning_rate": 9.80691394068078e-06, "loss": 0.9489, "step": 1967 }, { "epoch": 0.1175206019347904, "grad_norm": 4.581874370574951, "learning_rate": 9.806250414703736e-06, "loss": 0.9532, "step": 1968 }, { "epoch": 0.11758031768780604, "grad_norm": 4.286259651184082, "learning_rate": 9.805586888726695e-06, "loss": 0.9407, "step": 1969 }, { "epoch": 0.11764003344082169, "grad_norm": 2.631070613861084, "learning_rate": 9.804923362749652e-06, "loss": 0.931, "step": 1970 }, { "epoch": 0.11769974919383733, "grad_norm": 7.786059379577637, "learning_rate": 9.80425983677261e-06, "loss": 0.9653, "step": 1971 }, { "epoch": 0.11775946494685298, "grad_norm": 2.015613317489624, "learning_rate": 9.80359631079557e-06, "loss": 0.9318, "step": 1972 }, { "epoch": 0.11781918069986863, "grad_norm": 1.8406562805175781, "learning_rate": 9.802932784818526e-06, "loss": 0.9378, "step": 1973 }, { "epoch": 0.11787889645288427, "grad_norm": 2.1313769817352295, "learning_rate": 9.802269258841484e-06, "loss": 0.9618, "step": 1974 }, { "epoch": 0.11793861220589992, "grad_norm": 2.6293137073516846, "learning_rate": 9.801605732864442e-06, "loss": 0.9253, "step": 1975 }, { "epoch": 0.11799832795891556, "grad_norm": 2.6875970363616943, "learning_rate": 9.8009422068874e-06, "loss": 0.936, "step": 1976 }, { "epoch": 0.11805804371193121, "grad_norm": 1.8740342855453491, "learning_rate": 9.800278680910359e-06, "loss": 0.9602, "step": 1977 }, { "epoch": 0.11811775946494685, "grad_norm": 2.6297476291656494, "learning_rate": 9.799615154933317e-06, "loss": 0.9165, "step": 1978 }, { "epoch": 0.1181774752179625, "grad_norm": 1.9881212711334229, "learning_rate": 9.798951628956275e-06, "loss": 0.9665, "step": 1979 }, { "epoch": 0.11823719097097814, "grad_norm": 2.3606672286987305, "learning_rate": 9.798288102979233e-06, "loss": 0.9191, "step": 1980 }, { "epoch": 0.11829690672399379, "grad_norm": 2.1070754528045654, "learning_rate": 9.797624577002191e-06, "loss": 0.9358, "step": 1981 }, { "epoch": 0.11835662247700944, "grad_norm": 2.215045690536499, "learning_rate": 9.796961051025149e-06, "loss": 0.9623, "step": 1982 }, { "epoch": 0.11841633823002508, "grad_norm": 1.8507821559906006, "learning_rate": 9.796297525048105e-06, "loss": 0.9673, "step": 1983 }, { "epoch": 0.11847605398304073, "grad_norm": 2.006153106689453, "learning_rate": 9.795633999071065e-06, "loss": 0.9411, "step": 1984 }, { "epoch": 0.11853576973605637, "grad_norm": 3.1989071369171143, "learning_rate": 9.794970473094022e-06, "loss": 0.9493, "step": 1985 }, { "epoch": 0.11859548548907202, "grad_norm": 1.8044915199279785, "learning_rate": 9.79430694711698e-06, "loss": 0.9546, "step": 1986 }, { "epoch": 0.11865520124208766, "grad_norm": 2.4647839069366455, "learning_rate": 9.79364342113994e-06, "loss": 0.9528, "step": 1987 }, { "epoch": 0.11871491699510331, "grad_norm": 1.9886246919631958, "learning_rate": 9.792979895162896e-06, "loss": 0.9313, "step": 1988 }, { "epoch": 0.11877463274811896, "grad_norm": 2.3288674354553223, "learning_rate": 9.792316369185854e-06, "loss": 0.9273, "step": 1989 }, { "epoch": 0.1188343485011346, "grad_norm": 2.2233028411865234, "learning_rate": 9.791652843208812e-06, "loss": 0.9579, "step": 1990 }, { "epoch": 0.11889406425415025, "grad_norm": 2.61769437789917, "learning_rate": 9.79098931723177e-06, "loss": 0.9349, "step": 1991 }, { "epoch": 0.11895378000716589, "grad_norm": 2.1279547214508057, "learning_rate": 9.790325791254728e-06, "loss": 0.9811, "step": 1992 }, { "epoch": 0.11901349576018154, "grad_norm": 3.9398858547210693, "learning_rate": 9.789662265277686e-06, "loss": 0.9546, "step": 1993 }, { "epoch": 0.11907321151319718, "grad_norm": 1.7539253234863281, "learning_rate": 9.788998739300645e-06, "loss": 0.9615, "step": 1994 }, { "epoch": 0.11913292726621283, "grad_norm": 2.5451440811157227, "learning_rate": 9.788335213323603e-06, "loss": 0.9389, "step": 1995 }, { "epoch": 0.11919264301922847, "grad_norm": 2.546262264251709, "learning_rate": 9.78767168734656e-06, "loss": 0.925, "step": 1996 }, { "epoch": 0.11925235877224412, "grad_norm": 10.307685852050781, "learning_rate": 9.787008161369519e-06, "loss": 0.9331, "step": 1997 }, { "epoch": 0.11931207452525977, "grad_norm": 2.4063363075256348, "learning_rate": 9.786344635392477e-06, "loss": 0.9114, "step": 1998 }, { "epoch": 0.11937179027827541, "grad_norm": 2.2842094898223877, "learning_rate": 9.785681109415435e-06, "loss": 0.9679, "step": 1999 }, { "epoch": 0.11943150603129106, "grad_norm": 2.6639585494995117, "learning_rate": 9.785017583438391e-06, "loss": 0.9418, "step": 2000 }, { "epoch": 0.11943150603129106, "eval_text_loss": 0.9725351333618164, "eval_text_runtime": 15.1957, "eval_text_samples_per_second": 263.232, "eval_text_steps_per_second": 0.526, "step": 2000 }, { "epoch": 0.11943150603129106, "eval_image_loss": 0.7095500826835632, "eval_image_runtime": 5.2108, "eval_image_samples_per_second": 767.644, "eval_image_steps_per_second": 1.535, "step": 2000 }, { "epoch": 0.11943150603129106, "eval_video_loss": 1.1905970573425293, "eval_video_runtime": 77.5562, "eval_video_samples_per_second": 51.576, "eval_video_steps_per_second": 0.103, "step": 2000 }, { "epoch": 0.1194912217843067, "grad_norm": 2.2001516819000244, "learning_rate": 9.784354057461351e-06, "loss": 0.9839, "step": 2001 }, { "epoch": 0.11955093753732235, "grad_norm": 2.0910422801971436, "learning_rate": 9.78369053148431e-06, "loss": 1.0028, "step": 2002 }, { "epoch": 0.11961065329033799, "grad_norm": 3.315824270248413, "learning_rate": 9.783027005507266e-06, "loss": 0.934, "step": 2003 }, { "epoch": 0.11967036904335364, "grad_norm": 2.15985369682312, "learning_rate": 9.782363479530224e-06, "loss": 0.9521, "step": 2004 }, { "epoch": 0.11973008479636928, "grad_norm": 1.7584803104400635, "learning_rate": 9.781699953553182e-06, "loss": 0.9485, "step": 2005 }, { "epoch": 0.11978980054938493, "grad_norm": 2.5777359008789062, "learning_rate": 9.78103642757614e-06, "loss": 0.9273, "step": 2006 }, { "epoch": 0.11984951630240058, "grad_norm": 2.84293270111084, "learning_rate": 9.780372901599098e-06, "loss": 0.996, "step": 2007 }, { "epoch": 0.11990923205541622, "grad_norm": 2.774588108062744, "learning_rate": 9.779709375622056e-06, "loss": 0.9637, "step": 2008 }, { "epoch": 0.11996894780843187, "grad_norm": 1.9841859340667725, "learning_rate": 9.779045849645014e-06, "loss": 0.9164, "step": 2009 }, { "epoch": 0.12002866356144751, "grad_norm": 2.206651449203491, "learning_rate": 9.778382323667972e-06, "loss": 0.9425, "step": 2010 }, { "epoch": 0.12008837931446316, "grad_norm": 1.9396288394927979, "learning_rate": 9.77771879769093e-06, "loss": 0.8767, "step": 2011 }, { "epoch": 0.1201480950674788, "grad_norm": 2.5516271591186523, "learning_rate": 9.777055271713889e-06, "loss": 0.9972, "step": 2012 }, { "epoch": 0.12020781082049445, "grad_norm": 2.3010966777801514, "learning_rate": 9.776391745736847e-06, "loss": 0.9497, "step": 2013 }, { "epoch": 0.12026752657351009, "grad_norm": 3.185213565826416, "learning_rate": 9.775728219759805e-06, "loss": 0.9558, "step": 2014 }, { "epoch": 0.12032724232652574, "grad_norm": 2.881706476211548, "learning_rate": 9.775064693782761e-06, "loss": 0.9417, "step": 2015 }, { "epoch": 0.12038695807954139, "grad_norm": 2.2019078731536865, "learning_rate": 9.774401167805721e-06, "loss": 0.9916, "step": 2016 }, { "epoch": 0.12044667383255703, "grad_norm": 2.7013440132141113, "learning_rate": 9.77373764182868e-06, "loss": 0.9246, "step": 2017 }, { "epoch": 0.12050638958557268, "grad_norm": 2.037562608718872, "learning_rate": 9.773074115851636e-06, "loss": 0.9369, "step": 2018 }, { "epoch": 0.12056610533858832, "grad_norm": 2.606954574584961, "learning_rate": 9.772410589874595e-06, "loss": 0.9725, "step": 2019 }, { "epoch": 0.12062582109160397, "grad_norm": 3.559291124343872, "learning_rate": 9.771747063897552e-06, "loss": 0.9634, "step": 2020 }, { "epoch": 0.1206855368446196, "grad_norm": 2.1714048385620117, "learning_rate": 9.77108353792051e-06, "loss": 0.9247, "step": 2021 }, { "epoch": 0.12074525259763526, "grad_norm": 2.450511932373047, "learning_rate": 9.77042001194347e-06, "loss": 0.9448, "step": 2022 }, { "epoch": 0.1208049683506509, "grad_norm": 2.8046231269836426, "learning_rate": 9.769756485966426e-06, "loss": 0.9614, "step": 2023 }, { "epoch": 0.12086468410366655, "grad_norm": 1.9157999753952026, "learning_rate": 9.769092959989384e-06, "loss": 0.9103, "step": 2024 }, { "epoch": 0.1209243998566822, "grad_norm": 2.0613725185394287, "learning_rate": 9.768429434012342e-06, "loss": 0.9488, "step": 2025 }, { "epoch": 0.12098411560969784, "grad_norm": 2.327023506164551, "learning_rate": 9.7677659080353e-06, "loss": 0.9705, "step": 2026 }, { "epoch": 0.12104383136271349, "grad_norm": 3.233715057373047, "learning_rate": 9.767102382058259e-06, "loss": 0.9529, "step": 2027 }, { "epoch": 0.12110354711572913, "grad_norm": 2.414097547531128, "learning_rate": 9.766438856081217e-06, "loss": 0.9426, "step": 2028 }, { "epoch": 0.12116326286874478, "grad_norm": 1.9671084880828857, "learning_rate": 9.765775330104175e-06, "loss": 0.943, "step": 2029 }, { "epoch": 0.12122297862176042, "grad_norm": 3.0878384113311768, "learning_rate": 9.765111804127133e-06, "loss": 0.9637, "step": 2030 }, { "epoch": 0.12128269437477607, "grad_norm": 1.9706274271011353, "learning_rate": 9.764448278150091e-06, "loss": 0.9602, "step": 2031 }, { "epoch": 0.1213424101277917, "grad_norm": 2.803487777709961, "learning_rate": 9.763784752173049e-06, "loss": 0.946, "step": 2032 }, { "epoch": 0.12140212588080736, "grad_norm": 3.140592336654663, "learning_rate": 9.763121226196005e-06, "loss": 0.984, "step": 2033 }, { "epoch": 0.12146184163382301, "grad_norm": 2.655550956726074, "learning_rate": 9.762457700218965e-06, "loss": 0.9864, "step": 2034 }, { "epoch": 0.12152155738683865, "grad_norm": 7.56434440612793, "learning_rate": 9.761794174241922e-06, "loss": 0.9568, "step": 2035 }, { "epoch": 0.1215812731398543, "grad_norm": 2.8046014308929443, "learning_rate": 9.76113064826488e-06, "loss": 0.9464, "step": 2036 }, { "epoch": 0.12164098889286994, "grad_norm": 1.8381541967391968, "learning_rate": 9.76046712228784e-06, "loss": 0.923, "step": 2037 }, { "epoch": 0.12170070464588559, "grad_norm": 3.842211961746216, "learning_rate": 9.759803596310796e-06, "loss": 0.9146, "step": 2038 }, { "epoch": 0.12176042039890123, "grad_norm": 3.299694061279297, "learning_rate": 9.759140070333754e-06, "loss": 0.9754, "step": 2039 }, { "epoch": 0.12182013615191688, "grad_norm": 2.0209972858428955, "learning_rate": 9.758476544356712e-06, "loss": 0.9396, "step": 2040 }, { "epoch": 0.12187985190493252, "grad_norm": 2.573321580886841, "learning_rate": 9.75781301837967e-06, "loss": 0.9287, "step": 2041 }, { "epoch": 0.12193956765794817, "grad_norm": 2.553544759750366, "learning_rate": 9.757149492402628e-06, "loss": 0.9227, "step": 2042 }, { "epoch": 0.12199928341096382, "grad_norm": 3.0172219276428223, "learning_rate": 9.756485966425586e-06, "loss": 0.9304, "step": 2043 }, { "epoch": 0.12205899916397946, "grad_norm": 3.456076145172119, "learning_rate": 9.755822440448545e-06, "loss": 0.9413, "step": 2044 }, { "epoch": 0.12211871491699511, "grad_norm": 2.248490571975708, "learning_rate": 9.755158914471503e-06, "loss": 0.9591, "step": 2045 }, { "epoch": 0.12217843067001075, "grad_norm": 1.7556055784225464, "learning_rate": 9.75449538849446e-06, "loss": 0.9374, "step": 2046 }, { "epoch": 0.1222381464230264, "grad_norm": 1.9169014692306519, "learning_rate": 9.753831862517419e-06, "loss": 0.9492, "step": 2047 }, { "epoch": 0.12229786217604204, "grad_norm": 1.9849591255187988, "learning_rate": 9.753168336540377e-06, "loss": 0.9198, "step": 2048 }, { "epoch": 0.12235757792905769, "grad_norm": 2.7823517322540283, "learning_rate": 9.752504810563335e-06, "loss": 0.9445, "step": 2049 }, { "epoch": 0.12241729368207332, "grad_norm": 2.612704038619995, "learning_rate": 9.751841284586291e-06, "loss": 0.9462, "step": 2050 }, { "epoch": 0.12247700943508898, "grad_norm": 2.6445937156677246, "learning_rate": 9.751177758609251e-06, "loss": 0.9792, "step": 2051 }, { "epoch": 0.12253672518810463, "grad_norm": 2.061824083328247, "learning_rate": 9.75051423263221e-06, "loss": 0.933, "step": 2052 }, { "epoch": 0.12259644094112027, "grad_norm": 2.2723796367645264, "learning_rate": 9.749850706655166e-06, "loss": 0.9477, "step": 2053 }, { "epoch": 0.12265615669413592, "grad_norm": 2.792851686477661, "learning_rate": 9.749187180678124e-06, "loss": 0.928, "step": 2054 }, { "epoch": 0.12271587244715156, "grad_norm": 1.5616534948349, "learning_rate": 9.748523654701082e-06, "loss": 0.91, "step": 2055 }, { "epoch": 0.12277558820016721, "grad_norm": 2.3104913234710693, "learning_rate": 9.74786012872404e-06, "loss": 0.8947, "step": 2056 }, { "epoch": 0.12283530395318285, "grad_norm": 2.5875797271728516, "learning_rate": 9.747196602746998e-06, "loss": 0.9103, "step": 2057 }, { "epoch": 0.1228950197061985, "grad_norm": 2.5921754837036133, "learning_rate": 9.746533076769956e-06, "loss": 0.9587, "step": 2058 }, { "epoch": 0.12295473545921413, "grad_norm": 2.9262025356292725, "learning_rate": 9.745869550792914e-06, "loss": 0.9783, "step": 2059 }, { "epoch": 0.12301445121222979, "grad_norm": 2.2566633224487305, "learning_rate": 9.745206024815872e-06, "loss": 0.9021, "step": 2060 }, { "epoch": 0.12307416696524544, "grad_norm": 3.7398977279663086, "learning_rate": 9.74454249883883e-06, "loss": 0.924, "step": 2061 }, { "epoch": 0.12313388271826108, "grad_norm": 3.828958511352539, "learning_rate": 9.743878972861789e-06, "loss": 0.9484, "step": 2062 }, { "epoch": 0.12319359847127673, "grad_norm": 2.310964584350586, "learning_rate": 9.743215446884747e-06, "loss": 0.9506, "step": 2063 }, { "epoch": 0.12325331422429237, "grad_norm": 2.4287619590759277, "learning_rate": 9.742551920907705e-06, "loss": 0.963, "step": 2064 }, { "epoch": 0.12331302997730802, "grad_norm": 4.242123603820801, "learning_rate": 9.741888394930661e-06, "loss": 0.9427, "step": 2065 }, { "epoch": 0.12337274573032365, "grad_norm": 2.171922206878662, "learning_rate": 9.741224868953621e-06, "loss": 0.9572, "step": 2066 }, { "epoch": 0.1234324614833393, "grad_norm": 2.4413838386535645, "learning_rate": 9.740561342976579e-06, "loss": 0.9592, "step": 2067 }, { "epoch": 0.12349217723635494, "grad_norm": 2.236208915710449, "learning_rate": 9.739897816999536e-06, "loss": 0.9758, "step": 2068 }, { "epoch": 0.1235518929893706, "grad_norm": 3.2450668811798096, "learning_rate": 9.739234291022495e-06, "loss": 0.9371, "step": 2069 }, { "epoch": 0.12361160874238625, "grad_norm": 3.2923734188079834, "learning_rate": 9.738570765045452e-06, "loss": 0.9869, "step": 2070 }, { "epoch": 0.12367132449540189, "grad_norm": 2.075455665588379, "learning_rate": 9.73790723906841e-06, "loss": 0.9598, "step": 2071 }, { "epoch": 0.12373104024841754, "grad_norm": 2.2269482612609863, "learning_rate": 9.73724371309137e-06, "loss": 0.9782, "step": 2072 }, { "epoch": 0.12379075600143317, "grad_norm": 2.412881851196289, "learning_rate": 9.736580187114326e-06, "loss": 0.9646, "step": 2073 }, { "epoch": 0.12385047175444883, "grad_norm": 2.2538163661956787, "learning_rate": 9.735916661137284e-06, "loss": 0.9159, "step": 2074 }, { "epoch": 0.12391018750746446, "grad_norm": 3.3621976375579834, "learning_rate": 9.735253135160242e-06, "loss": 0.9231, "step": 2075 }, { "epoch": 0.12396990326048012, "grad_norm": 2.584824800491333, "learning_rate": 9.7345896091832e-06, "loss": 0.9472, "step": 2076 }, { "epoch": 0.12402961901349575, "grad_norm": 1.8666423559188843, "learning_rate": 9.733926083206158e-06, "loss": 0.9497, "step": 2077 }, { "epoch": 0.1240893347665114, "grad_norm": 2.7260305881500244, "learning_rate": 9.733262557229117e-06, "loss": 0.9251, "step": 2078 }, { "epoch": 0.12414905051952706, "grad_norm": 2.7512714862823486, "learning_rate": 9.732599031252075e-06, "loss": 0.964, "step": 2079 }, { "epoch": 0.1242087662725427, "grad_norm": 2.445754051208496, "learning_rate": 9.731935505275033e-06, "loss": 0.9603, "step": 2080 }, { "epoch": 0.12426848202555835, "grad_norm": 2.2909910678863525, "learning_rate": 9.731271979297991e-06, "loss": 0.9769, "step": 2081 }, { "epoch": 0.12432819777857398, "grad_norm": 5.103335857391357, "learning_rate": 9.730608453320949e-06, "loss": 1.0085, "step": 2082 }, { "epoch": 0.12438791353158964, "grad_norm": 1.6689850091934204, "learning_rate": 9.729944927343905e-06, "loss": 0.954, "step": 2083 }, { "epoch": 0.12444762928460527, "grad_norm": 3.0701656341552734, "learning_rate": 9.729281401366865e-06, "loss": 0.9784, "step": 2084 }, { "epoch": 0.12450734503762093, "grad_norm": 1.9376896619796753, "learning_rate": 9.728617875389822e-06, "loss": 0.9298, "step": 2085 }, { "epoch": 0.12456706079063656, "grad_norm": 2.8187248706817627, "learning_rate": 9.72795434941278e-06, "loss": 0.9906, "step": 2086 }, { "epoch": 0.12462677654365222, "grad_norm": 2.830277919769287, "learning_rate": 9.72729082343574e-06, "loss": 0.9288, "step": 2087 }, { "epoch": 0.12468649229666787, "grad_norm": 3.1692779064178467, "learning_rate": 9.726627297458696e-06, "loss": 0.9352, "step": 2088 }, { "epoch": 0.1247462080496835, "grad_norm": 4.597174167633057, "learning_rate": 9.725963771481654e-06, "loss": 0.9906, "step": 2089 }, { "epoch": 0.12480592380269916, "grad_norm": 2.9217689037323, "learning_rate": 9.725300245504612e-06, "loss": 0.9439, "step": 2090 }, { "epoch": 0.1248656395557148, "grad_norm": 2.436505079269409, "learning_rate": 9.72463671952757e-06, "loss": 0.9411, "step": 2091 }, { "epoch": 0.12492535530873045, "grad_norm": 2.2766623497009277, "learning_rate": 9.723973193550528e-06, "loss": 0.9615, "step": 2092 }, { "epoch": 0.12498507106174608, "grad_norm": 2.117144823074341, "learning_rate": 9.723309667573486e-06, "loss": 0.9204, "step": 2093 }, { "epoch": 0.12504478681476172, "grad_norm": 2.440621852874756, "learning_rate": 9.722646141596444e-06, "loss": 0.9554, "step": 2094 }, { "epoch": 0.1251045025677774, "grad_norm": 5.315438270568848, "learning_rate": 9.721982615619403e-06, "loss": 0.9824, "step": 2095 }, { "epoch": 0.12516421832079302, "grad_norm": 2.54000186920166, "learning_rate": 9.72131908964236e-06, "loss": 0.9537, "step": 2096 }, { "epoch": 0.12522393407380866, "grad_norm": 2.3832998275756836, "learning_rate": 9.720655563665319e-06, "loss": 0.9269, "step": 2097 }, { "epoch": 0.12528364982682433, "grad_norm": 2.581632614135742, "learning_rate": 9.719992037688277e-06, "loss": 0.9575, "step": 2098 }, { "epoch": 0.12534336557983997, "grad_norm": 3.1381492614746094, "learning_rate": 9.719328511711235e-06, "loss": 0.9876, "step": 2099 }, { "epoch": 0.1254030813328556, "grad_norm": 2.3585715293884277, "learning_rate": 9.718664985734191e-06, "loss": 0.9681, "step": 2100 }, { "epoch": 0.12546279708587124, "grad_norm": 2.692841053009033, "learning_rate": 9.718001459757151e-06, "loss": 0.9812, "step": 2101 }, { "epoch": 0.1255225128388869, "grad_norm": 1.8595139980316162, "learning_rate": 9.71733793378011e-06, "loss": 0.9822, "step": 2102 }, { "epoch": 0.12558222859190254, "grad_norm": 2.6447360515594482, "learning_rate": 9.716674407803066e-06, "loss": 0.9576, "step": 2103 }, { "epoch": 0.12564194434491818, "grad_norm": 2.0017168521881104, "learning_rate": 9.716010881826024e-06, "loss": 0.938, "step": 2104 }, { "epoch": 0.12570166009793385, "grad_norm": 2.83103084564209, "learning_rate": 9.715347355848982e-06, "loss": 0.9705, "step": 2105 }, { "epoch": 0.12576137585094949, "grad_norm": 2.289621114730835, "learning_rate": 9.71468382987194e-06, "loss": 0.9682, "step": 2106 }, { "epoch": 0.12582109160396512, "grad_norm": 2.8709561824798584, "learning_rate": 9.714020303894898e-06, "loss": 0.9788, "step": 2107 }, { "epoch": 0.12588080735698076, "grad_norm": 2.4826462268829346, "learning_rate": 9.713356777917856e-06, "loss": 0.938, "step": 2108 }, { "epoch": 0.12594052310999643, "grad_norm": 3.5889134407043457, "learning_rate": 9.712693251940814e-06, "loss": 0.887, "step": 2109 }, { "epoch": 0.12600023886301207, "grad_norm": 2.278634548187256, "learning_rate": 9.712029725963772e-06, "loss": 0.9431, "step": 2110 }, { "epoch": 0.1260599546160277, "grad_norm": 3.604796886444092, "learning_rate": 9.71136619998673e-06, "loss": 0.9549, "step": 2111 }, { "epoch": 0.12611967036904334, "grad_norm": 2.225193500518799, "learning_rate": 9.710702674009689e-06, "loss": 0.9531, "step": 2112 }, { "epoch": 0.126179386122059, "grad_norm": 2.0624871253967285, "learning_rate": 9.710039148032647e-06, "loss": 0.9331, "step": 2113 }, { "epoch": 0.12623910187507464, "grad_norm": 2.1743032932281494, "learning_rate": 9.709375622055605e-06, "loss": 0.8824, "step": 2114 }, { "epoch": 0.12629881762809028, "grad_norm": 2.8223676681518555, "learning_rate": 9.708712096078561e-06, "loss": 0.9453, "step": 2115 }, { "epoch": 0.12635853338110595, "grad_norm": 2.3368613719940186, "learning_rate": 9.708048570101521e-06, "loss": 0.9174, "step": 2116 }, { "epoch": 0.12641824913412159, "grad_norm": 3.619382858276367, "learning_rate": 9.707385044124479e-06, "loss": 0.9438, "step": 2117 }, { "epoch": 0.12647796488713722, "grad_norm": 3.3558309078216553, "learning_rate": 9.706721518147435e-06, "loss": 0.9562, "step": 2118 }, { "epoch": 0.12653768064015286, "grad_norm": 2.7818119525909424, "learning_rate": 9.706057992170395e-06, "loss": 0.9282, "step": 2119 }, { "epoch": 0.12659739639316853, "grad_norm": 2.298646926879883, "learning_rate": 9.705394466193352e-06, "loss": 0.9509, "step": 2120 }, { "epoch": 0.12665711214618416, "grad_norm": 1.8335939645767212, "learning_rate": 9.70473094021631e-06, "loss": 0.9467, "step": 2121 }, { "epoch": 0.1267168278991998, "grad_norm": 2.157273769378662, "learning_rate": 9.70406741423927e-06, "loss": 0.9391, "step": 2122 }, { "epoch": 0.12677654365221547, "grad_norm": 3.908951997756958, "learning_rate": 9.703403888262226e-06, "loss": 0.9333, "step": 2123 }, { "epoch": 0.1268362594052311, "grad_norm": 2.663054943084717, "learning_rate": 9.702740362285184e-06, "loss": 0.9711, "step": 2124 }, { "epoch": 0.12689597515824674, "grad_norm": 2.2689218521118164, "learning_rate": 9.702076836308142e-06, "loss": 0.9406, "step": 2125 }, { "epoch": 0.12695569091126238, "grad_norm": 3.5643117427825928, "learning_rate": 9.7014133103311e-06, "loss": 0.9628, "step": 2126 }, { "epoch": 0.12701540666427805, "grad_norm": 2.9241764545440674, "learning_rate": 9.700749784354058e-06, "loss": 0.9877, "step": 2127 }, { "epoch": 0.12707512241729368, "grad_norm": 3.056406021118164, "learning_rate": 9.700086258377016e-06, "loss": 0.9521, "step": 2128 }, { "epoch": 0.12713483817030932, "grad_norm": 2.9966375827789307, "learning_rate": 9.699422732399975e-06, "loss": 0.9692, "step": 2129 }, { "epoch": 0.12719455392332496, "grad_norm": 2.316579580307007, "learning_rate": 9.698759206422933e-06, "loss": 0.9329, "step": 2130 }, { "epoch": 0.12725426967634063, "grad_norm": 2.5928924083709717, "learning_rate": 9.69809568044589e-06, "loss": 0.9462, "step": 2131 }, { "epoch": 0.12731398542935626, "grad_norm": 2.35325026512146, "learning_rate": 9.697432154468849e-06, "loss": 0.9484, "step": 2132 }, { "epoch": 0.1273737011823719, "grad_norm": 3.8541271686553955, "learning_rate": 9.696768628491805e-06, "loss": 0.9359, "step": 2133 }, { "epoch": 0.12743341693538757, "grad_norm": 6.091820240020752, "learning_rate": 9.696105102514765e-06, "loss": 0.9683, "step": 2134 }, { "epoch": 0.1274931326884032, "grad_norm": 17.05366325378418, "learning_rate": 9.695441576537722e-06, "loss": 0.9685, "step": 2135 }, { "epoch": 0.12755284844141884, "grad_norm": 4.0993757247924805, "learning_rate": 9.69477805056068e-06, "loss": 0.9471, "step": 2136 }, { "epoch": 0.12761256419443448, "grad_norm": 3.4941298961639404, "learning_rate": 9.69411452458364e-06, "loss": 0.9743, "step": 2137 }, { "epoch": 0.12767227994745015, "grad_norm": 3.9133036136627197, "learning_rate": 9.693450998606596e-06, "loss": 0.9372, "step": 2138 }, { "epoch": 0.12773199570046578, "grad_norm": 2.2815465927124023, "learning_rate": 9.692787472629554e-06, "loss": 0.9733, "step": 2139 }, { "epoch": 0.12779171145348142, "grad_norm": 1.8033037185668945, "learning_rate": 9.692123946652512e-06, "loss": 0.9663, "step": 2140 }, { "epoch": 0.1278514272064971, "grad_norm": 2.4902477264404297, "learning_rate": 9.69146042067547e-06, "loss": 0.9326, "step": 2141 }, { "epoch": 0.12791114295951272, "grad_norm": 2.1638810634613037, "learning_rate": 9.690796894698428e-06, "loss": 0.9427, "step": 2142 }, { "epoch": 0.12797085871252836, "grad_norm": 2.247882843017578, "learning_rate": 9.690133368721386e-06, "loss": 0.9365, "step": 2143 }, { "epoch": 0.128030574465544, "grad_norm": 3.475841999053955, "learning_rate": 9.689469842744344e-06, "loss": 0.9634, "step": 2144 }, { "epoch": 0.12809029021855967, "grad_norm": 2.3892061710357666, "learning_rate": 9.688806316767303e-06, "loss": 0.945, "step": 2145 }, { "epoch": 0.1281500059715753, "grad_norm": 3.749202251434326, "learning_rate": 9.68814279079026e-06, "loss": 0.9275, "step": 2146 }, { "epoch": 0.12820972172459094, "grad_norm": 2.281891345977783, "learning_rate": 9.687479264813219e-06, "loss": 0.9678, "step": 2147 }, { "epoch": 0.12826943747760658, "grad_norm": 1.642456293106079, "learning_rate": 9.686815738836177e-06, "loss": 0.9011, "step": 2148 }, { "epoch": 0.12832915323062224, "grad_norm": 2.686452627182007, "learning_rate": 9.686152212859135e-06, "loss": 0.9238, "step": 2149 }, { "epoch": 0.12838886898363788, "grad_norm": 2.914985179901123, "learning_rate": 9.685488686882091e-06, "loss": 0.9317, "step": 2150 }, { "epoch": 0.12844858473665352, "grad_norm": 4.021772861480713, "learning_rate": 9.684825160905051e-06, "loss": 0.9427, "step": 2151 }, { "epoch": 0.12850830048966919, "grad_norm": 2.65598726272583, "learning_rate": 9.68416163492801e-06, "loss": 0.9642, "step": 2152 }, { "epoch": 0.12856801624268482, "grad_norm": 2.6419529914855957, "learning_rate": 9.683498108950966e-06, "loss": 0.9384, "step": 2153 }, { "epoch": 0.12862773199570046, "grad_norm": 2.5202577114105225, "learning_rate": 9.682834582973924e-06, "loss": 0.9452, "step": 2154 }, { "epoch": 0.1286874477487161, "grad_norm": 3.0112862586975098, "learning_rate": 9.682171056996882e-06, "loss": 0.9343, "step": 2155 }, { "epoch": 0.12874716350173177, "grad_norm": 2.2572858333587646, "learning_rate": 9.68150753101984e-06, "loss": 0.9562, "step": 2156 }, { "epoch": 0.1288068792547474, "grad_norm": 2.140878677368164, "learning_rate": 9.680844005042798e-06, "loss": 0.9417, "step": 2157 }, { "epoch": 0.12886659500776304, "grad_norm": 2.247877359390259, "learning_rate": 9.680180479065756e-06, "loss": 0.9461, "step": 2158 }, { "epoch": 0.1289263107607787, "grad_norm": 2.815713882446289, "learning_rate": 9.679516953088714e-06, "loss": 0.9375, "step": 2159 }, { "epoch": 0.12898602651379434, "grad_norm": 4.180449962615967, "learning_rate": 9.678853427111672e-06, "loss": 0.9596, "step": 2160 }, { "epoch": 0.12904574226680998, "grad_norm": 2.446726083755493, "learning_rate": 9.67818990113463e-06, "loss": 0.9995, "step": 2161 }, { "epoch": 0.12910545801982562, "grad_norm": 2.500922679901123, "learning_rate": 9.677526375157589e-06, "loss": 0.93, "step": 2162 }, { "epoch": 0.12916517377284129, "grad_norm": 2.3024866580963135, "learning_rate": 9.676862849180547e-06, "loss": 0.94, "step": 2163 }, { "epoch": 0.12922488952585692, "grad_norm": 3.0966527462005615, "learning_rate": 9.676199323203505e-06, "loss": 0.9604, "step": 2164 }, { "epoch": 0.12928460527887256, "grad_norm": 1.6731079816818237, "learning_rate": 9.675535797226461e-06, "loss": 0.9326, "step": 2165 }, { "epoch": 0.1293443210318882, "grad_norm": 2.626783847808838, "learning_rate": 9.674872271249421e-06, "loss": 0.9644, "step": 2166 }, { "epoch": 0.12940403678490386, "grad_norm": 2.557246208190918, "learning_rate": 9.674208745272379e-06, "loss": 0.9184, "step": 2167 }, { "epoch": 0.1294637525379195, "grad_norm": 2.931532144546509, "learning_rate": 9.673545219295335e-06, "loss": 0.9675, "step": 2168 }, { "epoch": 0.12952346829093514, "grad_norm": 2.069143772125244, "learning_rate": 9.672881693318295e-06, "loss": 0.9169, "step": 2169 }, { "epoch": 0.1295831840439508, "grad_norm": 2.302187442779541, "learning_rate": 9.672218167341252e-06, "loss": 0.9514, "step": 2170 }, { "epoch": 0.12964289979696644, "grad_norm": 2.134202241897583, "learning_rate": 9.67155464136421e-06, "loss": 0.9403, "step": 2171 }, { "epoch": 0.12970261554998208, "grad_norm": 1.8561838865280151, "learning_rate": 9.67089111538717e-06, "loss": 0.9423, "step": 2172 }, { "epoch": 0.12976233130299772, "grad_norm": 1.9229326248168945, "learning_rate": 9.670227589410126e-06, "loss": 0.8736, "step": 2173 }, { "epoch": 0.12982204705601338, "grad_norm": 5.69291353225708, "learning_rate": 9.669564063433084e-06, "loss": 0.9222, "step": 2174 }, { "epoch": 0.12988176280902902, "grad_norm": 4.0390825271606445, "learning_rate": 9.668900537456042e-06, "loss": 0.9042, "step": 2175 }, { "epoch": 0.12994147856204466, "grad_norm": 2.4586713314056396, "learning_rate": 9.668237011479e-06, "loss": 0.9907, "step": 2176 }, { "epoch": 0.13000119431506033, "grad_norm": 4.918962001800537, "learning_rate": 9.667573485501958e-06, "loss": 0.9498, "step": 2177 }, { "epoch": 0.13006091006807596, "grad_norm": 3.8588879108428955, "learning_rate": 9.666909959524916e-06, "loss": 0.9301, "step": 2178 }, { "epoch": 0.1301206258210916, "grad_norm": 2.2753942012786865, "learning_rate": 9.666246433547875e-06, "loss": 0.9269, "step": 2179 }, { "epoch": 0.13018034157410724, "grad_norm": 3.4065277576446533, "learning_rate": 9.665582907570833e-06, "loss": 0.9481, "step": 2180 }, { "epoch": 0.1302400573271229, "grad_norm": 2.625694751739502, "learning_rate": 9.66491938159379e-06, "loss": 0.9878, "step": 2181 }, { "epoch": 0.13029977308013854, "grad_norm": 2.5460522174835205, "learning_rate": 9.664255855616749e-06, "loss": 0.9101, "step": 2182 }, { "epoch": 0.13035948883315418, "grad_norm": 2.308380365371704, "learning_rate": 9.663592329639705e-06, "loss": 0.9263, "step": 2183 }, { "epoch": 0.13041920458616982, "grad_norm": 2.6952528953552246, "learning_rate": 9.662928803662665e-06, "loss": 0.9109, "step": 2184 }, { "epoch": 0.13047892033918548, "grad_norm": 2.853161096572876, "learning_rate": 9.662265277685621e-06, "loss": 0.9715, "step": 2185 }, { "epoch": 0.13053863609220112, "grad_norm": 2.2451839447021484, "learning_rate": 9.66160175170858e-06, "loss": 0.9462, "step": 2186 }, { "epoch": 0.13059835184521676, "grad_norm": 2.807676315307617, "learning_rate": 9.66093822573154e-06, "loss": 0.942, "step": 2187 }, { "epoch": 0.13065806759823242, "grad_norm": 2.159196376800537, "learning_rate": 9.660274699754496e-06, "loss": 0.9351, "step": 2188 }, { "epoch": 0.13071778335124806, "grad_norm": 2.6073896884918213, "learning_rate": 9.659611173777454e-06, "loss": 0.9431, "step": 2189 }, { "epoch": 0.1307774991042637, "grad_norm": 2.7195982933044434, "learning_rate": 9.658947647800412e-06, "loss": 0.9558, "step": 2190 }, { "epoch": 0.13083721485727934, "grad_norm": 2.4629271030426025, "learning_rate": 9.65828412182337e-06, "loss": 0.9713, "step": 2191 }, { "epoch": 0.130896930610295, "grad_norm": 2.223003625869751, "learning_rate": 9.657620595846328e-06, "loss": 0.9304, "step": 2192 }, { "epoch": 0.13095664636331064, "grad_norm": 3.0893805027008057, "learning_rate": 9.656957069869286e-06, "loss": 0.922, "step": 2193 }, { "epoch": 0.13101636211632628, "grad_norm": 2.7414510250091553, "learning_rate": 9.656293543892244e-06, "loss": 0.9861, "step": 2194 }, { "epoch": 0.13107607786934194, "grad_norm": 4.965097427368164, "learning_rate": 9.655630017915202e-06, "loss": 0.9658, "step": 2195 }, { "epoch": 0.13113579362235758, "grad_norm": 2.7031028270721436, "learning_rate": 9.65496649193816e-06, "loss": 0.9578, "step": 2196 }, { "epoch": 0.13119550937537322, "grad_norm": 2.5375213623046875, "learning_rate": 9.654302965961119e-06, "loss": 0.9069, "step": 2197 }, { "epoch": 0.13125522512838886, "grad_norm": 2.953123092651367, "learning_rate": 9.653639439984077e-06, "loss": 0.9631, "step": 2198 }, { "epoch": 0.13131494088140452, "grad_norm": 2.3947081565856934, "learning_rate": 9.652975914007035e-06, "loss": 0.9558, "step": 2199 }, { "epoch": 0.13137465663442016, "grad_norm": 2.215740203857422, "learning_rate": 9.652312388029991e-06, "loss": 0.9831, "step": 2200 }, { "epoch": 0.13137465663442016, "eval_text_loss": 0.9669127464294434, "eval_text_runtime": 15.2068, "eval_text_samples_per_second": 263.04, "eval_text_steps_per_second": 0.526, "step": 2200 }, { "epoch": 0.13137465663442016, "eval_image_loss": 0.706820011138916, "eval_image_runtime": 5.2142, "eval_image_samples_per_second": 767.13, "eval_image_steps_per_second": 1.534, "step": 2200 }, { "epoch": 0.13137465663442016, "eval_video_loss": 1.1821802854537964, "eval_video_runtime": 78.0212, "eval_video_samples_per_second": 51.268, "eval_video_steps_per_second": 0.103, "step": 2200 }, { "epoch": 0.1314343723874358, "grad_norm": 2.500906229019165, "learning_rate": 9.651648862052951e-06, "loss": 0.9469, "step": 2201 }, { "epoch": 0.13149408814045144, "grad_norm": 2.881819009780884, "learning_rate": 9.65098533607591e-06, "loss": 0.9337, "step": 2202 }, { "epoch": 0.1315538038934671, "grad_norm": 2.5447654724121094, "learning_rate": 9.650321810098866e-06, "loss": 0.9209, "step": 2203 }, { "epoch": 0.13161351964648274, "grad_norm": 3.5477612018585205, "learning_rate": 9.649658284121824e-06, "loss": 0.9522, "step": 2204 }, { "epoch": 0.13167323539949838, "grad_norm": 2.030647039413452, "learning_rate": 9.648994758144782e-06, "loss": 0.9418, "step": 2205 }, { "epoch": 0.13173295115251404, "grad_norm": 4.48914909362793, "learning_rate": 9.64833123216774e-06, "loss": 0.9882, "step": 2206 }, { "epoch": 0.13179266690552968, "grad_norm": 3.102018117904663, "learning_rate": 9.647667706190698e-06, "loss": 0.9155, "step": 2207 }, { "epoch": 0.13185238265854532, "grad_norm": 2.1380715370178223, "learning_rate": 9.647004180213656e-06, "loss": 0.9228, "step": 2208 }, { "epoch": 0.13191209841156096, "grad_norm": 2.2545084953308105, "learning_rate": 9.646340654236614e-06, "loss": 0.9324, "step": 2209 }, { "epoch": 0.13197181416457662, "grad_norm": 3.5020694732666016, "learning_rate": 9.645677128259572e-06, "loss": 0.956, "step": 2210 }, { "epoch": 0.13203152991759226, "grad_norm": 3.064666748046875, "learning_rate": 9.64501360228253e-06, "loss": 0.9405, "step": 2211 }, { "epoch": 0.1320912456706079, "grad_norm": 2.0048601627349854, "learning_rate": 9.644350076305488e-06, "loss": 0.9368, "step": 2212 }, { "epoch": 0.13215096142362356, "grad_norm": 2.371765613555908, "learning_rate": 9.643686550328447e-06, "loss": 0.9677, "step": 2213 }, { "epoch": 0.1322106771766392, "grad_norm": 2.3253393173217773, "learning_rate": 9.643023024351405e-06, "loss": 0.9477, "step": 2214 }, { "epoch": 0.13227039292965484, "grad_norm": 4.279414653778076, "learning_rate": 9.642359498374361e-06, "loss": 0.9452, "step": 2215 }, { "epoch": 0.13233010868267048, "grad_norm": 5.251429080963135, "learning_rate": 9.641695972397321e-06, "loss": 0.9845, "step": 2216 }, { "epoch": 0.13238982443568614, "grad_norm": 2.4564049243927, "learning_rate": 9.641032446420279e-06, "loss": 0.9154, "step": 2217 }, { "epoch": 0.13244954018870178, "grad_norm": 2.4258925914764404, "learning_rate": 9.640368920443235e-06, "loss": 0.9317, "step": 2218 }, { "epoch": 0.13250925594171742, "grad_norm": 1.7307080030441284, "learning_rate": 9.639705394466195e-06, "loss": 0.9133, "step": 2219 }, { "epoch": 0.13256897169473308, "grad_norm": 2.385366678237915, "learning_rate": 9.639041868489152e-06, "loss": 0.9399, "step": 2220 }, { "epoch": 0.13262868744774872, "grad_norm": 2.0980477333068848, "learning_rate": 9.63837834251211e-06, "loss": 0.9684, "step": 2221 }, { "epoch": 0.13268840320076436, "grad_norm": 2.1041171550750732, "learning_rate": 9.63771481653507e-06, "loss": 0.9197, "step": 2222 }, { "epoch": 0.13274811895378, "grad_norm": 3.27050518989563, "learning_rate": 9.637051290558026e-06, "loss": 0.95, "step": 2223 }, { "epoch": 0.13280783470679566, "grad_norm": 2.0542776584625244, "learning_rate": 9.636387764580984e-06, "loss": 0.8906, "step": 2224 }, { "epoch": 0.1328675504598113, "grad_norm": 2.480851411819458, "learning_rate": 9.635724238603942e-06, "loss": 0.9389, "step": 2225 }, { "epoch": 0.13292726621282694, "grad_norm": 2.7982211112976074, "learning_rate": 9.6350607126269e-06, "loss": 0.9599, "step": 2226 }, { "epoch": 0.13298698196584258, "grad_norm": 2.604722499847412, "learning_rate": 9.634397186649858e-06, "loss": 0.932, "step": 2227 }, { "epoch": 0.13304669771885824, "grad_norm": 2.507033586502075, "learning_rate": 9.633733660672816e-06, "loss": 0.8923, "step": 2228 }, { "epoch": 0.13310641347187388, "grad_norm": 2.5094175338745117, "learning_rate": 9.633070134695774e-06, "loss": 0.9277, "step": 2229 }, { "epoch": 0.13316612922488952, "grad_norm": 2.5238893032073975, "learning_rate": 9.632406608718733e-06, "loss": 0.9392, "step": 2230 }, { "epoch": 0.13322584497790518, "grad_norm": 2.9961628913879395, "learning_rate": 9.63174308274169e-06, "loss": 0.9595, "step": 2231 }, { "epoch": 0.13328556073092082, "grad_norm": 2.2765727043151855, "learning_rate": 9.631079556764649e-06, "loss": 0.9369, "step": 2232 }, { "epoch": 0.13334527648393646, "grad_norm": 2.2723655700683594, "learning_rate": 9.630416030787605e-06, "loss": 0.9105, "step": 2233 }, { "epoch": 0.1334049922369521, "grad_norm": 1.8596372604370117, "learning_rate": 9.629752504810565e-06, "loss": 0.9572, "step": 2234 }, { "epoch": 0.13346470798996776, "grad_norm": 2.7218222618103027, "learning_rate": 9.629088978833521e-06, "loss": 0.9094, "step": 2235 }, { "epoch": 0.1335244237429834, "grad_norm": 3.2812628746032715, "learning_rate": 9.62842545285648e-06, "loss": 0.9818, "step": 2236 }, { "epoch": 0.13358413949599904, "grad_norm": 2.3800535202026367, "learning_rate": 9.62776192687944e-06, "loss": 0.9362, "step": 2237 }, { "epoch": 0.1336438552490147, "grad_norm": 3.866211414337158, "learning_rate": 9.627098400902396e-06, "loss": 0.9559, "step": 2238 }, { "epoch": 0.13370357100203034, "grad_norm": 2.220233917236328, "learning_rate": 9.626434874925354e-06, "loss": 0.987, "step": 2239 }, { "epoch": 0.13376328675504598, "grad_norm": 2.595576286315918, "learning_rate": 9.625771348948312e-06, "loss": 0.9701, "step": 2240 }, { "epoch": 0.13382300250806162, "grad_norm": 2.4766790866851807, "learning_rate": 9.62510782297127e-06, "loss": 0.9572, "step": 2241 }, { "epoch": 0.13388271826107728, "grad_norm": 2.2053563594818115, "learning_rate": 9.624444296994228e-06, "loss": 0.9574, "step": 2242 }, { "epoch": 0.13394243401409292, "grad_norm": 2.9797914028167725, "learning_rate": 9.623780771017186e-06, "loss": 0.9308, "step": 2243 }, { "epoch": 0.13400214976710856, "grad_norm": 1.9266573190689087, "learning_rate": 9.623117245040144e-06, "loss": 0.923, "step": 2244 }, { "epoch": 0.1340618655201242, "grad_norm": 1.9930305480957031, "learning_rate": 9.622453719063102e-06, "loss": 0.9274, "step": 2245 }, { "epoch": 0.13412158127313986, "grad_norm": 2.913747787475586, "learning_rate": 9.62179019308606e-06, "loss": 0.8984, "step": 2246 }, { "epoch": 0.1341812970261555, "grad_norm": 2.1983752250671387, "learning_rate": 9.621126667109019e-06, "loss": 0.9365, "step": 2247 }, { "epoch": 0.13424101277917114, "grad_norm": 3.0942060947418213, "learning_rate": 9.620463141131977e-06, "loss": 0.9638, "step": 2248 }, { "epoch": 0.1343007285321868, "grad_norm": 2.264604330062866, "learning_rate": 9.619799615154935e-06, "loss": 0.9694, "step": 2249 }, { "epoch": 0.13436044428520244, "grad_norm": 1.9265389442443848, "learning_rate": 9.619136089177891e-06, "loss": 0.959, "step": 2250 }, { "epoch": 0.13442016003821808, "grad_norm": 2.4878945350646973, "learning_rate": 9.618472563200851e-06, "loss": 0.9524, "step": 2251 }, { "epoch": 0.13447987579123372, "grad_norm": 2.4008543491363525, "learning_rate": 9.617809037223809e-06, "loss": 0.9039, "step": 2252 }, { "epoch": 0.13453959154424938, "grad_norm": 5.059183120727539, "learning_rate": 9.617145511246766e-06, "loss": 0.9305, "step": 2253 }, { "epoch": 0.13459930729726502, "grad_norm": 2.1404736042022705, "learning_rate": 9.616481985269724e-06, "loss": 0.94, "step": 2254 }, { "epoch": 0.13465902305028066, "grad_norm": 2.4535794258117676, "learning_rate": 9.615818459292682e-06, "loss": 0.9444, "step": 2255 }, { "epoch": 0.13471873880329632, "grad_norm": 1.9772571325302124, "learning_rate": 9.61515493331564e-06, "loss": 0.9733, "step": 2256 }, { "epoch": 0.13477845455631196, "grad_norm": 2.5087292194366455, "learning_rate": 9.614491407338598e-06, "loss": 0.9679, "step": 2257 }, { "epoch": 0.1348381703093276, "grad_norm": 2.652635335922241, "learning_rate": 9.613827881361556e-06, "loss": 0.9319, "step": 2258 }, { "epoch": 0.13489788606234324, "grad_norm": 2.1429200172424316, "learning_rate": 9.613164355384514e-06, "loss": 0.9241, "step": 2259 }, { "epoch": 0.1349576018153589, "grad_norm": 2.380244255065918, "learning_rate": 9.612500829407472e-06, "loss": 0.9534, "step": 2260 }, { "epoch": 0.13501731756837454, "grad_norm": 3.994190216064453, "learning_rate": 9.61183730343043e-06, "loss": 0.9803, "step": 2261 }, { "epoch": 0.13507703332139018, "grad_norm": 2.348297357559204, "learning_rate": 9.611173777453387e-06, "loss": 0.9361, "step": 2262 }, { "epoch": 0.13513674907440582, "grad_norm": 2.357747793197632, "learning_rate": 9.610510251476347e-06, "loss": 0.938, "step": 2263 }, { "epoch": 0.13519646482742148, "grad_norm": 2.118543863296509, "learning_rate": 9.609846725499305e-06, "loss": 0.9405, "step": 2264 }, { "epoch": 0.13525618058043712, "grad_norm": 3.0545828342437744, "learning_rate": 9.609183199522261e-06, "loss": 0.9129, "step": 2265 }, { "epoch": 0.13531589633345276, "grad_norm": 3.049034595489502, "learning_rate": 9.60851967354522e-06, "loss": 0.9284, "step": 2266 }, { "epoch": 0.13537561208646842, "grad_norm": 3.162489414215088, "learning_rate": 9.607856147568177e-06, "loss": 0.9288, "step": 2267 }, { "epoch": 0.13543532783948406, "grad_norm": 1.8331354856491089, "learning_rate": 9.607192621591135e-06, "loss": 0.9151, "step": 2268 }, { "epoch": 0.1354950435924997, "grad_norm": 2.1783273220062256, "learning_rate": 9.606529095614095e-06, "loss": 0.9529, "step": 2269 }, { "epoch": 0.13555475934551534, "grad_norm": 3.6720776557922363, "learning_rate": 9.605865569637052e-06, "loss": 0.9411, "step": 2270 }, { "epoch": 0.135614475098531, "grad_norm": 3.227290391921997, "learning_rate": 9.60520204366001e-06, "loss": 0.9206, "step": 2271 }, { "epoch": 0.13567419085154664, "grad_norm": 2.0124049186706543, "learning_rate": 9.604538517682968e-06, "loss": 0.9293, "step": 2272 }, { "epoch": 0.13573390660456228, "grad_norm": 2.2063562870025635, "learning_rate": 9.603874991705926e-06, "loss": 0.8994, "step": 2273 }, { "epoch": 0.13579362235757794, "grad_norm": 3.165100574493408, "learning_rate": 9.603211465728884e-06, "loss": 0.9515, "step": 2274 }, { "epoch": 0.13585333811059358, "grad_norm": 2.9162356853485107, "learning_rate": 9.602547939751842e-06, "loss": 0.9609, "step": 2275 }, { "epoch": 0.13591305386360922, "grad_norm": 3.2699010372161865, "learning_rate": 9.6018844137748e-06, "loss": 0.881, "step": 2276 }, { "epoch": 0.13597276961662486, "grad_norm": 3.4968667030334473, "learning_rate": 9.601220887797758e-06, "loss": 0.9441, "step": 2277 }, { "epoch": 0.13603248536964052, "grad_norm": 3.2119998931884766, "learning_rate": 9.600557361820716e-06, "loss": 0.9819, "step": 2278 }, { "epoch": 0.13609220112265616, "grad_norm": 2.1949880123138428, "learning_rate": 9.599893835843674e-06, "loss": 0.9311, "step": 2279 }, { "epoch": 0.1361519168756718, "grad_norm": 3.0218751430511475, "learning_rate": 9.599230309866633e-06, "loss": 0.9304, "step": 2280 }, { "epoch": 0.13621163262868743, "grad_norm": 2.625293254852295, "learning_rate": 9.59856678388959e-06, "loss": 0.9477, "step": 2281 }, { "epoch": 0.1362713483817031, "grad_norm": 3.3241331577301025, "learning_rate": 9.597903257912547e-06, "loss": 0.9388, "step": 2282 }, { "epoch": 0.13633106413471874, "grad_norm": 2.5553464889526367, "learning_rate": 9.597239731935505e-06, "loss": 0.9149, "step": 2283 }, { "epoch": 0.13639077988773438, "grad_norm": 2.1893131732940674, "learning_rate": 9.596576205958465e-06, "loss": 0.9286, "step": 2284 }, { "epoch": 0.13645049564075004, "grad_norm": 2.04386305809021, "learning_rate": 9.595912679981421e-06, "loss": 0.9347, "step": 2285 }, { "epoch": 0.13651021139376568, "grad_norm": 3.0879576206207275, "learning_rate": 9.59524915400438e-06, "loss": 0.96, "step": 2286 }, { "epoch": 0.13656992714678132, "grad_norm": 3.135019540786743, "learning_rate": 9.594585628027338e-06, "loss": 0.9649, "step": 2287 }, { "epoch": 0.13662964289979695, "grad_norm": 1.9825769662857056, "learning_rate": 9.593922102050296e-06, "loss": 0.9366, "step": 2288 }, { "epoch": 0.13668935865281262, "grad_norm": 3.8694350719451904, "learning_rate": 9.593258576073254e-06, "loss": 0.9553, "step": 2289 }, { "epoch": 0.13674907440582826, "grad_norm": 2.675875186920166, "learning_rate": 9.592595050096212e-06, "loss": 0.934, "step": 2290 }, { "epoch": 0.1368087901588439, "grad_norm": 1.7247763872146606, "learning_rate": 9.59193152411917e-06, "loss": 0.9284, "step": 2291 }, { "epoch": 0.13686850591185956, "grad_norm": 2.028730630874634, "learning_rate": 9.591267998142128e-06, "loss": 0.9308, "step": 2292 }, { "epoch": 0.1369282216648752, "grad_norm": 2.2818126678466797, "learning_rate": 9.590604472165086e-06, "loss": 0.9458, "step": 2293 }, { "epoch": 0.13698793741789084, "grad_norm": 3.0343191623687744, "learning_rate": 9.589940946188044e-06, "loss": 0.9298, "step": 2294 }, { "epoch": 0.13704765317090647, "grad_norm": 2.051119804382324, "learning_rate": 9.589277420211002e-06, "loss": 0.9778, "step": 2295 }, { "epoch": 0.13710736892392214, "grad_norm": 2.111818790435791, "learning_rate": 9.58861389423396e-06, "loss": 0.9501, "step": 2296 }, { "epoch": 0.13716708467693778, "grad_norm": 1.9613066911697388, "learning_rate": 9.587950368256917e-06, "loss": 0.9342, "step": 2297 }, { "epoch": 0.13722680042995342, "grad_norm": 2.8283841609954834, "learning_rate": 9.587286842279877e-06, "loss": 0.9177, "step": 2298 }, { "epoch": 0.13728651618296905, "grad_norm": 2.868013381958008, "learning_rate": 9.586623316302835e-06, "loss": 0.9335, "step": 2299 }, { "epoch": 0.13734623193598472, "grad_norm": 2.3320915699005127, "learning_rate": 9.585959790325791e-06, "loss": 0.9635, "step": 2300 }, { "epoch": 0.13740594768900036, "grad_norm": 2.803851842880249, "learning_rate": 9.585296264348751e-06, "loss": 0.9605, "step": 2301 }, { "epoch": 0.137465663442016, "grad_norm": 3.608496904373169, "learning_rate": 9.584632738371707e-06, "loss": 0.9548, "step": 2302 }, { "epoch": 0.13752537919503166, "grad_norm": 1.9769853353500366, "learning_rate": 9.583969212394665e-06, "loss": 0.9171, "step": 2303 }, { "epoch": 0.1375850949480473, "grad_norm": 3.039644479751587, "learning_rate": 9.583305686417624e-06, "loss": 0.9401, "step": 2304 }, { "epoch": 0.13764481070106294, "grad_norm": 2.402102470397949, "learning_rate": 9.582642160440582e-06, "loss": 0.9428, "step": 2305 }, { "epoch": 0.13770452645407857, "grad_norm": 2.4628756046295166, "learning_rate": 9.58197863446354e-06, "loss": 0.9678, "step": 2306 }, { "epoch": 0.13776424220709424, "grad_norm": 2.423957586288452, "learning_rate": 9.581315108486498e-06, "loss": 0.9109, "step": 2307 }, { "epoch": 0.13782395796010988, "grad_norm": 1.8154462575912476, "learning_rate": 9.580651582509456e-06, "loss": 0.9712, "step": 2308 }, { "epoch": 0.13788367371312552, "grad_norm": 2.370652198791504, "learning_rate": 9.579988056532414e-06, "loss": 0.9058, "step": 2309 }, { "epoch": 0.13794338946614118, "grad_norm": 2.7597765922546387, "learning_rate": 9.579324530555372e-06, "loss": 0.9292, "step": 2310 }, { "epoch": 0.13800310521915682, "grad_norm": 1.9545823335647583, "learning_rate": 9.57866100457833e-06, "loss": 0.9223, "step": 2311 }, { "epoch": 0.13806282097217246, "grad_norm": 2.5627167224884033, "learning_rate": 9.577997478601287e-06, "loss": 0.9645, "step": 2312 }, { "epoch": 0.1381225367251881, "grad_norm": 2.053985357284546, "learning_rate": 9.577333952624246e-06, "loss": 0.9531, "step": 2313 }, { "epoch": 0.13818225247820376, "grad_norm": 2.420055389404297, "learning_rate": 9.576670426647205e-06, "loss": 0.922, "step": 2314 }, { "epoch": 0.1382419682312194, "grad_norm": 1.6927111148834229, "learning_rate": 9.576006900670161e-06, "loss": 0.9318, "step": 2315 }, { "epoch": 0.13830168398423504, "grad_norm": 1.9894635677337646, "learning_rate": 9.57534337469312e-06, "loss": 0.8986, "step": 2316 }, { "epoch": 0.13836139973725067, "grad_norm": 2.35371732711792, "learning_rate": 9.574679848716077e-06, "loss": 0.9379, "step": 2317 }, { "epoch": 0.13842111549026634, "grad_norm": 2.313549280166626, "learning_rate": 9.574016322739035e-06, "loss": 0.9505, "step": 2318 }, { "epoch": 0.13848083124328198, "grad_norm": 2.5501503944396973, "learning_rate": 9.573352796761995e-06, "loss": 0.9481, "step": 2319 }, { "epoch": 0.13854054699629761, "grad_norm": 2.070174217224121, "learning_rate": 9.572689270784951e-06, "loss": 0.9224, "step": 2320 }, { "epoch": 0.13860026274931328, "grad_norm": 2.433281421661377, "learning_rate": 9.57202574480791e-06, "loss": 0.9713, "step": 2321 }, { "epoch": 0.13865997850232892, "grad_norm": 3.38883376121521, "learning_rate": 9.571362218830868e-06, "loss": 0.9655, "step": 2322 }, { "epoch": 0.13871969425534456, "grad_norm": 2.4995100498199463, "learning_rate": 9.570698692853826e-06, "loss": 0.9285, "step": 2323 }, { "epoch": 0.1387794100083602, "grad_norm": 2.268993854522705, "learning_rate": 9.570035166876784e-06, "loss": 0.9268, "step": 2324 }, { "epoch": 0.13883912576137586, "grad_norm": 2.5418994426727295, "learning_rate": 9.569371640899742e-06, "loss": 0.9332, "step": 2325 }, { "epoch": 0.1388988415143915, "grad_norm": 2.512554883956909, "learning_rate": 9.5687081149227e-06, "loss": 0.9232, "step": 2326 }, { "epoch": 0.13895855726740713, "grad_norm": 3.356282949447632, "learning_rate": 9.568044588945658e-06, "loss": 0.9275, "step": 2327 }, { "epoch": 0.1390182730204228, "grad_norm": 2.877878189086914, "learning_rate": 9.567381062968616e-06, "loss": 0.9431, "step": 2328 }, { "epoch": 0.13907798877343844, "grad_norm": 2.1361234188079834, "learning_rate": 9.566717536991574e-06, "loss": 0.9342, "step": 2329 }, { "epoch": 0.13913770452645408, "grad_norm": 2.9852077960968018, "learning_rate": 9.566054011014532e-06, "loss": 0.9602, "step": 2330 }, { "epoch": 0.1391974202794697, "grad_norm": 4.643652439117432, "learning_rate": 9.56539048503749e-06, "loss": 0.9551, "step": 2331 }, { "epoch": 0.13925713603248538, "grad_norm": 3.212869644165039, "learning_rate": 9.564726959060447e-06, "loss": 1.0308, "step": 2332 }, { "epoch": 0.13931685178550102, "grad_norm": 1.9062129259109497, "learning_rate": 9.564063433083405e-06, "loss": 0.9441, "step": 2333 }, { "epoch": 0.13937656753851665, "grad_norm": 2.8699116706848145, "learning_rate": 9.563399907106365e-06, "loss": 0.9222, "step": 2334 }, { "epoch": 0.1394362832915323, "grad_norm": 2.7900068759918213, "learning_rate": 9.562736381129321e-06, "loss": 0.9625, "step": 2335 }, { "epoch": 0.13949599904454796, "grad_norm": 2.2623541355133057, "learning_rate": 9.56207285515228e-06, "loss": 0.9065, "step": 2336 }, { "epoch": 0.1395557147975636, "grad_norm": 2.0102405548095703, "learning_rate": 9.561409329175237e-06, "loss": 0.9546, "step": 2337 }, { "epoch": 0.13961543055057923, "grad_norm": 2.7718448638916016, "learning_rate": 9.560745803198196e-06, "loss": 0.9435, "step": 2338 }, { "epoch": 0.1396751463035949, "grad_norm": 2.8311450481414795, "learning_rate": 9.560082277221154e-06, "loss": 0.9353, "step": 2339 }, { "epoch": 0.13973486205661054, "grad_norm": 1.8263639211654663, "learning_rate": 9.559418751244112e-06, "loss": 0.9133, "step": 2340 }, { "epoch": 0.13979457780962617, "grad_norm": 2.3546674251556396, "learning_rate": 9.55875522526707e-06, "loss": 0.9319, "step": 2341 }, { "epoch": 0.1398542935626418, "grad_norm": 2.8129665851593018, "learning_rate": 9.558091699290028e-06, "loss": 0.9582, "step": 2342 }, { "epoch": 0.13991400931565748, "grad_norm": 5.753870010375977, "learning_rate": 9.557428173312986e-06, "loss": 0.9395, "step": 2343 }, { "epoch": 0.13997372506867312, "grad_norm": 3.068763494491577, "learning_rate": 9.556764647335944e-06, "loss": 0.9025, "step": 2344 }, { "epoch": 0.14003344082168875, "grad_norm": 2.3617630004882812, "learning_rate": 9.556101121358902e-06, "loss": 0.9117, "step": 2345 }, { "epoch": 0.14009315657470442, "grad_norm": 3.394197463989258, "learning_rate": 9.55543759538186e-06, "loss": 0.9938, "step": 2346 }, { "epoch": 0.14015287232772006, "grad_norm": 2.0649945735931396, "learning_rate": 9.554774069404817e-06, "loss": 0.9219, "step": 2347 }, { "epoch": 0.1402125880807357, "grad_norm": 2.298036813735962, "learning_rate": 9.554110543427777e-06, "loss": 0.9147, "step": 2348 }, { "epoch": 0.14027230383375133, "grad_norm": 3.2602591514587402, "learning_rate": 9.553447017450735e-06, "loss": 0.9622, "step": 2349 }, { "epoch": 0.140332019586767, "grad_norm": 1.8505656719207764, "learning_rate": 9.552783491473691e-06, "loss": 0.9727, "step": 2350 }, { "epoch": 0.14039173533978264, "grad_norm": 3.5552473068237305, "learning_rate": 9.552119965496651e-06, "loss": 0.9466, "step": 2351 }, { "epoch": 0.14045145109279827, "grad_norm": 2.163778781890869, "learning_rate": 9.551456439519607e-06, "loss": 0.927, "step": 2352 }, { "epoch": 0.14051116684581394, "grad_norm": 2.7068030834198, "learning_rate": 9.550792913542565e-06, "loss": 0.9802, "step": 2353 }, { "epoch": 0.14057088259882958, "grad_norm": 2.4884562492370605, "learning_rate": 9.550129387565523e-06, "loss": 0.986, "step": 2354 }, { "epoch": 0.14063059835184522, "grad_norm": 2.523629903793335, "learning_rate": 9.549465861588482e-06, "loss": 0.9599, "step": 2355 }, { "epoch": 0.14069031410486085, "grad_norm": 2.049837589263916, "learning_rate": 9.54880233561144e-06, "loss": 0.9386, "step": 2356 }, { "epoch": 0.14075002985787652, "grad_norm": 2.9857757091522217, "learning_rate": 9.548138809634398e-06, "loss": 0.9565, "step": 2357 }, { "epoch": 0.14080974561089216, "grad_norm": 3.6067256927490234, "learning_rate": 9.547475283657356e-06, "loss": 0.9497, "step": 2358 }, { "epoch": 0.1408694613639078, "grad_norm": 3.632534980773926, "learning_rate": 9.546811757680314e-06, "loss": 0.9431, "step": 2359 }, { "epoch": 0.14092917711692343, "grad_norm": 2.0244061946868896, "learning_rate": 9.546148231703272e-06, "loss": 0.969, "step": 2360 }, { "epoch": 0.1409888928699391, "grad_norm": 1.6974236965179443, "learning_rate": 9.54548470572623e-06, "loss": 0.9383, "step": 2361 }, { "epoch": 0.14104860862295474, "grad_norm": 2.62166428565979, "learning_rate": 9.544821179749187e-06, "loss": 0.9884, "step": 2362 }, { "epoch": 0.14110832437597037, "grad_norm": 2.4779114723205566, "learning_rate": 9.544157653772146e-06, "loss": 0.9544, "step": 2363 }, { "epoch": 0.14116804012898604, "grad_norm": 2.2234461307525635, "learning_rate": 9.543494127795104e-06, "loss": 0.9117, "step": 2364 }, { "epoch": 0.14122775588200168, "grad_norm": 2.449571132659912, "learning_rate": 9.542830601818061e-06, "loss": 0.9799, "step": 2365 }, { "epoch": 0.14128747163501731, "grad_norm": 2.216578722000122, "learning_rate": 9.54216707584102e-06, "loss": 0.9248, "step": 2366 }, { "epoch": 0.14134718738803295, "grad_norm": 2.0258190631866455, "learning_rate": 9.541503549863977e-06, "loss": 0.89, "step": 2367 }, { "epoch": 0.14140690314104862, "grad_norm": 2.3880984783172607, "learning_rate": 9.540840023886935e-06, "loss": 0.9077, "step": 2368 }, { "epoch": 0.14146661889406426, "grad_norm": 2.1665308475494385, "learning_rate": 9.540176497909895e-06, "loss": 0.9364, "step": 2369 }, { "epoch": 0.1415263346470799, "grad_norm": 2.3894219398498535, "learning_rate": 9.539512971932851e-06, "loss": 0.9281, "step": 2370 }, { "epoch": 0.14158605040009556, "grad_norm": 2.8524954319000244, "learning_rate": 9.53884944595581e-06, "loss": 0.9382, "step": 2371 }, { "epoch": 0.1416457661531112, "grad_norm": 2.7712697982788086, "learning_rate": 9.538185919978768e-06, "loss": 0.943, "step": 2372 }, { "epoch": 0.14170548190612683, "grad_norm": 2.4036448001861572, "learning_rate": 9.537522394001726e-06, "loss": 0.9374, "step": 2373 }, { "epoch": 0.14176519765914247, "grad_norm": 2.611513137817383, "learning_rate": 9.536858868024684e-06, "loss": 0.9646, "step": 2374 }, { "epoch": 0.14182491341215814, "grad_norm": 2.7936007976531982, "learning_rate": 9.536195342047642e-06, "loss": 0.9754, "step": 2375 }, { "epoch": 0.14188462916517378, "grad_norm": 2.7788166999816895, "learning_rate": 9.5355318160706e-06, "loss": 0.9565, "step": 2376 }, { "epoch": 0.1419443449181894, "grad_norm": 2.790376663208008, "learning_rate": 9.534868290093558e-06, "loss": 0.9419, "step": 2377 }, { "epoch": 0.14200406067120505, "grad_norm": 2.9602224826812744, "learning_rate": 9.534204764116516e-06, "loss": 0.9554, "step": 2378 }, { "epoch": 0.14206377642422072, "grad_norm": 2.7899625301361084, "learning_rate": 9.533541238139474e-06, "loss": 0.958, "step": 2379 }, { "epoch": 0.14212349217723635, "grad_norm": 2.7211341857910156, "learning_rate": 9.532877712162432e-06, "loss": 0.9509, "step": 2380 }, { "epoch": 0.142183207930252, "grad_norm": 2.771488666534424, "learning_rate": 9.53221418618539e-06, "loss": 0.9517, "step": 2381 }, { "epoch": 0.14224292368326766, "grad_norm": 2.9710094928741455, "learning_rate": 9.531550660208347e-06, "loss": 0.9093, "step": 2382 }, { "epoch": 0.1423026394362833, "grad_norm": 2.997061252593994, "learning_rate": 9.530887134231305e-06, "loss": 0.923, "step": 2383 }, { "epoch": 0.14236235518929893, "grad_norm": 2.427528142929077, "learning_rate": 9.530223608254265e-06, "loss": 0.931, "step": 2384 }, { "epoch": 0.14242207094231457, "grad_norm": 2.298966884613037, "learning_rate": 9.529560082277221e-06, "loss": 0.9631, "step": 2385 }, { "epoch": 0.14248178669533024, "grad_norm": 2.5698800086975098, "learning_rate": 9.52889655630018e-06, "loss": 0.903, "step": 2386 }, { "epoch": 0.14254150244834587, "grad_norm": 3.5776584148406982, "learning_rate": 9.528233030323137e-06, "loss": 0.9035, "step": 2387 }, { "epoch": 0.1426012182013615, "grad_norm": 2.9379210472106934, "learning_rate": 9.527569504346096e-06, "loss": 0.9613, "step": 2388 }, { "epoch": 0.14266093395437718, "grad_norm": 2.1463329792022705, "learning_rate": 9.526905978369054e-06, "loss": 0.9133, "step": 2389 }, { "epoch": 0.14272064970739282, "grad_norm": 2.2162065505981445, "learning_rate": 9.526242452392012e-06, "loss": 0.9423, "step": 2390 }, { "epoch": 0.14278036546040845, "grad_norm": 2.413501739501953, "learning_rate": 9.52557892641497e-06, "loss": 0.9427, "step": 2391 }, { "epoch": 0.1428400812134241, "grad_norm": 2.677417755126953, "learning_rate": 9.524915400437928e-06, "loss": 0.9551, "step": 2392 }, { "epoch": 0.14289979696643976, "grad_norm": 2.5517280101776123, "learning_rate": 9.524251874460886e-06, "loss": 0.92, "step": 2393 }, { "epoch": 0.1429595127194554, "grad_norm": 2.3082284927368164, "learning_rate": 9.523588348483844e-06, "loss": 0.8881, "step": 2394 }, { "epoch": 0.14301922847247103, "grad_norm": 2.294475793838501, "learning_rate": 9.522924822506802e-06, "loss": 0.986, "step": 2395 }, { "epoch": 0.14307894422548667, "grad_norm": 3.2816972732543945, "learning_rate": 9.52226129652976e-06, "loss": 0.9046, "step": 2396 }, { "epoch": 0.14313865997850234, "grad_norm": 1.983585238456726, "learning_rate": 9.521597770552717e-06, "loss": 0.9399, "step": 2397 }, { "epoch": 0.14319837573151797, "grad_norm": 1.7368534803390503, "learning_rate": 9.520934244575677e-06, "loss": 0.9569, "step": 2398 }, { "epoch": 0.1432580914845336, "grad_norm": 3.346815824508667, "learning_rate": 9.520270718598635e-06, "loss": 0.9269, "step": 2399 }, { "epoch": 0.14331780723754928, "grad_norm": 3.5015900135040283, "learning_rate": 9.519607192621591e-06, "loss": 0.9419, "step": 2400 }, { "epoch": 0.14331780723754928, "eval_text_loss": 0.9636279344558716, "eval_text_runtime": 15.1708, "eval_text_samples_per_second": 263.665, "eval_text_steps_per_second": 0.527, "step": 2400 }, { "epoch": 0.14331780723754928, "eval_image_loss": 0.7005918025970459, "eval_image_runtime": 5.1407, "eval_image_samples_per_second": 778.097, "eval_image_steps_per_second": 1.556, "step": 2400 }, { "epoch": 0.14331780723754928, "eval_video_loss": 1.1773686408996582, "eval_video_runtime": 77.8173, "eval_video_samples_per_second": 51.402, "eval_video_steps_per_second": 0.103, "step": 2400 }, { "epoch": 0.14337752299056492, "grad_norm": 1.757712960243225, "learning_rate": 9.51894366664455e-06, "loss": 0.9514, "step": 2401 }, { "epoch": 0.14343723874358055, "grad_norm": 12.515031814575195, "learning_rate": 9.518280140667507e-06, "loss": 0.9359, "step": 2402 }, { "epoch": 0.1434969544965962, "grad_norm": 5.436987400054932, "learning_rate": 9.517616614690465e-06, "loss": 0.94, "step": 2403 }, { "epoch": 0.14355667024961186, "grad_norm": 4.042179107666016, "learning_rate": 9.516953088713423e-06, "loss": 0.9544, "step": 2404 }, { "epoch": 0.1436163860026275, "grad_norm": 1.8711165189743042, "learning_rate": 9.516289562736382e-06, "loss": 0.9239, "step": 2405 }, { "epoch": 0.14367610175564313, "grad_norm": 2.791578531265259, "learning_rate": 9.51562603675934e-06, "loss": 0.9728, "step": 2406 }, { "epoch": 0.1437358175086588, "grad_norm": 4.041372776031494, "learning_rate": 9.514962510782298e-06, "loss": 0.947, "step": 2407 }, { "epoch": 0.14379553326167444, "grad_norm": 2.73108172416687, "learning_rate": 9.514298984805256e-06, "loss": 0.9202, "step": 2408 }, { "epoch": 0.14385524901469007, "grad_norm": 2.5555455684661865, "learning_rate": 9.513635458828214e-06, "loss": 0.9421, "step": 2409 }, { "epoch": 0.1439149647677057, "grad_norm": 2.251709222793579, "learning_rate": 9.512971932851172e-06, "loss": 0.9146, "step": 2410 }, { "epoch": 0.14397468052072138, "grad_norm": 2.9423816204071045, "learning_rate": 9.51230840687413e-06, "loss": 0.9373, "step": 2411 }, { "epoch": 0.14403439627373701, "grad_norm": 1.7545384168624878, "learning_rate": 9.511644880897087e-06, "loss": 0.9546, "step": 2412 }, { "epoch": 0.14409411202675265, "grad_norm": 2.906470537185669, "learning_rate": 9.510981354920046e-06, "loss": 0.9601, "step": 2413 }, { "epoch": 0.1441538277797683, "grad_norm": 3.0158307552337646, "learning_rate": 9.510317828943004e-06, "loss": 0.8966, "step": 2414 }, { "epoch": 0.14421354353278396, "grad_norm": 4.286125183105469, "learning_rate": 9.50965430296596e-06, "loss": 0.9246, "step": 2415 }, { "epoch": 0.1442732592857996, "grad_norm": 3.144871711730957, "learning_rate": 9.50899077698892e-06, "loss": 0.9428, "step": 2416 }, { "epoch": 0.14433297503881523, "grad_norm": 3.110166311264038, "learning_rate": 9.508327251011877e-06, "loss": 0.9538, "step": 2417 }, { "epoch": 0.1443926907918309, "grad_norm": 1.9897898435592651, "learning_rate": 9.507663725034835e-06, "loss": 0.9677, "step": 2418 }, { "epoch": 0.14445240654484653, "grad_norm": 2.787706136703491, "learning_rate": 9.507000199057795e-06, "loss": 0.9649, "step": 2419 }, { "epoch": 0.14451212229786217, "grad_norm": 2.5808160305023193, "learning_rate": 9.506336673080751e-06, "loss": 0.9671, "step": 2420 }, { "epoch": 0.1445718380508778, "grad_norm": 1.9386261701583862, "learning_rate": 9.50567314710371e-06, "loss": 0.927, "step": 2421 }, { "epoch": 0.14463155380389348, "grad_norm": 2.7442100048065186, "learning_rate": 9.505009621126668e-06, "loss": 0.9393, "step": 2422 }, { "epoch": 0.1446912695569091, "grad_norm": 3.696857213973999, "learning_rate": 9.504346095149626e-06, "loss": 0.9756, "step": 2423 }, { "epoch": 0.14475098530992475, "grad_norm": 2.421638011932373, "learning_rate": 9.503682569172584e-06, "loss": 0.9183, "step": 2424 }, { "epoch": 0.14481070106294042, "grad_norm": 2.992388963699341, "learning_rate": 9.503019043195542e-06, "loss": 0.9513, "step": 2425 }, { "epoch": 0.14487041681595605, "grad_norm": 2.397578239440918, "learning_rate": 9.5023555172185e-06, "loss": 0.9488, "step": 2426 }, { "epoch": 0.1449301325689717, "grad_norm": 1.8265740871429443, "learning_rate": 9.501691991241458e-06, "loss": 0.9433, "step": 2427 }, { "epoch": 0.14498984832198733, "grad_norm": 1.9416457414627075, "learning_rate": 9.501028465264416e-06, "loss": 0.9263, "step": 2428 }, { "epoch": 0.145049564075003, "grad_norm": 3.1381826400756836, "learning_rate": 9.500364939287374e-06, "loss": 0.9128, "step": 2429 }, { "epoch": 0.14510927982801863, "grad_norm": 2.7333626747131348, "learning_rate": 9.499701413310332e-06, "loss": 0.8958, "step": 2430 }, { "epoch": 0.14516899558103427, "grad_norm": 2.621509075164795, "learning_rate": 9.49903788733329e-06, "loss": 0.9759, "step": 2431 }, { "epoch": 0.1452287113340499, "grad_norm": 2.6217575073242188, "learning_rate": 9.498374361356247e-06, "loss": 0.9441, "step": 2432 }, { "epoch": 0.14528842708706557, "grad_norm": 2.3957626819610596, "learning_rate": 9.497710835379205e-06, "loss": 0.9721, "step": 2433 }, { "epoch": 0.1453481428400812, "grad_norm": 2.8007583618164062, "learning_rate": 9.497047309402165e-06, "loss": 0.9219, "step": 2434 }, { "epoch": 0.14540785859309685, "grad_norm": 2.817293405532837, "learning_rate": 9.496383783425121e-06, "loss": 0.9614, "step": 2435 }, { "epoch": 0.14546757434611252, "grad_norm": 2.9395999908447266, "learning_rate": 9.49572025744808e-06, "loss": 0.9113, "step": 2436 }, { "epoch": 0.14552729009912815, "grad_norm": 3.523804187774658, "learning_rate": 9.495056731471037e-06, "loss": 0.9775, "step": 2437 }, { "epoch": 0.1455870058521438, "grad_norm": 2.2642455101013184, "learning_rate": 9.494393205493995e-06, "loss": 0.9192, "step": 2438 }, { "epoch": 0.14564672160515943, "grad_norm": 3.5476760864257812, "learning_rate": 9.493729679516954e-06, "loss": 0.9518, "step": 2439 }, { "epoch": 0.1457064373581751, "grad_norm": 5.035466194152832, "learning_rate": 9.493066153539912e-06, "loss": 0.8936, "step": 2440 }, { "epoch": 0.14576615311119073, "grad_norm": 2.2213168144226074, "learning_rate": 9.49240262756287e-06, "loss": 0.9524, "step": 2441 }, { "epoch": 0.14582586886420637, "grad_norm": 2.1199448108673096, "learning_rate": 9.491739101585828e-06, "loss": 0.9467, "step": 2442 }, { "epoch": 0.14588558461722204, "grad_norm": 2.8827295303344727, "learning_rate": 9.491075575608786e-06, "loss": 0.9537, "step": 2443 }, { "epoch": 0.14594530037023767, "grad_norm": 3.1505188941955566, "learning_rate": 9.490412049631744e-06, "loss": 0.9382, "step": 2444 }, { "epoch": 0.1460050161232533, "grad_norm": 6.1742424964904785, "learning_rate": 9.489748523654702e-06, "loss": 0.9121, "step": 2445 }, { "epoch": 0.14606473187626895, "grad_norm": 2.588627815246582, "learning_rate": 9.48908499767766e-06, "loss": 0.961, "step": 2446 }, { "epoch": 0.14612444762928462, "grad_norm": 3.3270742893218994, "learning_rate": 9.488421471700617e-06, "loss": 0.9348, "step": 2447 }, { "epoch": 0.14618416338230025, "grad_norm": 2.286226511001587, "learning_rate": 9.487757945723576e-06, "loss": 0.932, "step": 2448 }, { "epoch": 0.1462438791353159, "grad_norm": 3.1425774097442627, "learning_rate": 9.487094419746535e-06, "loss": 0.9585, "step": 2449 }, { "epoch": 0.14630359488833153, "grad_norm": 3.000218629837036, "learning_rate": 9.486430893769491e-06, "loss": 0.9545, "step": 2450 }, { "epoch": 0.1463633106413472, "grad_norm": 2.4470362663269043, "learning_rate": 9.48576736779245e-06, "loss": 0.9034, "step": 2451 }, { "epoch": 0.14642302639436283, "grad_norm": 2.721287727355957, "learning_rate": 9.485103841815407e-06, "loss": 0.9128, "step": 2452 }, { "epoch": 0.14648274214737847, "grad_norm": 1.9632571935653687, "learning_rate": 9.484440315838365e-06, "loss": 0.9103, "step": 2453 }, { "epoch": 0.14654245790039414, "grad_norm": 2.1578404903411865, "learning_rate": 9.483776789861325e-06, "loss": 0.9518, "step": 2454 }, { "epoch": 0.14660217365340977, "grad_norm": 6.242565631866455, "learning_rate": 9.483113263884281e-06, "loss": 0.9049, "step": 2455 }, { "epoch": 0.1466618894064254, "grad_norm": 2.789259910583496, "learning_rate": 9.48244973790724e-06, "loss": 0.9602, "step": 2456 }, { "epoch": 0.14672160515944105, "grad_norm": 3.148000955581665, "learning_rate": 9.481786211930198e-06, "loss": 0.9354, "step": 2457 }, { "epoch": 0.14678132091245671, "grad_norm": 2.6528658866882324, "learning_rate": 9.481122685953156e-06, "loss": 0.939, "step": 2458 }, { "epoch": 0.14684103666547235, "grad_norm": 3.406670331954956, "learning_rate": 9.480459159976114e-06, "loss": 0.932, "step": 2459 }, { "epoch": 0.146900752418488, "grad_norm": 3.1062850952148438, "learning_rate": 9.479795633999072e-06, "loss": 0.9623, "step": 2460 }, { "epoch": 0.14696046817150366, "grad_norm": 4.196341037750244, "learning_rate": 9.47913210802203e-06, "loss": 0.9268, "step": 2461 }, { "epoch": 0.1470201839245193, "grad_norm": 2.1852526664733887, "learning_rate": 9.478468582044986e-06, "loss": 0.9481, "step": 2462 }, { "epoch": 0.14707989967753493, "grad_norm": 7.827360153198242, "learning_rate": 9.477805056067946e-06, "loss": 0.928, "step": 2463 }, { "epoch": 0.14713961543055057, "grad_norm": 5.739188194274902, "learning_rate": 9.477141530090904e-06, "loss": 0.9117, "step": 2464 }, { "epoch": 0.14719933118356623, "grad_norm": 2.2919986248016357, "learning_rate": 9.47647800411386e-06, "loss": 0.9221, "step": 2465 }, { "epoch": 0.14725904693658187, "grad_norm": 4.027832984924316, "learning_rate": 9.47581447813682e-06, "loss": 0.9334, "step": 2466 }, { "epoch": 0.1473187626895975, "grad_norm": 2.3168723583221436, "learning_rate": 9.475150952159777e-06, "loss": 0.9342, "step": 2467 }, { "epoch": 0.14737847844261315, "grad_norm": 3.230182409286499, "learning_rate": 9.474487426182735e-06, "loss": 0.9572, "step": 2468 }, { "epoch": 0.1474381941956288, "grad_norm": 3.188373327255249, "learning_rate": 9.473823900205695e-06, "loss": 0.9147, "step": 2469 }, { "epoch": 0.14749790994864445, "grad_norm": 2.2783443927764893, "learning_rate": 9.473160374228651e-06, "loss": 0.9255, "step": 2470 }, { "epoch": 0.1475576257016601, "grad_norm": 7.546876907348633, "learning_rate": 9.47249684825161e-06, "loss": 0.9483, "step": 2471 }, { "epoch": 0.14761734145467575, "grad_norm": 2.0874524116516113, "learning_rate": 9.471833322274567e-06, "loss": 0.9611, "step": 2472 }, { "epoch": 0.1476770572076914, "grad_norm": 22.909303665161133, "learning_rate": 9.471169796297526e-06, "loss": 0.9193, "step": 2473 }, { "epoch": 0.14773677296070703, "grad_norm": 2.7879838943481445, "learning_rate": 9.470506270320484e-06, "loss": 0.9377, "step": 2474 }, { "epoch": 0.14779648871372267, "grad_norm": 4.601938247680664, "learning_rate": 9.469842744343442e-06, "loss": 0.9493, "step": 2475 }, { "epoch": 0.14785620446673833, "grad_norm": 10.983050346374512, "learning_rate": 9.4691792183664e-06, "loss": 0.9483, "step": 2476 }, { "epoch": 0.14791592021975397, "grad_norm": 2.704233169555664, "learning_rate": 9.468515692389358e-06, "loss": 0.9133, "step": 2477 }, { "epoch": 0.1479756359727696, "grad_norm": 2.2010443210601807, "learning_rate": 9.467852166412316e-06, "loss": 0.924, "step": 2478 }, { "epoch": 0.14803535172578527, "grad_norm": 2.8204784393310547, "learning_rate": 9.467188640435274e-06, "loss": 0.9503, "step": 2479 }, { "epoch": 0.1480950674788009, "grad_norm": 2.8580832481384277, "learning_rate": 9.466525114458232e-06, "loss": 0.937, "step": 2480 }, { "epoch": 0.14815478323181655, "grad_norm": 2.54496169090271, "learning_rate": 9.46586158848119e-06, "loss": 0.9018, "step": 2481 }, { "epoch": 0.1482144989848322, "grad_norm": 2.7616961002349854, "learning_rate": 9.465198062504147e-06, "loss": 0.9502, "step": 2482 }, { "epoch": 0.14827421473784785, "grad_norm": 2.2714810371398926, "learning_rate": 9.464534536527107e-06, "loss": 0.8948, "step": 2483 }, { "epoch": 0.1483339304908635, "grad_norm": 3.4403884410858154, "learning_rate": 9.463871010550065e-06, "loss": 0.9004, "step": 2484 }, { "epoch": 0.14839364624387913, "grad_norm": 2.454108715057373, "learning_rate": 9.463207484573021e-06, "loss": 0.9391, "step": 2485 }, { "epoch": 0.14845336199689477, "grad_norm": 2.5334713459014893, "learning_rate": 9.46254395859598e-06, "loss": 0.9479, "step": 2486 }, { "epoch": 0.14851307774991043, "grad_norm": 2.6389050483703613, "learning_rate": 9.461880432618937e-06, "loss": 0.9781, "step": 2487 }, { "epoch": 0.14857279350292607, "grad_norm": 2.6389689445495605, "learning_rate": 9.461216906641895e-06, "loss": 0.9747, "step": 2488 }, { "epoch": 0.1486325092559417, "grad_norm": 2.3091628551483154, "learning_rate": 9.460553380664853e-06, "loss": 0.927, "step": 2489 }, { "epoch": 0.14869222500895737, "grad_norm": 2.0418789386749268, "learning_rate": 9.459889854687812e-06, "loss": 0.9588, "step": 2490 }, { "epoch": 0.148751940761973, "grad_norm": 3.7373533248901367, "learning_rate": 9.45922632871077e-06, "loss": 0.9226, "step": 2491 }, { "epoch": 0.14881165651498865, "grad_norm": 2.090778112411499, "learning_rate": 9.458562802733728e-06, "loss": 0.9464, "step": 2492 }, { "epoch": 0.1488713722680043, "grad_norm": 2.1255784034729004, "learning_rate": 9.457899276756686e-06, "loss": 0.9673, "step": 2493 }, { "epoch": 0.14893108802101995, "grad_norm": 1.936417579650879, "learning_rate": 9.457235750779644e-06, "loss": 0.9248, "step": 2494 }, { "epoch": 0.1489908037740356, "grad_norm": 2.4515938758850098, "learning_rate": 9.456572224802602e-06, "loss": 0.9047, "step": 2495 }, { "epoch": 0.14905051952705123, "grad_norm": 3.2303197383880615, "learning_rate": 9.45590869882556e-06, "loss": 0.937, "step": 2496 }, { "epoch": 0.1491102352800669, "grad_norm": 2.5095882415771484, "learning_rate": 9.455245172848517e-06, "loss": 0.9215, "step": 2497 }, { "epoch": 0.14916995103308253, "grad_norm": 3.1395273208618164, "learning_rate": 9.454581646871476e-06, "loss": 0.9692, "step": 2498 }, { "epoch": 0.14922966678609817, "grad_norm": 2.34802508354187, "learning_rate": 9.453918120894435e-06, "loss": 0.915, "step": 2499 }, { "epoch": 0.1492893825391138, "grad_norm": 2.4913949966430664, "learning_rate": 9.453254594917391e-06, "loss": 0.9674, "step": 2500 }, { "epoch": 0.14934909829212947, "grad_norm": 2.935652017593384, "learning_rate": 9.45259106894035e-06, "loss": 0.9412, "step": 2501 }, { "epoch": 0.1494088140451451, "grad_norm": 1.7384552955627441, "learning_rate": 9.451927542963307e-06, "loss": 0.9212, "step": 2502 }, { "epoch": 0.14946852979816075, "grad_norm": 1.9625766277313232, "learning_rate": 9.451264016986265e-06, "loss": 0.9384, "step": 2503 }, { "epoch": 0.14952824555117641, "grad_norm": 2.933206796646118, "learning_rate": 9.450600491009225e-06, "loss": 0.9347, "step": 2504 }, { "epoch": 0.14958796130419205, "grad_norm": 2.060142993927002, "learning_rate": 9.449936965032181e-06, "loss": 0.8731, "step": 2505 }, { "epoch": 0.1496476770572077, "grad_norm": 4.150566101074219, "learning_rate": 9.44927343905514e-06, "loss": 0.9095, "step": 2506 }, { "epoch": 0.14970739281022333, "grad_norm": 1.8324189186096191, "learning_rate": 9.448609913078098e-06, "loss": 0.9671, "step": 2507 }, { "epoch": 0.149767108563239, "grad_norm": 2.4949562549591064, "learning_rate": 9.447946387101056e-06, "loss": 0.936, "step": 2508 }, { "epoch": 0.14982682431625463, "grad_norm": 2.078306198120117, "learning_rate": 9.447282861124014e-06, "loss": 0.9248, "step": 2509 }, { "epoch": 0.14988654006927027, "grad_norm": 2.0828418731689453, "learning_rate": 9.446619335146972e-06, "loss": 0.8949, "step": 2510 }, { "epoch": 0.1499462558222859, "grad_norm": 2.456688404083252, "learning_rate": 9.44595580916993e-06, "loss": 0.9466, "step": 2511 }, { "epoch": 0.15000597157530157, "grad_norm": 2.0101540088653564, "learning_rate": 9.445292283192888e-06, "loss": 0.9744, "step": 2512 }, { "epoch": 0.1500656873283172, "grad_norm": 2.7939887046813965, "learning_rate": 9.444628757215846e-06, "loss": 0.9211, "step": 2513 }, { "epoch": 0.15012540308133285, "grad_norm": 2.682973623275757, "learning_rate": 9.443965231238804e-06, "loss": 0.9073, "step": 2514 }, { "epoch": 0.1501851188343485, "grad_norm": 2.4693543910980225, "learning_rate": 9.44330170526176e-06, "loss": 0.9654, "step": 2515 }, { "epoch": 0.15024483458736415, "grad_norm": 1.7941513061523438, "learning_rate": 9.44263817928472e-06, "loss": 0.9428, "step": 2516 }, { "epoch": 0.1503045503403798, "grad_norm": 4.727741241455078, "learning_rate": 9.441974653307677e-06, "loss": 0.9676, "step": 2517 }, { "epoch": 0.15036426609339543, "grad_norm": 2.8160228729248047, "learning_rate": 9.441311127330635e-06, "loss": 0.9266, "step": 2518 }, { "epoch": 0.1504239818464111, "grad_norm": 1.9780633449554443, "learning_rate": 9.440647601353595e-06, "loss": 0.9217, "step": 2519 }, { "epoch": 0.15048369759942673, "grad_norm": 2.739790916442871, "learning_rate": 9.439984075376551e-06, "loss": 0.9312, "step": 2520 }, { "epoch": 0.15054341335244237, "grad_norm": 2.003917932510376, "learning_rate": 9.43932054939951e-06, "loss": 0.9324, "step": 2521 }, { "epoch": 0.15060312910545803, "grad_norm": 2.0378003120422363, "learning_rate": 9.438657023422467e-06, "loss": 0.9567, "step": 2522 }, { "epoch": 0.15066284485847367, "grad_norm": 2.953892946243286, "learning_rate": 9.437993497445426e-06, "loss": 0.9285, "step": 2523 }, { "epoch": 0.1507225606114893, "grad_norm": 1.853289246559143, "learning_rate": 9.437329971468384e-06, "loss": 0.9533, "step": 2524 }, { "epoch": 0.15078227636450495, "grad_norm": 2.000539779663086, "learning_rate": 9.436666445491342e-06, "loss": 0.9283, "step": 2525 }, { "epoch": 0.1508419921175206, "grad_norm": 8.51266860961914, "learning_rate": 9.4360029195143e-06, "loss": 0.9346, "step": 2526 }, { "epoch": 0.15090170787053625, "grad_norm": 2.240861654281616, "learning_rate": 9.435339393537258e-06, "loss": 0.9456, "step": 2527 }, { "epoch": 0.1509614236235519, "grad_norm": 4.742900848388672, "learning_rate": 9.434675867560216e-06, "loss": 0.9724, "step": 2528 }, { "epoch": 0.15102113937656753, "grad_norm": 2.731919527053833, "learning_rate": 9.434012341583174e-06, "loss": 0.9565, "step": 2529 }, { "epoch": 0.1510808551295832, "grad_norm": 4.588780403137207, "learning_rate": 9.433348815606132e-06, "loss": 0.9592, "step": 2530 }, { "epoch": 0.15114057088259883, "grad_norm": 2.0185177326202393, "learning_rate": 9.43268528962909e-06, "loss": 0.9333, "step": 2531 }, { "epoch": 0.15120028663561447, "grad_norm": 3.3802990913391113, "learning_rate": 9.432021763652047e-06, "loss": 0.9529, "step": 2532 }, { "epoch": 0.15126000238863013, "grad_norm": 2.2966983318328857, "learning_rate": 9.431358237675007e-06, "loss": 0.946, "step": 2533 }, { "epoch": 0.15131971814164577, "grad_norm": 2.481478452682495, "learning_rate": 9.430694711697965e-06, "loss": 0.8963, "step": 2534 }, { "epoch": 0.1513794338946614, "grad_norm": 1.5285794734954834, "learning_rate": 9.430031185720921e-06, "loss": 0.949, "step": 2535 }, { "epoch": 0.15143914964767705, "grad_norm": 2.5771701335906982, "learning_rate": 9.429367659743879e-06, "loss": 0.9194, "step": 2536 }, { "epoch": 0.1514988654006927, "grad_norm": 2.2049994468688965, "learning_rate": 9.428704133766837e-06, "loss": 0.9585, "step": 2537 }, { "epoch": 0.15155858115370835, "grad_norm": 1.8402272462844849, "learning_rate": 9.428040607789795e-06, "loss": 0.9238, "step": 2538 }, { "epoch": 0.151618296906724, "grad_norm": 2.383366107940674, "learning_rate": 9.427377081812753e-06, "loss": 0.9482, "step": 2539 }, { "epoch": 0.15167801265973965, "grad_norm": 2.6742401123046875, "learning_rate": 9.426713555835712e-06, "loss": 0.9368, "step": 2540 }, { "epoch": 0.1517377284127553, "grad_norm": 2.3830578327178955, "learning_rate": 9.42605002985867e-06, "loss": 0.92, "step": 2541 }, { "epoch": 0.15179744416577093, "grad_norm": 2.370112895965576, "learning_rate": 9.425386503881628e-06, "loss": 0.9201, "step": 2542 }, { "epoch": 0.15185715991878657, "grad_norm": 2.123677968978882, "learning_rate": 9.424722977904586e-06, "loss": 0.9383, "step": 2543 }, { "epoch": 0.15191687567180223, "grad_norm": 2.0539486408233643, "learning_rate": 9.424059451927544e-06, "loss": 0.9104, "step": 2544 }, { "epoch": 0.15197659142481787, "grad_norm": 2.2795095443725586, "learning_rate": 9.423395925950502e-06, "loss": 0.9257, "step": 2545 }, { "epoch": 0.1520363071778335, "grad_norm": 1.8144606351852417, "learning_rate": 9.42273239997346e-06, "loss": 0.9405, "step": 2546 }, { "epoch": 0.15209602293084915, "grad_norm": 3.1618411540985107, "learning_rate": 9.422068873996417e-06, "loss": 0.9752, "step": 2547 }, { "epoch": 0.1521557386838648, "grad_norm": 7.811352729797363, "learning_rate": 9.421405348019376e-06, "loss": 0.9327, "step": 2548 }, { "epoch": 0.15221545443688045, "grad_norm": 2.0064761638641357, "learning_rate": 9.420741822042334e-06, "loss": 0.9485, "step": 2549 }, { "epoch": 0.1522751701898961, "grad_norm": 1.9601904153823853, "learning_rate": 9.420078296065291e-06, "loss": 0.9335, "step": 2550 }, { "epoch": 0.15233488594291175, "grad_norm": 1.9812630414962769, "learning_rate": 9.41941477008825e-06, "loss": 0.926, "step": 2551 }, { "epoch": 0.1523946016959274, "grad_norm": 2.434631109237671, "learning_rate": 9.418751244111207e-06, "loss": 0.9319, "step": 2552 }, { "epoch": 0.15245431744894303, "grad_norm": 3.248684883117676, "learning_rate": 9.418087718134165e-06, "loss": 0.9085, "step": 2553 }, { "epoch": 0.15251403320195867, "grad_norm": 2.7577779293060303, "learning_rate": 9.417424192157125e-06, "loss": 0.9623, "step": 2554 }, { "epoch": 0.15257374895497433, "grad_norm": 2.9907028675079346, "learning_rate": 9.416760666180081e-06, "loss": 0.9233, "step": 2555 }, { "epoch": 0.15263346470798997, "grad_norm": 2.1244802474975586, "learning_rate": 9.41609714020304e-06, "loss": 0.9394, "step": 2556 }, { "epoch": 0.1526931804610056, "grad_norm": 1.8837403059005737, "learning_rate": 9.415433614225998e-06, "loss": 0.948, "step": 2557 }, { "epoch": 0.15275289621402127, "grad_norm": 2.3483569622039795, "learning_rate": 9.414770088248956e-06, "loss": 0.9297, "step": 2558 }, { "epoch": 0.1528126119670369, "grad_norm": 3.631394386291504, "learning_rate": 9.414106562271914e-06, "loss": 0.9281, "step": 2559 }, { "epoch": 0.15287232772005255, "grad_norm": 2.662114381790161, "learning_rate": 9.413443036294872e-06, "loss": 0.9515, "step": 2560 }, { "epoch": 0.15293204347306819, "grad_norm": 1.7810155153274536, "learning_rate": 9.41277951031783e-06, "loss": 0.9222, "step": 2561 }, { "epoch": 0.15299175922608385, "grad_norm": 2.1993799209594727, "learning_rate": 9.412115984340788e-06, "loss": 0.9285, "step": 2562 }, { "epoch": 0.1530514749790995, "grad_norm": 3.194185972213745, "learning_rate": 9.411452458363746e-06, "loss": 0.9539, "step": 2563 }, { "epoch": 0.15311119073211513, "grad_norm": 2.488602876663208, "learning_rate": 9.410788932386704e-06, "loss": 0.9239, "step": 2564 }, { "epoch": 0.15317090648513076, "grad_norm": 2.120797872543335, "learning_rate": 9.41012540640966e-06, "loss": 0.9225, "step": 2565 }, { "epoch": 0.15323062223814643, "grad_norm": 2.188190460205078, "learning_rate": 9.40946188043262e-06, "loss": 0.9551, "step": 2566 }, { "epoch": 0.15329033799116207, "grad_norm": 3.130153179168701, "learning_rate": 9.408798354455577e-06, "loss": 0.9598, "step": 2567 }, { "epoch": 0.1533500537441777, "grad_norm": 2.3434300422668457, "learning_rate": 9.408134828478535e-06, "loss": 0.9303, "step": 2568 }, { "epoch": 0.15340976949719337, "grad_norm": 2.122650623321533, "learning_rate": 9.407471302501495e-06, "loss": 0.9406, "step": 2569 }, { "epoch": 0.153469485250209, "grad_norm": 3.5269339084625244, "learning_rate": 9.406807776524451e-06, "loss": 0.8915, "step": 2570 }, { "epoch": 0.15352920100322465, "grad_norm": 2.2381820678710938, "learning_rate": 9.40614425054741e-06, "loss": 0.9712, "step": 2571 }, { "epoch": 0.15358891675624028, "grad_norm": 3.5187859535217285, "learning_rate": 9.405480724570367e-06, "loss": 0.9683, "step": 2572 }, { "epoch": 0.15364863250925595, "grad_norm": 2.142223596572876, "learning_rate": 9.404817198593325e-06, "loss": 0.9169, "step": 2573 }, { "epoch": 0.1537083482622716, "grad_norm": 2.8215084075927734, "learning_rate": 9.404153672616284e-06, "loss": 0.9473, "step": 2574 }, { "epoch": 0.15376806401528723, "grad_norm": 6.162791728973389, "learning_rate": 9.403490146639242e-06, "loss": 0.9305, "step": 2575 }, { "epoch": 0.1538277797683029, "grad_norm": 2.5042710304260254, "learning_rate": 9.4028266206622e-06, "loss": 0.947, "step": 2576 }, { "epoch": 0.15388749552131853, "grad_norm": 2.4439709186553955, "learning_rate": 9.402163094685158e-06, "loss": 0.9702, "step": 2577 }, { "epoch": 0.15394721127433417, "grad_norm": 3.073810338973999, "learning_rate": 9.401499568708116e-06, "loss": 0.9324, "step": 2578 }, { "epoch": 0.1540069270273498, "grad_norm": 2.449343204498291, "learning_rate": 9.400836042731074e-06, "loss": 0.917, "step": 2579 }, { "epoch": 0.15406664278036547, "grad_norm": 1.8007915019989014, "learning_rate": 9.400172516754032e-06, "loss": 0.9367, "step": 2580 }, { "epoch": 0.1541263585333811, "grad_norm": 5.49234676361084, "learning_rate": 9.39950899077699e-06, "loss": 0.9355, "step": 2581 }, { "epoch": 0.15418607428639675, "grad_norm": 2.743633270263672, "learning_rate": 9.398845464799947e-06, "loss": 0.9152, "step": 2582 }, { "epoch": 0.15424579003941238, "grad_norm": 1.9756160974502563, "learning_rate": 9.398181938822906e-06, "loss": 0.9507, "step": 2583 }, { "epoch": 0.15430550579242805, "grad_norm": 2.407712697982788, "learning_rate": 9.397518412845865e-06, "loss": 0.9228, "step": 2584 }, { "epoch": 0.1543652215454437, "grad_norm": 2.9312117099761963, "learning_rate": 9.396854886868821e-06, "loss": 0.9457, "step": 2585 }, { "epoch": 0.15442493729845932, "grad_norm": 2.4098522663116455, "learning_rate": 9.396191360891779e-06, "loss": 0.9443, "step": 2586 }, { "epoch": 0.154484653051475, "grad_norm": 2.288306474685669, "learning_rate": 9.395527834914737e-06, "loss": 0.9915, "step": 2587 }, { "epoch": 0.15454436880449063, "grad_norm": 4.132940769195557, "learning_rate": 9.394864308937695e-06, "loss": 0.9559, "step": 2588 }, { "epoch": 0.15460408455750627, "grad_norm": 2.5787150859832764, "learning_rate": 9.394200782960653e-06, "loss": 0.921, "step": 2589 }, { "epoch": 0.1546638003105219, "grad_norm": 2.764265298843384, "learning_rate": 9.393537256983611e-06, "loss": 0.9167, "step": 2590 }, { "epoch": 0.15472351606353757, "grad_norm": 2.725383758544922, "learning_rate": 9.39287373100657e-06, "loss": 0.9142, "step": 2591 }, { "epoch": 0.1547832318165532, "grad_norm": 2.557729959487915, "learning_rate": 9.392210205029528e-06, "loss": 0.9566, "step": 2592 }, { "epoch": 0.15484294756956885, "grad_norm": 3.014270782470703, "learning_rate": 9.391546679052486e-06, "loss": 0.9461, "step": 2593 }, { "epoch": 0.1549026633225845, "grad_norm": 2.24782657623291, "learning_rate": 9.390883153075444e-06, "loss": 0.9401, "step": 2594 }, { "epoch": 0.15496237907560015, "grad_norm": 2.7359907627105713, "learning_rate": 9.390219627098402e-06, "loss": 0.9494, "step": 2595 }, { "epoch": 0.1550220948286158, "grad_norm": 3.875990390777588, "learning_rate": 9.38955610112136e-06, "loss": 0.898, "step": 2596 }, { "epoch": 0.15508181058163142, "grad_norm": 2.725748300552368, "learning_rate": 9.388892575144316e-06, "loss": 0.9446, "step": 2597 }, { "epoch": 0.1551415263346471, "grad_norm": 2.492598295211792, "learning_rate": 9.388229049167276e-06, "loss": 0.9502, "step": 2598 }, { "epoch": 0.15520124208766273, "grad_norm": 4.327844619750977, "learning_rate": 9.387565523190234e-06, "loss": 0.9795, "step": 2599 }, { "epoch": 0.15526095784067837, "grad_norm": 3.442169189453125, "learning_rate": 9.38690199721319e-06, "loss": 0.9655, "step": 2600 }, { "epoch": 0.15526095784067837, "eval_text_loss": 0.9612678289413452, "eval_text_runtime": 15.2122, "eval_text_samples_per_second": 262.947, "eval_text_steps_per_second": 0.526, "step": 2600 }, { "epoch": 0.15526095784067837, "eval_image_loss": 0.6963553428649902, "eval_image_runtime": 5.0123, "eval_image_samples_per_second": 798.035, "eval_image_steps_per_second": 1.596, "step": 2600 }, { "epoch": 0.15526095784067837, "eval_video_loss": 1.1733360290527344, "eval_video_runtime": 76.8624, "eval_video_samples_per_second": 52.041, "eval_video_steps_per_second": 0.104, "step": 2600 }, { "epoch": 0.155320673593694, "grad_norm": 2.2289652824401855, "learning_rate": 9.38623847123615e-06, "loss": 0.9463, "step": 2601 }, { "epoch": 0.15538038934670967, "grad_norm": 4.05707311630249, "learning_rate": 9.385574945259107e-06, "loss": 0.961, "step": 2602 }, { "epoch": 0.1554401050997253, "grad_norm": 4.151010513305664, "learning_rate": 9.384911419282065e-06, "loss": 0.9173, "step": 2603 }, { "epoch": 0.15549982085274094, "grad_norm": 3.0005340576171875, "learning_rate": 9.384247893305025e-06, "loss": 0.9302, "step": 2604 }, { "epoch": 0.1555595366057566, "grad_norm": 2.143958568572998, "learning_rate": 9.383584367327981e-06, "loss": 0.9361, "step": 2605 }, { "epoch": 0.15561925235877225, "grad_norm": 1.8464945554733276, "learning_rate": 9.38292084135094e-06, "loss": 0.9502, "step": 2606 }, { "epoch": 0.15567896811178789, "grad_norm": 1.6447837352752686, "learning_rate": 9.382257315373897e-06, "loss": 0.9423, "step": 2607 }, { "epoch": 0.15573868386480352, "grad_norm": 2.5387017726898193, "learning_rate": 9.381593789396856e-06, "loss": 0.8976, "step": 2608 }, { "epoch": 0.1557983996178192, "grad_norm": 1.8570168018341064, "learning_rate": 9.380930263419814e-06, "loss": 0.9518, "step": 2609 }, { "epoch": 0.15585811537083483, "grad_norm": 2.1177260875701904, "learning_rate": 9.380266737442772e-06, "loss": 0.9485, "step": 2610 }, { "epoch": 0.15591783112385046, "grad_norm": 2.264545202255249, "learning_rate": 9.37960321146573e-06, "loss": 0.9722, "step": 2611 }, { "epoch": 0.15597754687686613, "grad_norm": 2.3774046897888184, "learning_rate": 9.378939685488688e-06, "loss": 0.9485, "step": 2612 }, { "epoch": 0.15603726262988177, "grad_norm": 2.0562753677368164, "learning_rate": 9.378276159511646e-06, "loss": 0.9018, "step": 2613 }, { "epoch": 0.1560969783828974, "grad_norm": 2.015251874923706, "learning_rate": 9.377612633534604e-06, "loss": 0.9374, "step": 2614 }, { "epoch": 0.15615669413591304, "grad_norm": 2.46852970123291, "learning_rate": 9.37694910755756e-06, "loss": 0.9068, "step": 2615 }, { "epoch": 0.1562164098889287, "grad_norm": 3.623090982437134, "learning_rate": 9.37628558158052e-06, "loss": 0.926, "step": 2616 }, { "epoch": 0.15627612564194435, "grad_norm": 3.128661870956421, "learning_rate": 9.375622055603477e-06, "loss": 0.9309, "step": 2617 }, { "epoch": 0.15633584139495998, "grad_norm": 2.4337596893310547, "learning_rate": 9.374958529626435e-06, "loss": 0.9163, "step": 2618 }, { "epoch": 0.15639555714797562, "grad_norm": 1.71116304397583, "learning_rate": 9.374295003649395e-06, "loss": 0.9282, "step": 2619 }, { "epoch": 0.1564552729009913, "grad_norm": 5.900782108306885, "learning_rate": 9.373631477672351e-06, "loss": 0.9368, "step": 2620 }, { "epoch": 0.15651498865400693, "grad_norm": 1.9438021183013916, "learning_rate": 9.37296795169531e-06, "loss": 0.923, "step": 2621 }, { "epoch": 0.15657470440702256, "grad_norm": 2.2947065830230713, "learning_rate": 9.372304425718267e-06, "loss": 0.9288, "step": 2622 }, { "epoch": 0.15663442016003823, "grad_norm": 3.3314599990844727, "learning_rate": 9.371640899741225e-06, "loss": 0.9374, "step": 2623 }, { "epoch": 0.15669413591305387, "grad_norm": 2.32771897315979, "learning_rate": 9.370977373764184e-06, "loss": 0.9777, "step": 2624 }, { "epoch": 0.1567538516660695, "grad_norm": 1.9423779249191284, "learning_rate": 9.370313847787142e-06, "loss": 0.9259, "step": 2625 }, { "epoch": 0.15681356741908514, "grad_norm": 2.0998406410217285, "learning_rate": 9.3696503218101e-06, "loss": 0.9537, "step": 2626 }, { "epoch": 0.1568732831721008, "grad_norm": 4.066997051239014, "learning_rate": 9.368986795833058e-06, "loss": 0.9455, "step": 2627 }, { "epoch": 0.15693299892511645, "grad_norm": 2.492661476135254, "learning_rate": 9.368323269856016e-06, "loss": 0.9122, "step": 2628 }, { "epoch": 0.15699271467813208, "grad_norm": 2.7731447219848633, "learning_rate": 9.367659743878974e-06, "loss": 0.9591, "step": 2629 }, { "epoch": 0.15705243043114775, "grad_norm": 2.0535106658935547, "learning_rate": 9.366996217901932e-06, "loss": 0.898, "step": 2630 }, { "epoch": 0.1571121461841634, "grad_norm": 2.695718288421631, "learning_rate": 9.36633269192489e-06, "loss": 0.9194, "step": 2631 }, { "epoch": 0.15717186193717902, "grad_norm": 2.4253013134002686, "learning_rate": 9.365669165947847e-06, "loss": 0.9398, "step": 2632 }, { "epoch": 0.15723157769019466, "grad_norm": 2.074904203414917, "learning_rate": 9.365005639970806e-06, "loss": 0.9425, "step": 2633 }, { "epoch": 0.15729129344321033, "grad_norm": 2.3460402488708496, "learning_rate": 9.364342113993765e-06, "loss": 0.9399, "step": 2634 }, { "epoch": 0.15735100919622597, "grad_norm": 1.6913583278656006, "learning_rate": 9.363678588016721e-06, "loss": 0.957, "step": 2635 }, { "epoch": 0.1574107249492416, "grad_norm": 3.07513689994812, "learning_rate": 9.363015062039679e-06, "loss": 0.9363, "step": 2636 }, { "epoch": 0.15747044070225724, "grad_norm": 1.9973217248916626, "learning_rate": 9.362351536062637e-06, "loss": 0.9609, "step": 2637 }, { "epoch": 0.1575301564552729, "grad_norm": 3.3593733310699463, "learning_rate": 9.361688010085595e-06, "loss": 0.9289, "step": 2638 }, { "epoch": 0.15758987220828855, "grad_norm": 3.633345603942871, "learning_rate": 9.361024484108553e-06, "loss": 0.9721, "step": 2639 }, { "epoch": 0.15764958796130418, "grad_norm": 1.9935342073440552, "learning_rate": 9.360360958131511e-06, "loss": 0.9627, "step": 2640 }, { "epoch": 0.15770930371431985, "grad_norm": 2.013291835784912, "learning_rate": 9.35969743215447e-06, "loss": 0.9388, "step": 2641 }, { "epoch": 0.1577690194673355, "grad_norm": 2.817845582962036, "learning_rate": 9.359033906177428e-06, "loss": 0.9532, "step": 2642 }, { "epoch": 0.15782873522035112, "grad_norm": 2.1377546787261963, "learning_rate": 9.358370380200386e-06, "loss": 0.9268, "step": 2643 }, { "epoch": 0.15788845097336676, "grad_norm": 2.0677900314331055, "learning_rate": 9.357706854223344e-06, "loss": 0.9642, "step": 2644 }, { "epoch": 0.15794816672638243, "grad_norm": 4.737471103668213, "learning_rate": 9.357043328246302e-06, "loss": 0.982, "step": 2645 }, { "epoch": 0.15800788247939807, "grad_norm": 1.6958378553390503, "learning_rate": 9.35637980226926e-06, "loss": 0.939, "step": 2646 }, { "epoch": 0.1580675982324137, "grad_norm": 8.680261611938477, "learning_rate": 9.355716276292216e-06, "loss": 0.9283, "step": 2647 }, { "epoch": 0.15812731398542937, "grad_norm": 2.262944221496582, "learning_rate": 9.355052750315176e-06, "loss": 0.941, "step": 2648 }, { "epoch": 0.158187029738445, "grad_norm": 2.4903297424316406, "learning_rate": 9.354389224338134e-06, "loss": 0.9356, "step": 2649 }, { "epoch": 0.15824674549146064, "grad_norm": 2.4521682262420654, "learning_rate": 9.35372569836109e-06, "loss": 0.9357, "step": 2650 }, { "epoch": 0.15830646124447628, "grad_norm": 2.3401856422424316, "learning_rate": 9.35306217238405e-06, "loss": 0.9458, "step": 2651 }, { "epoch": 0.15836617699749195, "grad_norm": 3.3407654762268066, "learning_rate": 9.352398646407007e-06, "loss": 0.9605, "step": 2652 }, { "epoch": 0.15842589275050759, "grad_norm": 2.175250768661499, "learning_rate": 9.351735120429965e-06, "loss": 0.925, "step": 2653 }, { "epoch": 0.15848560850352322, "grad_norm": 2.6727044582366943, "learning_rate": 9.351071594452925e-06, "loss": 0.9777, "step": 2654 }, { "epoch": 0.1585453242565389, "grad_norm": 2.4338481426239014, "learning_rate": 9.350408068475881e-06, "loss": 0.9439, "step": 2655 }, { "epoch": 0.15860504000955453, "grad_norm": 3.081456184387207, "learning_rate": 9.34974454249884e-06, "loss": 0.9683, "step": 2656 }, { "epoch": 0.15866475576257016, "grad_norm": 8.02485179901123, "learning_rate": 9.349081016521797e-06, "loss": 0.9741, "step": 2657 }, { "epoch": 0.1587244715155858, "grad_norm": 2.194138526916504, "learning_rate": 9.348417490544756e-06, "loss": 0.9781, "step": 2658 }, { "epoch": 0.15878418726860147, "grad_norm": 4.7503509521484375, "learning_rate": 9.347753964567714e-06, "loss": 0.9484, "step": 2659 }, { "epoch": 0.1588439030216171, "grad_norm": 3.3565409183502197, "learning_rate": 9.347090438590672e-06, "loss": 0.9362, "step": 2660 }, { "epoch": 0.15890361877463274, "grad_norm": 1.8521794080734253, "learning_rate": 9.34642691261363e-06, "loss": 0.9382, "step": 2661 }, { "epoch": 0.15896333452764838, "grad_norm": 2.3411834239959717, "learning_rate": 9.345763386636588e-06, "loss": 0.9358, "step": 2662 }, { "epoch": 0.15902305028066405, "grad_norm": 3.518367290496826, "learning_rate": 9.345099860659546e-06, "loss": 0.9171, "step": 2663 }, { "epoch": 0.15908276603367968, "grad_norm": 3.9742064476013184, "learning_rate": 9.344436334682504e-06, "loss": 0.9261, "step": 2664 }, { "epoch": 0.15914248178669532, "grad_norm": 1.8422690629959106, "learning_rate": 9.34377280870546e-06, "loss": 0.9524, "step": 2665 }, { "epoch": 0.159202197539711, "grad_norm": 2.3522517681121826, "learning_rate": 9.34310928272842e-06, "loss": 0.9391, "step": 2666 }, { "epoch": 0.15926191329272663, "grad_norm": 5.828465461730957, "learning_rate": 9.342445756751377e-06, "loss": 0.9212, "step": 2667 }, { "epoch": 0.15932162904574226, "grad_norm": 2.258413791656494, "learning_rate": 9.341782230774335e-06, "loss": 0.9288, "step": 2668 }, { "epoch": 0.1593813447987579, "grad_norm": 2.7827606201171875, "learning_rate": 9.341118704797295e-06, "loss": 0.9351, "step": 2669 }, { "epoch": 0.15944106055177357, "grad_norm": 3.144165277481079, "learning_rate": 9.340455178820251e-06, "loss": 0.9503, "step": 2670 }, { "epoch": 0.1595007763047892, "grad_norm": 2.184136390686035, "learning_rate": 9.339791652843209e-06, "loss": 0.902, "step": 2671 }, { "epoch": 0.15956049205780484, "grad_norm": 3.8499794006347656, "learning_rate": 9.339128126866167e-06, "loss": 0.9287, "step": 2672 }, { "epoch": 0.1596202078108205, "grad_norm": 2.6497645378112793, "learning_rate": 9.338464600889125e-06, "loss": 0.9677, "step": 2673 }, { "epoch": 0.15967992356383615, "grad_norm": 3.838345766067505, "learning_rate": 9.337801074912083e-06, "loss": 0.9559, "step": 2674 }, { "epoch": 0.15973963931685178, "grad_norm": 4.1335320472717285, "learning_rate": 9.337137548935042e-06, "loss": 0.8991, "step": 2675 }, { "epoch": 0.15979935506986742, "grad_norm": 3.045543909072876, "learning_rate": 9.336474022958e-06, "loss": 0.917, "step": 2676 }, { "epoch": 0.1598590708228831, "grad_norm": 2.2888059616088867, "learning_rate": 9.335810496980958e-06, "loss": 0.9419, "step": 2677 }, { "epoch": 0.15991878657589872, "grad_norm": 2.2033207416534424, "learning_rate": 9.335146971003916e-06, "loss": 0.9514, "step": 2678 }, { "epoch": 0.15997850232891436, "grad_norm": 2.2921066284179688, "learning_rate": 9.334483445026874e-06, "loss": 0.9503, "step": 2679 }, { "epoch": 0.16003821808193, "grad_norm": 2.104685068130493, "learning_rate": 9.333819919049832e-06, "loss": 0.9619, "step": 2680 }, { "epoch": 0.16009793383494567, "grad_norm": 4.065999984741211, "learning_rate": 9.33315639307279e-06, "loss": 0.9625, "step": 2681 }, { "epoch": 0.1601576495879613, "grad_norm": 2.545896530151367, "learning_rate": 9.332492867095747e-06, "loss": 0.9254, "step": 2682 }, { "epoch": 0.16021736534097694, "grad_norm": 2.0110561847686768, "learning_rate": 9.331829341118706e-06, "loss": 0.9288, "step": 2683 }, { "epoch": 0.1602770810939926, "grad_norm": 2.2401535511016846, "learning_rate": 9.331165815141664e-06, "loss": 0.9342, "step": 2684 }, { "epoch": 0.16033679684700824, "grad_norm": 2.2573201656341553, "learning_rate": 9.330502289164621e-06, "loss": 0.9515, "step": 2685 }, { "epoch": 0.16039651260002388, "grad_norm": 2.2844693660736084, "learning_rate": 9.329838763187579e-06, "loss": 0.927, "step": 2686 }, { "epoch": 0.16045622835303952, "grad_norm": 2.1798298358917236, "learning_rate": 9.329175237210537e-06, "loss": 0.9554, "step": 2687 }, { "epoch": 0.1605159441060552, "grad_norm": 2.5097532272338867, "learning_rate": 9.328511711233495e-06, "loss": 0.9217, "step": 2688 }, { "epoch": 0.16057565985907082, "grad_norm": 2.1608455181121826, "learning_rate": 9.327848185256453e-06, "loss": 0.9267, "step": 2689 }, { "epoch": 0.16063537561208646, "grad_norm": 2.2027077674865723, "learning_rate": 9.327184659279411e-06, "loss": 0.9495, "step": 2690 }, { "epoch": 0.16069509136510213, "grad_norm": 3.364063262939453, "learning_rate": 9.32652113330237e-06, "loss": 0.954, "step": 2691 }, { "epoch": 0.16075480711811777, "grad_norm": 2.948009490966797, "learning_rate": 9.325857607325328e-06, "loss": 0.9538, "step": 2692 }, { "epoch": 0.1608145228711334, "grad_norm": 2.1509060859680176, "learning_rate": 9.325194081348286e-06, "loss": 0.9178, "step": 2693 }, { "epoch": 0.16087423862414904, "grad_norm": 1.8865604400634766, "learning_rate": 9.324530555371244e-06, "loss": 0.9334, "step": 2694 }, { "epoch": 0.1609339543771647, "grad_norm": 2.324319362640381, "learning_rate": 9.323867029394202e-06, "loss": 0.893, "step": 2695 }, { "epoch": 0.16099367013018034, "grad_norm": 3.1093268394470215, "learning_rate": 9.32320350341716e-06, "loss": 0.9071, "step": 2696 }, { "epoch": 0.16105338588319598, "grad_norm": 2.0624642372131348, "learning_rate": 9.322539977440116e-06, "loss": 0.8846, "step": 2697 }, { "epoch": 0.16111310163621162, "grad_norm": 2.6106860637664795, "learning_rate": 9.321876451463076e-06, "loss": 0.9456, "step": 2698 }, { "epoch": 0.16117281738922729, "grad_norm": 2.1260569095611572, "learning_rate": 9.321212925486034e-06, "loss": 0.9329, "step": 2699 }, { "epoch": 0.16123253314224292, "grad_norm": 1.6342856884002686, "learning_rate": 9.32054939950899e-06, "loss": 0.9179, "step": 2700 }, { "epoch": 0.16129224889525856, "grad_norm": 2.2537035942077637, "learning_rate": 9.31988587353195e-06, "loss": 0.9214, "step": 2701 }, { "epoch": 0.16135196464827423, "grad_norm": 2.8710381984710693, "learning_rate": 9.319222347554907e-06, "loss": 0.9074, "step": 2702 }, { "epoch": 0.16141168040128986, "grad_norm": 2.0701098442077637, "learning_rate": 9.318558821577865e-06, "loss": 0.9386, "step": 2703 }, { "epoch": 0.1614713961543055, "grad_norm": 3.0611517429351807, "learning_rate": 9.317895295600825e-06, "loss": 0.9284, "step": 2704 }, { "epoch": 0.16153111190732114, "grad_norm": 2.489089012145996, "learning_rate": 9.317231769623781e-06, "loss": 0.9321, "step": 2705 }, { "epoch": 0.1615908276603368, "grad_norm": 2.2420811653137207, "learning_rate": 9.31656824364674e-06, "loss": 0.9321, "step": 2706 }, { "epoch": 0.16165054341335244, "grad_norm": 2.1280858516693115, "learning_rate": 9.315904717669697e-06, "loss": 0.9767, "step": 2707 }, { "epoch": 0.16171025916636808, "grad_norm": 2.555314302444458, "learning_rate": 9.315241191692655e-06, "loss": 0.9071, "step": 2708 }, { "epoch": 0.16176997491938375, "grad_norm": 2.732250928878784, "learning_rate": 9.314577665715614e-06, "loss": 0.9332, "step": 2709 }, { "epoch": 0.16182969067239938, "grad_norm": 1.9286466836929321, "learning_rate": 9.313914139738572e-06, "loss": 0.9018, "step": 2710 }, { "epoch": 0.16188940642541502, "grad_norm": 2.8770217895507812, "learning_rate": 9.31325061376153e-06, "loss": 0.9416, "step": 2711 }, { "epoch": 0.16194912217843066, "grad_norm": 2.899885416030884, "learning_rate": 9.312587087784488e-06, "loss": 0.9088, "step": 2712 }, { "epoch": 0.16200883793144633, "grad_norm": 4.154971599578857, "learning_rate": 9.311923561807446e-06, "loss": 0.9264, "step": 2713 }, { "epoch": 0.16206855368446196, "grad_norm": 1.8074275255203247, "learning_rate": 9.311260035830404e-06, "loss": 0.9077, "step": 2714 }, { "epoch": 0.1621282694374776, "grad_norm": 2.0057151317596436, "learning_rate": 9.31059650985336e-06, "loss": 0.8938, "step": 2715 }, { "epoch": 0.16218798519049324, "grad_norm": 3.3175418376922607, "learning_rate": 9.30993298387632e-06, "loss": 0.9396, "step": 2716 }, { "epoch": 0.1622477009435089, "grad_norm": 1.9548710584640503, "learning_rate": 9.309269457899277e-06, "loss": 0.9554, "step": 2717 }, { "epoch": 0.16230741669652454, "grad_norm": 3.298783540725708, "learning_rate": 9.308605931922235e-06, "loss": 0.9577, "step": 2718 }, { "epoch": 0.16236713244954018, "grad_norm": 2.321518659591675, "learning_rate": 9.307942405945195e-06, "loss": 0.9532, "step": 2719 }, { "epoch": 0.16242684820255585, "grad_norm": 2.451794385910034, "learning_rate": 9.307278879968151e-06, "loss": 0.9346, "step": 2720 }, { "epoch": 0.16248656395557148, "grad_norm": 2.9110217094421387, "learning_rate": 9.306615353991109e-06, "loss": 0.9552, "step": 2721 }, { "epoch": 0.16254627970858712, "grad_norm": 3.3712258338928223, "learning_rate": 9.305951828014067e-06, "loss": 0.9363, "step": 2722 }, { "epoch": 0.16260599546160276, "grad_norm": 5.217428207397461, "learning_rate": 9.305288302037025e-06, "loss": 0.9985, "step": 2723 }, { "epoch": 0.16266571121461842, "grad_norm": 2.1604907512664795, "learning_rate": 9.304624776059983e-06, "loss": 0.9081, "step": 2724 }, { "epoch": 0.16272542696763406, "grad_norm": 2.1023213863372803, "learning_rate": 9.303961250082941e-06, "loss": 0.9356, "step": 2725 }, { "epoch": 0.1627851427206497, "grad_norm": 3.3734171390533447, "learning_rate": 9.3032977241059e-06, "loss": 0.9395, "step": 2726 }, { "epoch": 0.16284485847366537, "grad_norm": 2.280426502227783, "learning_rate": 9.302634198128858e-06, "loss": 0.936, "step": 2727 }, { "epoch": 0.162904574226681, "grad_norm": 2.395514488220215, "learning_rate": 9.301970672151816e-06, "loss": 0.9124, "step": 2728 }, { "epoch": 0.16296428997969664, "grad_norm": 2.6505348682403564, "learning_rate": 9.301307146174774e-06, "loss": 0.9066, "step": 2729 }, { "epoch": 0.16302400573271228, "grad_norm": 2.219331741333008, "learning_rate": 9.300643620197732e-06, "loss": 0.9394, "step": 2730 }, { "epoch": 0.16308372148572794, "grad_norm": 3.148611068725586, "learning_rate": 9.29998009422069e-06, "loss": 0.9751, "step": 2731 }, { "epoch": 0.16314343723874358, "grad_norm": 2.3238117694854736, "learning_rate": 9.299316568243647e-06, "loss": 0.9065, "step": 2732 }, { "epoch": 0.16320315299175922, "grad_norm": 1.842363953590393, "learning_rate": 9.298653042266606e-06, "loss": 0.957, "step": 2733 }, { "epoch": 0.16326286874477486, "grad_norm": 2.511692523956299, "learning_rate": 9.297989516289564e-06, "loss": 0.9321, "step": 2734 }, { "epoch": 0.16332258449779052, "grad_norm": 1.9431432485580444, "learning_rate": 9.29732599031252e-06, "loss": 0.9519, "step": 2735 }, { "epoch": 0.16338230025080616, "grad_norm": 2.5650312900543213, "learning_rate": 9.296662464335479e-06, "loss": 0.9364, "step": 2736 }, { "epoch": 0.1634420160038218, "grad_norm": 2.1724789142608643, "learning_rate": 9.295998938358437e-06, "loss": 0.8991, "step": 2737 }, { "epoch": 0.16350173175683747, "grad_norm": 3.39560866355896, "learning_rate": 9.295335412381395e-06, "loss": 0.911, "step": 2738 }, { "epoch": 0.1635614475098531, "grad_norm": 2.5002331733703613, "learning_rate": 9.294671886404353e-06, "loss": 0.9476, "step": 2739 }, { "epoch": 0.16362116326286874, "grad_norm": 2.8889808654785156, "learning_rate": 9.294008360427311e-06, "loss": 0.9313, "step": 2740 }, { "epoch": 0.16368087901588438, "grad_norm": 2.6118173599243164, "learning_rate": 9.29334483445027e-06, "loss": 0.9708, "step": 2741 }, { "epoch": 0.16374059476890004, "grad_norm": 2.4693219661712646, "learning_rate": 9.292681308473228e-06, "loss": 0.9171, "step": 2742 }, { "epoch": 0.16380031052191568, "grad_norm": 2.351414918899536, "learning_rate": 9.292017782496186e-06, "loss": 0.9562, "step": 2743 }, { "epoch": 0.16386002627493132, "grad_norm": 2.707585334777832, "learning_rate": 9.291354256519144e-06, "loss": 0.9444, "step": 2744 }, { "epoch": 0.16391974202794699, "grad_norm": 1.9648339748382568, "learning_rate": 9.290690730542102e-06, "loss": 0.9096, "step": 2745 }, { "epoch": 0.16397945778096262, "grad_norm": 3.9459221363067627, "learning_rate": 9.29002720456506e-06, "loss": 0.9243, "step": 2746 }, { "epoch": 0.16403917353397826, "grad_norm": 2.078425645828247, "learning_rate": 9.289363678588016e-06, "loss": 0.9102, "step": 2747 }, { "epoch": 0.1640988892869939, "grad_norm": 2.7227635383605957, "learning_rate": 9.288700152610976e-06, "loss": 0.9312, "step": 2748 }, { "epoch": 0.16415860504000956, "grad_norm": 1.846681833267212, "learning_rate": 9.288036626633934e-06, "loss": 0.9352, "step": 2749 }, { "epoch": 0.1642183207930252, "grad_norm": 2.6907362937927246, "learning_rate": 9.28737310065689e-06, "loss": 0.9387, "step": 2750 }, { "epoch": 0.16427803654604084, "grad_norm": 2.5129222869873047, "learning_rate": 9.28670957467985e-06, "loss": 0.901, "step": 2751 }, { "epoch": 0.16433775229905648, "grad_norm": 2.0229480266571045, "learning_rate": 9.286046048702807e-06, "loss": 0.9079, "step": 2752 }, { "epoch": 0.16439746805207214, "grad_norm": 1.8906447887420654, "learning_rate": 9.285382522725765e-06, "loss": 0.9305, "step": 2753 }, { "epoch": 0.16445718380508778, "grad_norm": 2.4056248664855957, "learning_rate": 9.284718996748725e-06, "loss": 0.9461, "step": 2754 }, { "epoch": 0.16451689955810342, "grad_norm": 1.9975606203079224, "learning_rate": 9.284055470771681e-06, "loss": 0.9453, "step": 2755 }, { "epoch": 0.16457661531111908, "grad_norm": 3.3341472148895264, "learning_rate": 9.28339194479464e-06, "loss": 0.9312, "step": 2756 }, { "epoch": 0.16463633106413472, "grad_norm": 2.4038448333740234, "learning_rate": 9.282728418817597e-06, "loss": 0.9133, "step": 2757 }, { "epoch": 0.16469604681715036, "grad_norm": 1.9144282341003418, "learning_rate": 9.282064892840555e-06, "loss": 0.9037, "step": 2758 }, { "epoch": 0.164755762570166, "grad_norm": 2.4640655517578125, "learning_rate": 9.281401366863514e-06, "loss": 0.9056, "step": 2759 }, { "epoch": 0.16481547832318166, "grad_norm": 4.06308126449585, "learning_rate": 9.280737840886472e-06, "loss": 0.9765, "step": 2760 }, { "epoch": 0.1648751940761973, "grad_norm": 2.030510902404785, "learning_rate": 9.28007431490943e-06, "loss": 0.9206, "step": 2761 }, { "epoch": 0.16493490982921294, "grad_norm": 3.9294638633728027, "learning_rate": 9.279410788932388e-06, "loss": 0.9545, "step": 2762 }, { "epoch": 0.1649946255822286, "grad_norm": 2.2740631103515625, "learning_rate": 9.278747262955346e-06, "loss": 0.9406, "step": 2763 }, { "epoch": 0.16505434133524424, "grad_norm": 2.053527593612671, "learning_rate": 9.278083736978304e-06, "loss": 0.9207, "step": 2764 }, { "epoch": 0.16511405708825988, "grad_norm": 2.3104960918426514, "learning_rate": 9.27742021100126e-06, "loss": 0.9385, "step": 2765 }, { "epoch": 0.16517377284127552, "grad_norm": 2.5524721145629883, "learning_rate": 9.27675668502422e-06, "loss": 0.938, "step": 2766 }, { "epoch": 0.16523348859429118, "grad_norm": 1.6970679759979248, "learning_rate": 9.276093159047177e-06, "loss": 0.927, "step": 2767 }, { "epoch": 0.16529320434730682, "grad_norm": 5.659600734710693, "learning_rate": 9.275429633070135e-06, "loss": 0.9634, "step": 2768 }, { "epoch": 0.16535292010032246, "grad_norm": 2.3264517784118652, "learning_rate": 9.274766107093095e-06, "loss": 0.9692, "step": 2769 }, { "epoch": 0.1654126358533381, "grad_norm": 2.190098762512207, "learning_rate": 9.274102581116051e-06, "loss": 0.9321, "step": 2770 }, { "epoch": 0.16547235160635376, "grad_norm": 1.871661901473999, "learning_rate": 9.273439055139009e-06, "loss": 0.9354, "step": 2771 }, { "epoch": 0.1655320673593694, "grad_norm": 2.147315502166748, "learning_rate": 9.272775529161967e-06, "loss": 0.9168, "step": 2772 }, { "epoch": 0.16559178311238504, "grad_norm": 3.7177062034606934, "learning_rate": 9.272112003184925e-06, "loss": 0.9402, "step": 2773 }, { "epoch": 0.1656514988654007, "grad_norm": 2.360808849334717, "learning_rate": 9.271448477207883e-06, "loss": 0.9593, "step": 2774 }, { "epoch": 0.16571121461841634, "grad_norm": 2.0025885105133057, "learning_rate": 9.270784951230841e-06, "loss": 0.9659, "step": 2775 }, { "epoch": 0.16577093037143198, "grad_norm": 1.9015686511993408, "learning_rate": 9.2701214252538e-06, "loss": 0.935, "step": 2776 }, { "epoch": 0.16583064612444762, "grad_norm": 2.692394971847534, "learning_rate": 9.269457899276758e-06, "loss": 0.9597, "step": 2777 }, { "epoch": 0.16589036187746328, "grad_norm": 2.4478304386138916, "learning_rate": 9.268794373299716e-06, "loss": 0.9374, "step": 2778 }, { "epoch": 0.16595007763047892, "grad_norm": 2.2546634674072266, "learning_rate": 9.268130847322674e-06, "loss": 0.9495, "step": 2779 }, { "epoch": 0.16600979338349456, "grad_norm": 2.5008299350738525, "learning_rate": 9.267467321345632e-06, "loss": 0.9108, "step": 2780 }, { "epoch": 0.16606950913651022, "grad_norm": 2.1958694458007812, "learning_rate": 9.26680379536859e-06, "loss": 0.9259, "step": 2781 }, { "epoch": 0.16612922488952586, "grad_norm": 3.128352403640747, "learning_rate": 9.266140269391546e-06, "loss": 0.9202, "step": 2782 }, { "epoch": 0.1661889406425415, "grad_norm": 2.2428431510925293, "learning_rate": 9.265476743414506e-06, "loss": 0.9202, "step": 2783 }, { "epoch": 0.16624865639555714, "grad_norm": 1.9108067750930786, "learning_rate": 9.264813217437464e-06, "loss": 0.948, "step": 2784 }, { "epoch": 0.1663083721485728, "grad_norm": 2.8875815868377686, "learning_rate": 9.26414969146042e-06, "loss": 0.9374, "step": 2785 }, { "epoch": 0.16636808790158844, "grad_norm": 2.0377297401428223, "learning_rate": 9.263486165483379e-06, "loss": 0.9069, "step": 2786 }, { "epoch": 0.16642780365460408, "grad_norm": 4.678360462188721, "learning_rate": 9.262822639506337e-06, "loss": 0.9354, "step": 2787 }, { "epoch": 0.16648751940761972, "grad_norm": 1.9328100681304932, "learning_rate": 9.262159113529295e-06, "loss": 0.921, "step": 2788 }, { "epoch": 0.16654723516063538, "grad_norm": 2.6620352268218994, "learning_rate": 9.261495587552253e-06, "loss": 0.9307, "step": 2789 }, { "epoch": 0.16660695091365102, "grad_norm": 2.66020131111145, "learning_rate": 9.260832061575211e-06, "loss": 0.9207, "step": 2790 }, { "epoch": 0.16666666666666666, "grad_norm": 6.544478893280029, "learning_rate": 9.26016853559817e-06, "loss": 0.9241, "step": 2791 }, { "epoch": 0.16672638241968232, "grad_norm": 2.527632236480713, "learning_rate": 9.259505009621127e-06, "loss": 0.9414, "step": 2792 }, { "epoch": 0.16678609817269796, "grad_norm": 2.3375654220581055, "learning_rate": 9.258841483644086e-06, "loss": 0.924, "step": 2793 }, { "epoch": 0.1668458139257136, "grad_norm": 2.064525604248047, "learning_rate": 9.258177957667044e-06, "loss": 0.915, "step": 2794 }, { "epoch": 0.16690552967872924, "grad_norm": 2.747225046157837, "learning_rate": 9.257514431690002e-06, "loss": 0.9131, "step": 2795 }, { "epoch": 0.1669652454317449, "grad_norm": 2.2207274436950684, "learning_rate": 9.25685090571296e-06, "loss": 0.9473, "step": 2796 }, { "epoch": 0.16702496118476054, "grad_norm": 2.8467559814453125, "learning_rate": 9.256187379735916e-06, "loss": 0.9202, "step": 2797 }, { "epoch": 0.16708467693777618, "grad_norm": 3.155768394470215, "learning_rate": 9.255523853758876e-06, "loss": 0.9481, "step": 2798 }, { "epoch": 0.16714439269079184, "grad_norm": 2.0632717609405518, "learning_rate": 9.254860327781834e-06, "loss": 0.9222, "step": 2799 }, { "epoch": 0.16720410844380748, "grad_norm": 1.9677320718765259, "learning_rate": 9.25419680180479e-06, "loss": 0.9017, "step": 2800 }, { "epoch": 0.16720410844380748, "eval_text_loss": 0.960015594959259, "eval_text_runtime": 15.1676, "eval_text_samples_per_second": 263.72, "eval_text_steps_per_second": 0.527, "step": 2800 }, { "epoch": 0.16720410844380748, "eval_image_loss": 0.693382740020752, "eval_image_runtime": 5.2123, "eval_image_samples_per_second": 767.416, "eval_image_steps_per_second": 1.535, "step": 2800 }, { "epoch": 0.16720410844380748, "eval_video_loss": 1.1702890396118164, "eval_video_runtime": 77.5207, "eval_video_samples_per_second": 51.599, "eval_video_steps_per_second": 0.103, "step": 2800 }, { "epoch": 0.16726382419682312, "grad_norm": 2.121821880340576, "learning_rate": 9.25353327582775e-06, "loss": 0.9435, "step": 2801 }, { "epoch": 0.16732353994983876, "grad_norm": 1.9149880409240723, "learning_rate": 9.252869749850707e-06, "loss": 0.9573, "step": 2802 }, { "epoch": 0.16738325570285442, "grad_norm": 2.133972406387329, "learning_rate": 9.252206223873665e-06, "loss": 0.9172, "step": 2803 }, { "epoch": 0.16744297145587006, "grad_norm": 3.039686918258667, "learning_rate": 9.251542697896625e-06, "loss": 0.945, "step": 2804 }, { "epoch": 0.1675026872088857, "grad_norm": 1.9200196266174316, "learning_rate": 9.250879171919581e-06, "loss": 0.9475, "step": 2805 }, { "epoch": 0.16756240296190136, "grad_norm": 1.6259435415267944, "learning_rate": 9.25021564594254e-06, "loss": 0.9007, "step": 2806 }, { "epoch": 0.167622118714917, "grad_norm": 2.056816339492798, "learning_rate": 9.249552119965497e-06, "loss": 0.9648, "step": 2807 }, { "epoch": 0.16768183446793264, "grad_norm": 2.5338971614837646, "learning_rate": 9.248888593988455e-06, "loss": 0.9349, "step": 2808 }, { "epoch": 0.16774155022094828, "grad_norm": 2.7359774112701416, "learning_rate": 9.248225068011413e-06, "loss": 0.904, "step": 2809 }, { "epoch": 0.16780126597396394, "grad_norm": 3.442486524581909, "learning_rate": 9.247561542034372e-06, "loss": 0.9498, "step": 2810 }, { "epoch": 0.16786098172697958, "grad_norm": 2.6651482582092285, "learning_rate": 9.24689801605733e-06, "loss": 0.9552, "step": 2811 }, { "epoch": 0.16792069747999522, "grad_norm": 2.352501153945923, "learning_rate": 9.246234490080288e-06, "loss": 0.8906, "step": 2812 }, { "epoch": 0.16798041323301086, "grad_norm": 2.4257161617279053, "learning_rate": 9.245570964103246e-06, "loss": 0.9065, "step": 2813 }, { "epoch": 0.16804012898602652, "grad_norm": 5.287574768066406, "learning_rate": 9.244907438126204e-06, "loss": 0.9461, "step": 2814 }, { "epoch": 0.16809984473904216, "grad_norm": 2.0448591709136963, "learning_rate": 9.24424391214916e-06, "loss": 0.9269, "step": 2815 }, { "epoch": 0.1681595604920578, "grad_norm": 3.4642693996429443, "learning_rate": 9.24358038617212e-06, "loss": 0.9052, "step": 2816 }, { "epoch": 0.16821927624507346, "grad_norm": 2.1033382415771484, "learning_rate": 9.242916860195077e-06, "loss": 0.9279, "step": 2817 }, { "epoch": 0.1682789919980891, "grad_norm": 3.0744147300720215, "learning_rate": 9.242253334218035e-06, "loss": 0.9359, "step": 2818 }, { "epoch": 0.16833870775110474, "grad_norm": 7.214621543884277, "learning_rate": 9.241589808240994e-06, "loss": 0.9637, "step": 2819 }, { "epoch": 0.16839842350412038, "grad_norm": 3.9003653526306152, "learning_rate": 9.240926282263951e-06, "loss": 0.9674, "step": 2820 }, { "epoch": 0.16845813925713604, "grad_norm": 1.9257365465164185, "learning_rate": 9.240262756286909e-06, "loss": 0.9492, "step": 2821 }, { "epoch": 0.16851785501015168, "grad_norm": 2.9068973064422607, "learning_rate": 9.239599230309867e-06, "loss": 0.9225, "step": 2822 }, { "epoch": 0.16857757076316732, "grad_norm": 2.375751495361328, "learning_rate": 9.238935704332825e-06, "loss": 0.9635, "step": 2823 }, { "epoch": 0.16863728651618298, "grad_norm": 2.4720165729522705, "learning_rate": 9.238272178355783e-06, "loss": 0.9513, "step": 2824 }, { "epoch": 0.16869700226919862, "grad_norm": 2.1108920574188232, "learning_rate": 9.237608652378741e-06, "loss": 0.942, "step": 2825 }, { "epoch": 0.16875671802221426, "grad_norm": 3.642011880874634, "learning_rate": 9.2369451264017e-06, "loss": 0.9194, "step": 2826 }, { "epoch": 0.1688164337752299, "grad_norm": 2.792661666870117, "learning_rate": 9.236281600424658e-06, "loss": 0.9246, "step": 2827 }, { "epoch": 0.16887614952824556, "grad_norm": 3.7718114852905273, "learning_rate": 9.235618074447616e-06, "loss": 0.9218, "step": 2828 }, { "epoch": 0.1689358652812612, "grad_norm": 3.9202239513397217, "learning_rate": 9.234954548470574e-06, "loss": 0.9173, "step": 2829 }, { "epoch": 0.16899558103427684, "grad_norm": 3.4207522869110107, "learning_rate": 9.234291022493532e-06, "loss": 0.944, "step": 2830 }, { "epoch": 0.16905529678729248, "grad_norm": 2.1750104427337646, "learning_rate": 9.23362749651649e-06, "loss": 0.9021, "step": 2831 }, { "epoch": 0.16911501254030814, "grad_norm": 1.9095962047576904, "learning_rate": 9.232963970539446e-06, "loss": 0.9284, "step": 2832 }, { "epoch": 0.16917472829332378, "grad_norm": 2.2496776580810547, "learning_rate": 9.232300444562406e-06, "loss": 0.9144, "step": 2833 }, { "epoch": 0.16923444404633942, "grad_norm": 4.431883335113525, "learning_rate": 9.231636918585364e-06, "loss": 0.9112, "step": 2834 }, { "epoch": 0.16929415979935508, "grad_norm": 4.304491996765137, "learning_rate": 9.23097339260832e-06, "loss": 0.9229, "step": 2835 }, { "epoch": 0.16935387555237072, "grad_norm": 2.576601505279541, "learning_rate": 9.230309866631279e-06, "loss": 0.9083, "step": 2836 }, { "epoch": 0.16941359130538636, "grad_norm": 1.8399441242218018, "learning_rate": 9.229646340654237e-06, "loss": 0.9281, "step": 2837 }, { "epoch": 0.169473307058402, "grad_norm": 1.8258858919143677, "learning_rate": 9.228982814677195e-06, "loss": 0.9808, "step": 2838 }, { "epoch": 0.16953302281141766, "grad_norm": 4.30776309967041, "learning_rate": 9.228319288700153e-06, "loss": 0.947, "step": 2839 }, { "epoch": 0.1695927385644333, "grad_norm": 3.731858015060425, "learning_rate": 9.227655762723111e-06, "loss": 0.9362, "step": 2840 }, { "epoch": 0.16965245431744894, "grad_norm": 2.449907064437866, "learning_rate": 9.22699223674607e-06, "loss": 0.9576, "step": 2841 }, { "epoch": 0.1697121700704646, "grad_norm": 2.702924966812134, "learning_rate": 9.226328710769027e-06, "loss": 0.903, "step": 2842 }, { "epoch": 0.16977188582348024, "grad_norm": 2.3121583461761475, "learning_rate": 9.225665184791985e-06, "loss": 0.9487, "step": 2843 }, { "epoch": 0.16983160157649588, "grad_norm": 2.1167709827423096, "learning_rate": 9.225001658814944e-06, "loss": 0.9609, "step": 2844 }, { "epoch": 0.16989131732951152, "grad_norm": 6.31154203414917, "learning_rate": 9.224338132837902e-06, "loss": 0.9022, "step": 2845 }, { "epoch": 0.16995103308252718, "grad_norm": 2.3479995727539062, "learning_rate": 9.22367460686086e-06, "loss": 0.926, "step": 2846 }, { "epoch": 0.17001074883554282, "grad_norm": 4.0924530029296875, "learning_rate": 9.223011080883816e-06, "loss": 0.9588, "step": 2847 }, { "epoch": 0.17007046458855846, "grad_norm": 2.7674522399902344, "learning_rate": 9.222347554906776e-06, "loss": 0.9009, "step": 2848 }, { "epoch": 0.1701301803415741, "grad_norm": 2.4994072914123535, "learning_rate": 9.221684028929734e-06, "loss": 0.9377, "step": 2849 }, { "epoch": 0.17018989609458976, "grad_norm": 1.8970626592636108, "learning_rate": 9.22102050295269e-06, "loss": 0.9258, "step": 2850 }, { "epoch": 0.1702496118476054, "grad_norm": 2.960174083709717, "learning_rate": 9.22035697697565e-06, "loss": 0.9208, "step": 2851 }, { "epoch": 0.17030932760062104, "grad_norm": 2.2002947330474854, "learning_rate": 9.219693450998607e-06, "loss": 0.9326, "step": 2852 }, { "epoch": 0.1703690433536367, "grad_norm": 2.068312168121338, "learning_rate": 9.219029925021565e-06, "loss": 0.923, "step": 2853 }, { "epoch": 0.17042875910665234, "grad_norm": 1.9949016571044922, "learning_rate": 9.218366399044525e-06, "loss": 0.9586, "step": 2854 }, { "epoch": 0.17048847485966798, "grad_norm": 1.9710755348205566, "learning_rate": 9.217702873067481e-06, "loss": 0.9064, "step": 2855 }, { "epoch": 0.17054819061268361, "grad_norm": 1.9277573823928833, "learning_rate": 9.217039347090439e-06, "loss": 0.9424, "step": 2856 }, { "epoch": 0.17060790636569928, "grad_norm": 2.4023220539093018, "learning_rate": 9.216375821113397e-06, "loss": 0.9489, "step": 2857 }, { "epoch": 0.17066762211871492, "grad_norm": 2.4264652729034424, "learning_rate": 9.215712295136355e-06, "loss": 0.9431, "step": 2858 }, { "epoch": 0.17072733787173056, "grad_norm": 1.7635400295257568, "learning_rate": 9.215048769159313e-06, "loss": 0.9389, "step": 2859 }, { "epoch": 0.17078705362474622, "grad_norm": 2.3039462566375732, "learning_rate": 9.214385243182272e-06, "loss": 0.9203, "step": 2860 }, { "epoch": 0.17084676937776186, "grad_norm": 3.563000202178955, "learning_rate": 9.21372171720523e-06, "loss": 0.9465, "step": 2861 }, { "epoch": 0.1709064851307775, "grad_norm": 4.523982524871826, "learning_rate": 9.213058191228188e-06, "loss": 0.9408, "step": 2862 }, { "epoch": 0.17096620088379313, "grad_norm": 2.769252061843872, "learning_rate": 9.212394665251146e-06, "loss": 0.9066, "step": 2863 }, { "epoch": 0.1710259166368088, "grad_norm": 2.7012908458709717, "learning_rate": 9.211731139274104e-06, "loss": 0.9208, "step": 2864 }, { "epoch": 0.17108563238982444, "grad_norm": 2.8533096313476562, "learning_rate": 9.21106761329706e-06, "loss": 0.9784, "step": 2865 }, { "epoch": 0.17114534814284008, "grad_norm": 1.9578593969345093, "learning_rate": 9.21040408732002e-06, "loss": 0.9375, "step": 2866 }, { "epoch": 0.1712050638958557, "grad_norm": 3.2546584606170654, "learning_rate": 9.209740561342977e-06, "loss": 0.9317, "step": 2867 }, { "epoch": 0.17126477964887138, "grad_norm": 3.14400053024292, "learning_rate": 9.209077035365935e-06, "loss": 0.9012, "step": 2868 }, { "epoch": 0.17132449540188702, "grad_norm": 2.2696235179901123, "learning_rate": 9.208413509388894e-06, "loss": 0.9216, "step": 2869 }, { "epoch": 0.17138421115490265, "grad_norm": 2.6493887901306152, "learning_rate": 9.20774998341185e-06, "loss": 0.9623, "step": 2870 }, { "epoch": 0.17144392690791832, "grad_norm": 1.9711594581604004, "learning_rate": 9.207086457434809e-06, "loss": 0.9402, "step": 2871 }, { "epoch": 0.17150364266093396, "grad_norm": 2.492906332015991, "learning_rate": 9.206422931457767e-06, "loss": 0.9286, "step": 2872 }, { "epoch": 0.1715633584139496, "grad_norm": 2.2295145988464355, "learning_rate": 9.205759405480725e-06, "loss": 0.9371, "step": 2873 }, { "epoch": 0.17162307416696523, "grad_norm": 2.5476155281066895, "learning_rate": 9.205095879503683e-06, "loss": 0.9577, "step": 2874 }, { "epoch": 0.1716827899199809, "grad_norm": 1.8713188171386719, "learning_rate": 9.204432353526641e-06, "loss": 0.9077, "step": 2875 }, { "epoch": 0.17174250567299654, "grad_norm": 2.0644309520721436, "learning_rate": 9.2037688275496e-06, "loss": 0.9175, "step": 2876 }, { "epoch": 0.17180222142601217, "grad_norm": 2.794074773788452, "learning_rate": 9.203105301572558e-06, "loss": 0.886, "step": 2877 }, { "epoch": 0.17186193717902784, "grad_norm": 2.6907246112823486, "learning_rate": 9.202441775595516e-06, "loss": 0.9455, "step": 2878 }, { "epoch": 0.17192165293204348, "grad_norm": 3.0684938430786133, "learning_rate": 9.201778249618474e-06, "loss": 0.9398, "step": 2879 }, { "epoch": 0.17198136868505912, "grad_norm": 4.440251350402832, "learning_rate": 9.201114723641432e-06, "loss": 0.9542, "step": 2880 }, { "epoch": 0.17204108443807475, "grad_norm": 2.2005820274353027, "learning_rate": 9.20045119766439e-06, "loss": 0.9125, "step": 2881 }, { "epoch": 0.17210080019109042, "grad_norm": 2.9597392082214355, "learning_rate": 9.199787671687346e-06, "loss": 0.9671, "step": 2882 }, { "epoch": 0.17216051594410606, "grad_norm": 1.8924733400344849, "learning_rate": 9.199124145710306e-06, "loss": 0.9268, "step": 2883 }, { "epoch": 0.1722202316971217, "grad_norm": 1.8330473899841309, "learning_rate": 9.198460619733264e-06, "loss": 0.951, "step": 2884 }, { "epoch": 0.17227994745013733, "grad_norm": 3.0009987354278564, "learning_rate": 9.19779709375622e-06, "loss": 0.9265, "step": 2885 }, { "epoch": 0.172339663203153, "grad_norm": 2.6169166564941406, "learning_rate": 9.197133567779179e-06, "loss": 0.9595, "step": 2886 }, { "epoch": 0.17239937895616864, "grad_norm": 2.6212120056152344, "learning_rate": 9.196470041802137e-06, "loss": 0.9602, "step": 2887 }, { "epoch": 0.17245909470918427, "grad_norm": 3.154463052749634, "learning_rate": 9.195806515825095e-06, "loss": 0.9439, "step": 2888 }, { "epoch": 0.17251881046219994, "grad_norm": 3.2962043285369873, "learning_rate": 9.195142989848053e-06, "loss": 0.9597, "step": 2889 }, { "epoch": 0.17257852621521558, "grad_norm": 2.201991319656372, "learning_rate": 9.194479463871011e-06, "loss": 0.9119, "step": 2890 }, { "epoch": 0.17263824196823122, "grad_norm": 2.6358304023742676, "learning_rate": 9.19381593789397e-06, "loss": 0.9479, "step": 2891 }, { "epoch": 0.17269795772124685, "grad_norm": 2.2548725605010986, "learning_rate": 9.193152411916927e-06, "loss": 0.933, "step": 2892 }, { "epoch": 0.17275767347426252, "grad_norm": 2.1108999252319336, "learning_rate": 9.192488885939885e-06, "loss": 0.9284, "step": 2893 }, { "epoch": 0.17281738922727816, "grad_norm": 2.524355888366699, "learning_rate": 9.191825359962844e-06, "loss": 0.9524, "step": 2894 }, { "epoch": 0.1728771049802938, "grad_norm": 1.987141728401184, "learning_rate": 9.191161833985802e-06, "loss": 0.9816, "step": 2895 }, { "epoch": 0.17293682073330946, "grad_norm": 2.3967807292938232, "learning_rate": 9.19049830800876e-06, "loss": 0.9243, "step": 2896 }, { "epoch": 0.1729965364863251, "grad_norm": 3.4055590629577637, "learning_rate": 9.189834782031716e-06, "loss": 0.8916, "step": 2897 }, { "epoch": 0.17305625223934074, "grad_norm": 2.3615097999572754, "learning_rate": 9.189171256054676e-06, "loss": 0.9546, "step": 2898 }, { "epoch": 0.17311596799235637, "grad_norm": 2.1037211418151855, "learning_rate": 9.188507730077634e-06, "loss": 0.8866, "step": 2899 }, { "epoch": 0.17317568374537204, "grad_norm": 2.2186737060546875, "learning_rate": 9.18784420410059e-06, "loss": 0.915, "step": 2900 }, { "epoch": 0.17323539949838768, "grad_norm": 3.1502585411071777, "learning_rate": 9.18718067812355e-06, "loss": 0.9285, "step": 2901 }, { "epoch": 0.17329511525140331, "grad_norm": 2.080308675765991, "learning_rate": 9.186517152146507e-06, "loss": 0.9305, "step": 2902 }, { "epoch": 0.17335483100441895, "grad_norm": 4.783076763153076, "learning_rate": 9.185853626169465e-06, "loss": 0.9037, "step": 2903 }, { "epoch": 0.17341454675743462, "grad_norm": 2.3310627937316895, "learning_rate": 9.185190100192425e-06, "loss": 0.9302, "step": 2904 }, { "epoch": 0.17347426251045026, "grad_norm": 5.365455150604248, "learning_rate": 9.184526574215381e-06, "loss": 0.9004, "step": 2905 }, { "epoch": 0.1735339782634659, "grad_norm": 2.095165491104126, "learning_rate": 9.183863048238339e-06, "loss": 0.9249, "step": 2906 }, { "epoch": 0.17359369401648156, "grad_norm": 1.7024621963500977, "learning_rate": 9.183199522261297e-06, "loss": 0.9796, "step": 2907 }, { "epoch": 0.1736534097694972, "grad_norm": 2.3449997901916504, "learning_rate": 9.182535996284255e-06, "loss": 0.9053, "step": 2908 }, { "epoch": 0.17371312552251283, "grad_norm": 2.4951870441436768, "learning_rate": 9.181872470307213e-06, "loss": 0.9741, "step": 2909 }, { "epoch": 0.17377284127552847, "grad_norm": 2.1191587448120117, "learning_rate": 9.181208944330171e-06, "loss": 0.912, "step": 2910 }, { "epoch": 0.17383255702854414, "grad_norm": 3.3610942363739014, "learning_rate": 9.18054541835313e-06, "loss": 0.9004, "step": 2911 }, { "epoch": 0.17389227278155978, "grad_norm": 2.0015652179718018, "learning_rate": 9.179881892376088e-06, "loss": 0.9145, "step": 2912 }, { "epoch": 0.1739519885345754, "grad_norm": 2.3148839473724365, "learning_rate": 9.179218366399046e-06, "loss": 0.9444, "step": 2913 }, { "epoch": 0.17401170428759108, "grad_norm": 2.188072443008423, "learning_rate": 9.178554840422004e-06, "loss": 0.9382, "step": 2914 }, { "epoch": 0.17407142004060672, "grad_norm": 2.2377734184265137, "learning_rate": 9.17789131444496e-06, "loss": 0.9506, "step": 2915 }, { "epoch": 0.17413113579362235, "grad_norm": 1.9669251441955566, "learning_rate": 9.17722778846792e-06, "loss": 0.9589, "step": 2916 }, { "epoch": 0.174190851546638, "grad_norm": 2.498396158218384, "learning_rate": 9.176564262490876e-06, "loss": 0.9235, "step": 2917 }, { "epoch": 0.17425056729965366, "grad_norm": 1.8187040090560913, "learning_rate": 9.175900736513835e-06, "loss": 0.9157, "step": 2918 }, { "epoch": 0.1743102830526693, "grad_norm": 1.6774232387542725, "learning_rate": 9.175237210536794e-06, "loss": 0.9394, "step": 2919 }, { "epoch": 0.17436999880568493, "grad_norm": 1.8275893926620483, "learning_rate": 9.17457368455975e-06, "loss": 0.919, "step": 2920 }, { "epoch": 0.17442971455870057, "grad_norm": 2.840900182723999, "learning_rate": 9.173910158582709e-06, "loss": 0.9408, "step": 2921 }, { "epoch": 0.17448943031171624, "grad_norm": 1.9163508415222168, "learning_rate": 9.173246632605667e-06, "loss": 0.9294, "step": 2922 }, { "epoch": 0.17454914606473187, "grad_norm": 2.2053985595703125, "learning_rate": 9.172583106628625e-06, "loss": 0.917, "step": 2923 }, { "epoch": 0.1746088618177475, "grad_norm": 2.1396543979644775, "learning_rate": 9.171919580651583e-06, "loss": 0.9101, "step": 2924 }, { "epoch": 0.17466857757076318, "grad_norm": 1.8044790029525757, "learning_rate": 9.171256054674541e-06, "loss": 0.9202, "step": 2925 }, { "epoch": 0.17472829332377882, "grad_norm": 2.389791250228882, "learning_rate": 9.1705925286975e-06, "loss": 0.9164, "step": 2926 }, { "epoch": 0.17478800907679445, "grad_norm": 1.869624137878418, "learning_rate": 9.169929002720457e-06, "loss": 0.8797, "step": 2927 }, { "epoch": 0.1748477248298101, "grad_norm": 2.9481704235076904, "learning_rate": 9.169265476743416e-06, "loss": 0.9352, "step": 2928 }, { "epoch": 0.17490744058282576, "grad_norm": 4.741058349609375, "learning_rate": 9.168601950766374e-06, "loss": 0.934, "step": 2929 }, { "epoch": 0.1749671563358414, "grad_norm": 1.8687164783477783, "learning_rate": 9.167938424789332e-06, "loss": 0.9251, "step": 2930 }, { "epoch": 0.17502687208885703, "grad_norm": 2.651451826095581, "learning_rate": 9.16727489881229e-06, "loss": 0.8983, "step": 2931 }, { "epoch": 0.1750865878418727, "grad_norm": 2.762063980102539, "learning_rate": 9.166611372835246e-06, "loss": 0.9322, "step": 2932 }, { "epoch": 0.17514630359488834, "grad_norm": 2.38586688041687, "learning_rate": 9.165947846858206e-06, "loss": 0.9277, "step": 2933 }, { "epoch": 0.17520601934790397, "grad_norm": 1.8854219913482666, "learning_rate": 9.165284320881164e-06, "loss": 0.8987, "step": 2934 }, { "epoch": 0.1752657351009196, "grad_norm": 2.95284104347229, "learning_rate": 9.16462079490412e-06, "loss": 0.9135, "step": 2935 }, { "epoch": 0.17532545085393528, "grad_norm": 2.2499313354492188, "learning_rate": 9.163957268927079e-06, "loss": 0.9901, "step": 2936 }, { "epoch": 0.17538516660695092, "grad_norm": 2.0231516361236572, "learning_rate": 9.163293742950037e-06, "loss": 0.9328, "step": 2937 }, { "epoch": 0.17544488235996655, "grad_norm": 1.8149771690368652, "learning_rate": 9.162630216972995e-06, "loss": 0.9309, "step": 2938 }, { "epoch": 0.17550459811298222, "grad_norm": 2.4719557762145996, "learning_rate": 9.161966690995953e-06, "loss": 0.8973, "step": 2939 }, { "epoch": 0.17556431386599786, "grad_norm": 2.257167339324951, "learning_rate": 9.161303165018911e-06, "loss": 0.9633, "step": 2940 }, { "epoch": 0.1756240296190135, "grad_norm": 2.1443378925323486, "learning_rate": 9.16063963904187e-06, "loss": 0.8946, "step": 2941 }, { "epoch": 0.17568374537202913, "grad_norm": 1.9656380414962769, "learning_rate": 9.159976113064827e-06, "loss": 0.9275, "step": 2942 }, { "epoch": 0.1757434611250448, "grad_norm": 3.0429720878601074, "learning_rate": 9.159312587087785e-06, "loss": 0.9299, "step": 2943 }, { "epoch": 0.17580317687806044, "grad_norm": 3.7025949954986572, "learning_rate": 9.158649061110743e-06, "loss": 0.9075, "step": 2944 }, { "epoch": 0.17586289263107607, "grad_norm": 6.563588619232178, "learning_rate": 9.157985535133702e-06, "loss": 0.926, "step": 2945 }, { "epoch": 0.1759226083840917, "grad_norm": 2.0079891681671143, "learning_rate": 9.15732200915666e-06, "loss": 0.9201, "step": 2946 }, { "epoch": 0.17598232413710738, "grad_norm": 2.0341882705688477, "learning_rate": 9.156658483179616e-06, "loss": 0.9201, "step": 2947 }, { "epoch": 0.17604203989012301, "grad_norm": 2.075810432434082, "learning_rate": 9.155994957202576e-06, "loss": 0.9737, "step": 2948 }, { "epoch": 0.17610175564313865, "grad_norm": 4.994594097137451, "learning_rate": 9.155331431225534e-06, "loss": 0.9912, "step": 2949 }, { "epoch": 0.17616147139615432, "grad_norm": 2.6122679710388184, "learning_rate": 9.15466790524849e-06, "loss": 0.9736, "step": 2950 }, { "epoch": 0.17622118714916996, "grad_norm": 2.0171220302581787, "learning_rate": 9.15400437927145e-06, "loss": 0.908, "step": 2951 }, { "epoch": 0.1762809029021856, "grad_norm": 2.3632824420928955, "learning_rate": 9.153340853294407e-06, "loss": 0.9371, "step": 2952 }, { "epoch": 0.17634061865520123, "grad_norm": 2.570902109146118, "learning_rate": 9.152677327317365e-06, "loss": 0.9223, "step": 2953 }, { "epoch": 0.1764003344082169, "grad_norm": 2.3832802772521973, "learning_rate": 9.152013801340324e-06, "loss": 0.9413, "step": 2954 }, { "epoch": 0.17646005016123253, "grad_norm": 2.793954372406006, "learning_rate": 9.151350275363281e-06, "loss": 0.9534, "step": 2955 }, { "epoch": 0.17651976591424817, "grad_norm": 2.423088788986206, "learning_rate": 9.150686749386239e-06, "loss": 0.9127, "step": 2956 }, { "epoch": 0.17657948166726384, "grad_norm": 2.4133830070495605, "learning_rate": 9.150023223409197e-06, "loss": 0.948, "step": 2957 }, { "epoch": 0.17663919742027948, "grad_norm": 1.7082631587982178, "learning_rate": 9.149359697432155e-06, "loss": 0.9661, "step": 2958 }, { "epoch": 0.1766989131732951, "grad_norm": 2.7864491939544678, "learning_rate": 9.148696171455113e-06, "loss": 0.9159, "step": 2959 }, { "epoch": 0.17675862892631075, "grad_norm": 2.1095244884490967, "learning_rate": 9.148032645478071e-06, "loss": 0.8807, "step": 2960 }, { "epoch": 0.17681834467932642, "grad_norm": 2.044013023376465, "learning_rate": 9.14736911950103e-06, "loss": 0.9282, "step": 2961 }, { "epoch": 0.17687806043234205, "grad_norm": 4.314416408538818, "learning_rate": 9.146705593523988e-06, "loss": 0.9326, "step": 2962 }, { "epoch": 0.1769377761853577, "grad_norm": 2.890448570251465, "learning_rate": 9.146042067546946e-06, "loss": 0.9423, "step": 2963 }, { "epoch": 0.17699749193837333, "grad_norm": 2.7382314205169678, "learning_rate": 9.145378541569904e-06, "loss": 0.922, "step": 2964 }, { "epoch": 0.177057207691389, "grad_norm": 2.1855640411376953, "learning_rate": 9.14471501559286e-06, "loss": 0.9322, "step": 2965 }, { "epoch": 0.17711692344440463, "grad_norm": 2.18278169631958, "learning_rate": 9.14405148961582e-06, "loss": 0.9367, "step": 2966 }, { "epoch": 0.17717663919742027, "grad_norm": 3.454826831817627, "learning_rate": 9.143387963638776e-06, "loss": 0.9389, "step": 2967 }, { "epoch": 0.17723635495043594, "grad_norm": 2.7235405445098877, "learning_rate": 9.142724437661734e-06, "loss": 0.988, "step": 2968 }, { "epoch": 0.17729607070345157, "grad_norm": 2.917722225189209, "learning_rate": 9.142060911684694e-06, "loss": 0.9313, "step": 2969 }, { "epoch": 0.1773557864564672, "grad_norm": 1.9124444723129272, "learning_rate": 9.14139738570765e-06, "loss": 0.9549, "step": 2970 }, { "epoch": 0.17741550220948285, "grad_norm": 2.08984375, "learning_rate": 9.140733859730609e-06, "loss": 0.9452, "step": 2971 }, { "epoch": 0.17747521796249852, "grad_norm": 2.1956655979156494, "learning_rate": 9.140070333753567e-06, "loss": 0.9325, "step": 2972 }, { "epoch": 0.17753493371551415, "grad_norm": 2.369481086730957, "learning_rate": 9.139406807776525e-06, "loss": 0.9346, "step": 2973 }, { "epoch": 0.1775946494685298, "grad_norm": 2.4897708892822266, "learning_rate": 9.138743281799483e-06, "loss": 0.9116, "step": 2974 }, { "epoch": 0.17765436522154546, "grad_norm": 2.137071371078491, "learning_rate": 9.138079755822441e-06, "loss": 0.9268, "step": 2975 }, { "epoch": 0.1777140809745611, "grad_norm": 1.8377431631088257, "learning_rate": 9.1374162298454e-06, "loss": 0.926, "step": 2976 }, { "epoch": 0.17777379672757673, "grad_norm": 1.9073420763015747, "learning_rate": 9.136752703868357e-06, "loss": 0.9926, "step": 2977 }, { "epoch": 0.17783351248059237, "grad_norm": 2.1074795722961426, "learning_rate": 9.136089177891316e-06, "loss": 0.9133, "step": 2978 }, { "epoch": 0.17789322823360804, "grad_norm": 1.8611811399459839, "learning_rate": 9.135425651914274e-06, "loss": 0.9185, "step": 2979 }, { "epoch": 0.17795294398662367, "grad_norm": 2.3644464015960693, "learning_rate": 9.134762125937232e-06, "loss": 0.9078, "step": 2980 }, { "epoch": 0.1780126597396393, "grad_norm": 2.155904769897461, "learning_rate": 9.13409859996019e-06, "loss": 0.9234, "step": 2981 }, { "epoch": 0.17807237549265495, "grad_norm": 3.3949506282806396, "learning_rate": 9.133435073983146e-06, "loss": 0.9612, "step": 2982 }, { "epoch": 0.17813209124567062, "grad_norm": 2.586395740509033, "learning_rate": 9.132771548006106e-06, "loss": 0.9136, "step": 2983 }, { "epoch": 0.17819180699868625, "grad_norm": 2.730309247970581, "learning_rate": 9.132108022029064e-06, "loss": 0.9143, "step": 2984 }, { "epoch": 0.1782515227517019, "grad_norm": 2.1277596950531006, "learning_rate": 9.13144449605202e-06, "loss": 0.903, "step": 2985 }, { "epoch": 0.17831123850471756, "grad_norm": 2.274106025695801, "learning_rate": 9.130780970074979e-06, "loss": 0.8825, "step": 2986 }, { "epoch": 0.1783709542577332, "grad_norm": 2.4163105487823486, "learning_rate": 9.130117444097937e-06, "loss": 0.9825, "step": 2987 }, { "epoch": 0.17843067001074883, "grad_norm": 2.586984395980835, "learning_rate": 9.129453918120895e-06, "loss": 0.9433, "step": 2988 }, { "epoch": 0.17849038576376447, "grad_norm": 2.6561412811279297, "learning_rate": 9.128790392143853e-06, "loss": 0.9456, "step": 2989 }, { "epoch": 0.17855010151678014, "grad_norm": 2.5957858562469482, "learning_rate": 9.128126866166811e-06, "loss": 0.9255, "step": 2990 }, { "epoch": 0.17860981726979577, "grad_norm": 2.4794538021087646, "learning_rate": 9.127463340189769e-06, "loss": 0.9257, "step": 2991 }, { "epoch": 0.1786695330228114, "grad_norm": 2.10552978515625, "learning_rate": 9.126799814212727e-06, "loss": 0.9055, "step": 2992 }, { "epoch": 0.17872924877582708, "grad_norm": 15.67383098602295, "learning_rate": 9.126136288235685e-06, "loss": 0.9179, "step": 2993 }, { "epoch": 0.17878896452884271, "grad_norm": 2.9914958477020264, "learning_rate": 9.125472762258643e-06, "loss": 0.9751, "step": 2994 }, { "epoch": 0.17884868028185835, "grad_norm": 2.5181527137756348, "learning_rate": 9.124809236281602e-06, "loss": 0.9182, "step": 2995 }, { "epoch": 0.178908396034874, "grad_norm": 5.006690502166748, "learning_rate": 9.12414571030456e-06, "loss": 0.9021, "step": 2996 }, { "epoch": 0.17896811178788966, "grad_norm": 1.8748457431793213, "learning_rate": 9.123482184327516e-06, "loss": 0.947, "step": 2997 }, { "epoch": 0.1790278275409053, "grad_norm": 2.4486000537872314, "learning_rate": 9.122818658350476e-06, "loss": 0.9267, "step": 2998 }, { "epoch": 0.17908754329392093, "grad_norm": 2.5151636600494385, "learning_rate": 9.122155132373434e-06, "loss": 0.964, "step": 2999 }, { "epoch": 0.17914725904693657, "grad_norm": 2.833425998687744, "learning_rate": 9.12149160639639e-06, "loss": 0.9154, "step": 3000 }, { "epoch": 0.17914725904693657, "eval_text_loss": 0.9549440145492554, "eval_text_runtime": 15.1729, "eval_text_samples_per_second": 263.628, "eval_text_steps_per_second": 0.527, "step": 3000 }, { "epoch": 0.17914725904693657, "eval_image_loss": 0.690746545791626, "eval_image_runtime": 5.0194, "eval_image_samples_per_second": 796.914, "eval_image_steps_per_second": 1.594, "step": 3000 }, { "epoch": 0.17914725904693657, "eval_video_loss": 1.163590669631958, "eval_video_runtime": 77.2072, "eval_video_samples_per_second": 51.809, "eval_video_steps_per_second": 0.104, "step": 3000 }, { "epoch": 0.17920697479995223, "grad_norm": 2.783895969390869, "learning_rate": 9.12082808041935e-06, "loss": 0.9126, "step": 3001 }, { "epoch": 0.17926669055296787, "grad_norm": 2.89279842376709, "learning_rate": 9.120164554442307e-06, "loss": 0.9699, "step": 3002 }, { "epoch": 0.1793264063059835, "grad_norm": 2.2388644218444824, "learning_rate": 9.119501028465265e-06, "loss": 0.9427, "step": 3003 }, { "epoch": 0.17938612205899918, "grad_norm": 4.171393394470215, "learning_rate": 9.118837502488224e-06, "loss": 0.9354, "step": 3004 }, { "epoch": 0.1794458378120148, "grad_norm": 1.9257930517196655, "learning_rate": 9.11817397651118e-06, "loss": 0.9098, "step": 3005 }, { "epoch": 0.17950555356503045, "grad_norm": 1.7196141481399536, "learning_rate": 9.117510450534139e-06, "loss": 0.9537, "step": 3006 }, { "epoch": 0.1795652693180461, "grad_norm": 2.3951687812805176, "learning_rate": 9.116846924557097e-06, "loss": 0.9417, "step": 3007 }, { "epoch": 0.17962498507106175, "grad_norm": 2.2273406982421875, "learning_rate": 9.116183398580055e-06, "loss": 0.9831, "step": 3008 }, { "epoch": 0.1796847008240774, "grad_norm": 2.853560209274292, "learning_rate": 9.115519872603013e-06, "loss": 0.9353, "step": 3009 }, { "epoch": 0.17974441657709303, "grad_norm": 2.2123684883117676, "learning_rate": 9.114856346625971e-06, "loss": 0.9313, "step": 3010 }, { "epoch": 0.1798041323301087, "grad_norm": 2.7842204570770264, "learning_rate": 9.11419282064893e-06, "loss": 0.9437, "step": 3011 }, { "epoch": 0.17986384808312433, "grad_norm": 2.815847158432007, "learning_rate": 9.113529294671888e-06, "loss": 0.9615, "step": 3012 }, { "epoch": 0.17992356383613997, "grad_norm": 1.9254807233810425, "learning_rate": 9.112865768694846e-06, "loss": 0.9232, "step": 3013 }, { "epoch": 0.1799832795891556, "grad_norm": 2.2792840003967285, "learning_rate": 9.112202242717804e-06, "loss": 0.9301, "step": 3014 }, { "epoch": 0.18004299534217127, "grad_norm": 2.1445438861846924, "learning_rate": 9.11153871674076e-06, "loss": 0.9591, "step": 3015 }, { "epoch": 0.1801027110951869, "grad_norm": 5.618615627288818, "learning_rate": 9.11087519076372e-06, "loss": 0.9214, "step": 3016 }, { "epoch": 0.18016242684820255, "grad_norm": 2.007347345352173, "learning_rate": 9.110211664786676e-06, "loss": 0.8988, "step": 3017 }, { "epoch": 0.1802221426012182, "grad_norm": 2.461341619491577, "learning_rate": 9.109548138809634e-06, "loss": 0.957, "step": 3018 }, { "epoch": 0.18028185835423385, "grad_norm": 3.6474969387054443, "learning_rate": 9.108884612832594e-06, "loss": 0.9808, "step": 3019 }, { "epoch": 0.1803415741072495, "grad_norm": 2.278089761734009, "learning_rate": 9.10822108685555e-06, "loss": 0.8951, "step": 3020 }, { "epoch": 0.18040128986026513, "grad_norm": 2.369234085083008, "learning_rate": 9.107557560878509e-06, "loss": 0.9499, "step": 3021 }, { "epoch": 0.1804610056132808, "grad_norm": 1.9575713872909546, "learning_rate": 9.106894034901467e-06, "loss": 0.9629, "step": 3022 }, { "epoch": 0.18052072136629643, "grad_norm": 5.88104248046875, "learning_rate": 9.106230508924425e-06, "loss": 0.9188, "step": 3023 }, { "epoch": 0.18058043711931207, "grad_norm": 2.1863880157470703, "learning_rate": 9.105566982947383e-06, "loss": 0.9776, "step": 3024 }, { "epoch": 0.1806401528723277, "grad_norm": 1.9242736101150513, "learning_rate": 9.104903456970341e-06, "loss": 0.9026, "step": 3025 }, { "epoch": 0.18069986862534337, "grad_norm": 2.684187412261963, "learning_rate": 9.1042399309933e-06, "loss": 0.9262, "step": 3026 }, { "epoch": 0.180759584378359, "grad_norm": 2.6324400901794434, "learning_rate": 9.103576405016257e-06, "loss": 0.9168, "step": 3027 }, { "epoch": 0.18081930013137465, "grad_norm": 1.743090033531189, "learning_rate": 9.102912879039215e-06, "loss": 0.9307, "step": 3028 }, { "epoch": 0.18087901588439032, "grad_norm": 3.165308952331543, "learning_rate": 9.102249353062174e-06, "loss": 0.9595, "step": 3029 }, { "epoch": 0.18093873163740595, "grad_norm": 2.3681576251983643, "learning_rate": 9.101585827085132e-06, "loss": 0.9208, "step": 3030 }, { "epoch": 0.1809984473904216, "grad_norm": 2.698052406311035, "learning_rate": 9.10092230110809e-06, "loss": 0.945, "step": 3031 }, { "epoch": 0.18105816314343723, "grad_norm": 2.433753252029419, "learning_rate": 9.100258775131046e-06, "loss": 0.9364, "step": 3032 }, { "epoch": 0.1811178788964529, "grad_norm": 2.196911573410034, "learning_rate": 9.099595249154006e-06, "loss": 0.9331, "step": 3033 }, { "epoch": 0.18117759464946853, "grad_norm": 2.2942023277282715, "learning_rate": 9.098931723176964e-06, "loss": 0.9743, "step": 3034 }, { "epoch": 0.18123731040248417, "grad_norm": 3.0282113552093506, "learning_rate": 9.09826819719992e-06, "loss": 0.9235, "step": 3035 }, { "epoch": 0.1812970261554998, "grad_norm": 2.5776209831237793, "learning_rate": 9.097604671222879e-06, "loss": 0.9005, "step": 3036 }, { "epoch": 0.18135674190851547, "grad_norm": 2.2383971214294434, "learning_rate": 9.096941145245837e-06, "loss": 0.9081, "step": 3037 }, { "epoch": 0.1814164576615311, "grad_norm": 1.497299313545227, "learning_rate": 9.096277619268795e-06, "loss": 0.9012, "step": 3038 }, { "epoch": 0.18147617341454675, "grad_norm": 4.305228233337402, "learning_rate": 9.095614093291753e-06, "loss": 0.9155, "step": 3039 }, { "epoch": 0.18153588916756241, "grad_norm": 2.3203699588775635, "learning_rate": 9.094950567314711e-06, "loss": 0.9118, "step": 3040 }, { "epoch": 0.18159560492057805, "grad_norm": 3.0475127696990967, "learning_rate": 9.094287041337669e-06, "loss": 0.8856, "step": 3041 }, { "epoch": 0.1816553206735937, "grad_norm": 2.308046340942383, "learning_rate": 9.093623515360627e-06, "loss": 0.9504, "step": 3042 }, { "epoch": 0.18171503642660933, "grad_norm": 1.8551396131515503, "learning_rate": 9.092959989383585e-06, "loss": 0.9307, "step": 3043 }, { "epoch": 0.181774752179625, "grad_norm": 2.348029375076294, "learning_rate": 9.092296463406543e-06, "loss": 0.9261, "step": 3044 }, { "epoch": 0.18183446793264063, "grad_norm": 2.2001943588256836, "learning_rate": 9.091632937429501e-06, "loss": 0.9555, "step": 3045 }, { "epoch": 0.18189418368565627, "grad_norm": 2.023383140563965, "learning_rate": 9.09096941145246e-06, "loss": 0.9467, "step": 3046 }, { "epoch": 0.18195389943867193, "grad_norm": 2.135862350463867, "learning_rate": 9.090305885475416e-06, "loss": 0.9445, "step": 3047 }, { "epoch": 0.18201361519168757, "grad_norm": 1.9564118385314941, "learning_rate": 9.089642359498376e-06, "loss": 0.8998, "step": 3048 }, { "epoch": 0.1820733309447032, "grad_norm": 1.9625227451324463, "learning_rate": 9.088978833521334e-06, "loss": 0.9401, "step": 3049 }, { "epoch": 0.18213304669771885, "grad_norm": 2.814734935760498, "learning_rate": 9.08831530754429e-06, "loss": 0.9393, "step": 3050 }, { "epoch": 0.1821927624507345, "grad_norm": 2.0622401237487793, "learning_rate": 9.08765178156725e-06, "loss": 0.9386, "step": 3051 }, { "epoch": 0.18225247820375015, "grad_norm": 2.757077932357788, "learning_rate": 9.086988255590206e-06, "loss": 0.9567, "step": 3052 }, { "epoch": 0.1823121939567658, "grad_norm": 2.2632033824920654, "learning_rate": 9.086324729613165e-06, "loss": 0.8779, "step": 3053 }, { "epoch": 0.18237190970978143, "grad_norm": 1.8493943214416504, "learning_rate": 9.085661203636124e-06, "loss": 0.9155, "step": 3054 }, { "epoch": 0.1824316254627971, "grad_norm": 2.7086191177368164, "learning_rate": 9.08499767765908e-06, "loss": 0.9888, "step": 3055 }, { "epoch": 0.18249134121581273, "grad_norm": 2.3666129112243652, "learning_rate": 9.084334151682039e-06, "loss": 0.9029, "step": 3056 }, { "epoch": 0.18255105696882837, "grad_norm": 2.0480797290802, "learning_rate": 9.083670625704997e-06, "loss": 0.9507, "step": 3057 }, { "epoch": 0.18261077272184403, "grad_norm": 2.3365936279296875, "learning_rate": 9.083007099727955e-06, "loss": 0.8989, "step": 3058 }, { "epoch": 0.18267048847485967, "grad_norm": 2.2501609325408936, "learning_rate": 9.082343573750913e-06, "loss": 0.9282, "step": 3059 }, { "epoch": 0.1827302042278753, "grad_norm": 4.4495978355407715, "learning_rate": 9.081680047773871e-06, "loss": 0.9139, "step": 3060 }, { "epoch": 0.18278991998089095, "grad_norm": 2.0665738582611084, "learning_rate": 9.08101652179683e-06, "loss": 0.8909, "step": 3061 }, { "epoch": 0.1828496357339066, "grad_norm": 3.290220022201538, "learning_rate": 9.080352995819787e-06, "loss": 0.941, "step": 3062 }, { "epoch": 0.18290935148692225, "grad_norm": 2.779130458831787, "learning_rate": 9.079689469842746e-06, "loss": 0.917, "step": 3063 }, { "epoch": 0.1829690672399379, "grad_norm": 2.896714448928833, "learning_rate": 9.079025943865704e-06, "loss": 0.9424, "step": 3064 }, { "epoch": 0.18302878299295355, "grad_norm": 3.044191837310791, "learning_rate": 9.07836241788866e-06, "loss": 0.9115, "step": 3065 }, { "epoch": 0.1830884987459692, "grad_norm": 3.4303061962127686, "learning_rate": 9.07769889191162e-06, "loss": 0.9381, "step": 3066 }, { "epoch": 0.18314821449898483, "grad_norm": 2.2708899974823, "learning_rate": 9.077035365934576e-06, "loss": 0.9577, "step": 3067 }, { "epoch": 0.18320793025200047, "grad_norm": 2.1136038303375244, "learning_rate": 9.076371839957534e-06, "loss": 0.9273, "step": 3068 }, { "epoch": 0.18326764600501613, "grad_norm": 1.6102046966552734, "learning_rate": 9.075708313980494e-06, "loss": 0.9331, "step": 3069 }, { "epoch": 0.18332736175803177, "grad_norm": 2.19248104095459, "learning_rate": 9.07504478800345e-06, "loss": 0.9404, "step": 3070 }, { "epoch": 0.1833870775110474, "grad_norm": 2.0225284099578857, "learning_rate": 9.074381262026409e-06, "loss": 0.9431, "step": 3071 }, { "epoch": 0.18344679326406305, "grad_norm": 2.3863556385040283, "learning_rate": 9.073717736049367e-06, "loss": 0.9275, "step": 3072 }, { "epoch": 0.1835065090170787, "grad_norm": 2.7089169025421143, "learning_rate": 9.073054210072325e-06, "loss": 0.9093, "step": 3073 }, { "epoch": 0.18356622477009435, "grad_norm": 1.9285680055618286, "learning_rate": 9.072390684095283e-06, "loss": 0.9491, "step": 3074 }, { "epoch": 0.18362594052311, "grad_norm": 2.282266139984131, "learning_rate": 9.071727158118241e-06, "loss": 0.9557, "step": 3075 }, { "epoch": 0.18368565627612565, "grad_norm": 4.26730489730835, "learning_rate": 9.0710636321412e-06, "loss": 0.9616, "step": 3076 }, { "epoch": 0.1837453720291413, "grad_norm": 5.0798773765563965, "learning_rate": 9.070400106164157e-06, "loss": 0.9172, "step": 3077 }, { "epoch": 0.18380508778215693, "grad_norm": 2.777761459350586, "learning_rate": 9.069736580187115e-06, "loss": 0.9165, "step": 3078 }, { "epoch": 0.18386480353517257, "grad_norm": 2.0653254985809326, "learning_rate": 9.069073054210073e-06, "loss": 0.9127, "step": 3079 }, { "epoch": 0.18392451928818823, "grad_norm": 2.8297159671783447, "learning_rate": 9.068409528233032e-06, "loss": 0.9335, "step": 3080 }, { "epoch": 0.18398423504120387, "grad_norm": 2.163862943649292, "learning_rate": 9.06774600225599e-06, "loss": 0.9245, "step": 3081 }, { "epoch": 0.1840439507942195, "grad_norm": 2.375527858734131, "learning_rate": 9.067082476278946e-06, "loss": 0.9324, "step": 3082 }, { "epoch": 0.18410366654723517, "grad_norm": 4.672438621520996, "learning_rate": 9.066418950301906e-06, "loss": 0.9616, "step": 3083 }, { "epoch": 0.1841633823002508, "grad_norm": 2.1762938499450684, "learning_rate": 9.065755424324864e-06, "loss": 0.9215, "step": 3084 }, { "epoch": 0.18422309805326645, "grad_norm": 2.816056489944458, "learning_rate": 9.06509189834782e-06, "loss": 0.9686, "step": 3085 }, { "epoch": 0.1842828138062821, "grad_norm": 6.425425052642822, "learning_rate": 9.064428372370778e-06, "loss": 0.9217, "step": 3086 }, { "epoch": 0.18434252955929775, "grad_norm": 11.114168167114258, "learning_rate": 9.063764846393737e-06, "loss": 0.9149, "step": 3087 }, { "epoch": 0.1844022453123134, "grad_norm": 3.1166908740997314, "learning_rate": 9.063101320416695e-06, "loss": 0.9224, "step": 3088 }, { "epoch": 0.18446196106532903, "grad_norm": 4.972184181213379, "learning_rate": 9.062437794439653e-06, "loss": 0.9285, "step": 3089 }, { "epoch": 0.1845216768183447, "grad_norm": 1.912497878074646, "learning_rate": 9.061774268462611e-06, "loss": 0.9283, "step": 3090 }, { "epoch": 0.18458139257136033, "grad_norm": 1.8123300075531006, "learning_rate": 9.061110742485569e-06, "loss": 0.9077, "step": 3091 }, { "epoch": 0.18464110832437597, "grad_norm": 2.2212717533111572, "learning_rate": 9.060447216508527e-06, "loss": 0.9596, "step": 3092 }, { "epoch": 0.1847008240773916, "grad_norm": 3.7285897731781006, "learning_rate": 9.059783690531485e-06, "loss": 0.933, "step": 3093 }, { "epoch": 0.18476053983040727, "grad_norm": 2.6647677421569824, "learning_rate": 9.059120164554443e-06, "loss": 0.8966, "step": 3094 }, { "epoch": 0.1848202555834229, "grad_norm": 2.4827194213867188, "learning_rate": 9.058456638577401e-06, "loss": 0.9157, "step": 3095 }, { "epoch": 0.18487997133643855, "grad_norm": 2.5078563690185547, "learning_rate": 9.05779311260036e-06, "loss": 0.8936, "step": 3096 }, { "epoch": 0.18493968708945419, "grad_norm": 2.017958402633667, "learning_rate": 9.057129586623316e-06, "loss": 0.9091, "step": 3097 }, { "epoch": 0.18499940284246985, "grad_norm": 1.9700202941894531, "learning_rate": 9.056466060646276e-06, "loss": 0.9594, "step": 3098 }, { "epoch": 0.1850591185954855, "grad_norm": 1.8613487482070923, "learning_rate": 9.055802534669234e-06, "loss": 0.9729, "step": 3099 }, { "epoch": 0.18511883434850113, "grad_norm": 2.0754358768463135, "learning_rate": 9.05513900869219e-06, "loss": 0.9635, "step": 3100 }, { "epoch": 0.1851785501015168, "grad_norm": 1.9594554901123047, "learning_rate": 9.05447548271515e-06, "loss": 0.9212, "step": 3101 }, { "epoch": 0.18523826585453243, "grad_norm": 2.4934210777282715, "learning_rate": 9.053811956738106e-06, "loss": 0.94, "step": 3102 }, { "epoch": 0.18529798160754807, "grad_norm": 2.950530529022217, "learning_rate": 9.053148430761065e-06, "loss": 0.9416, "step": 3103 }, { "epoch": 0.1853576973605637, "grad_norm": 2.3288402557373047, "learning_rate": 9.052484904784024e-06, "loss": 0.9594, "step": 3104 }, { "epoch": 0.18541741311357937, "grad_norm": 3.071000337600708, "learning_rate": 9.05182137880698e-06, "loss": 0.9238, "step": 3105 }, { "epoch": 0.185477128866595, "grad_norm": 1.72898530960083, "learning_rate": 9.051157852829939e-06, "loss": 0.9305, "step": 3106 }, { "epoch": 0.18553684461961065, "grad_norm": 2.7653942108154297, "learning_rate": 9.050494326852897e-06, "loss": 0.9584, "step": 3107 }, { "epoch": 0.1855965603726263, "grad_norm": 2.8075549602508545, "learning_rate": 9.049830800875855e-06, "loss": 0.9518, "step": 3108 }, { "epoch": 0.18565627612564195, "grad_norm": 2.2567827701568604, "learning_rate": 9.049167274898813e-06, "loss": 0.9397, "step": 3109 }, { "epoch": 0.1857159918786576, "grad_norm": 1.8350706100463867, "learning_rate": 9.048503748921771e-06, "loss": 0.931, "step": 3110 }, { "epoch": 0.18577570763167323, "grad_norm": 4.763534069061279, "learning_rate": 9.04784022294473e-06, "loss": 0.9357, "step": 3111 }, { "epoch": 0.1858354233846889, "grad_norm": 2.6838409900665283, "learning_rate": 9.047176696967687e-06, "loss": 0.946, "step": 3112 }, { "epoch": 0.18589513913770453, "grad_norm": 2.590909719467163, "learning_rate": 9.046513170990646e-06, "loss": 0.9479, "step": 3113 }, { "epoch": 0.18595485489072017, "grad_norm": 2.2812469005584717, "learning_rate": 9.045849645013604e-06, "loss": 0.927, "step": 3114 }, { "epoch": 0.1860145706437358, "grad_norm": 1.9158817529678345, "learning_rate": 9.04518611903656e-06, "loss": 0.9338, "step": 3115 }, { "epoch": 0.18607428639675147, "grad_norm": 2.4954795837402344, "learning_rate": 9.04452259305952e-06, "loss": 0.9441, "step": 3116 }, { "epoch": 0.1861340021497671, "grad_norm": 7.129799842834473, "learning_rate": 9.043859067082476e-06, "loss": 0.9352, "step": 3117 }, { "epoch": 0.18619371790278275, "grad_norm": 1.9440948963165283, "learning_rate": 9.043195541105434e-06, "loss": 0.8841, "step": 3118 }, { "epoch": 0.1862534336557984, "grad_norm": 2.4424805641174316, "learning_rate": 9.042532015128394e-06, "loss": 0.9041, "step": 3119 }, { "epoch": 0.18631314940881405, "grad_norm": 2.4522483348846436, "learning_rate": 9.04186848915135e-06, "loss": 0.9225, "step": 3120 }, { "epoch": 0.1863728651618297, "grad_norm": 2.2359707355499268, "learning_rate": 9.041204963174309e-06, "loss": 0.9133, "step": 3121 }, { "epoch": 0.18643258091484533, "grad_norm": 2.305180072784424, "learning_rate": 9.040541437197267e-06, "loss": 0.8996, "step": 3122 }, { "epoch": 0.186492296667861, "grad_norm": 2.726963758468628, "learning_rate": 9.039877911220225e-06, "loss": 0.8971, "step": 3123 }, { "epoch": 0.18655201242087663, "grad_norm": 1.957698941230774, "learning_rate": 9.039214385243183e-06, "loss": 0.9157, "step": 3124 }, { "epoch": 0.18661172817389227, "grad_norm": 2.5770084857940674, "learning_rate": 9.038550859266141e-06, "loss": 0.9072, "step": 3125 }, { "epoch": 0.18667144392690793, "grad_norm": 2.8853001594543457, "learning_rate": 9.037887333289099e-06, "loss": 0.8936, "step": 3126 }, { "epoch": 0.18673115967992357, "grad_norm": 2.1196188926696777, "learning_rate": 9.037223807312057e-06, "loss": 0.895, "step": 3127 }, { "epoch": 0.1867908754329392, "grad_norm": 1.7092705965042114, "learning_rate": 9.036560281335015e-06, "loss": 0.9185, "step": 3128 }, { "epoch": 0.18685059118595485, "grad_norm": 2.00557804107666, "learning_rate": 9.035896755357973e-06, "loss": 0.9427, "step": 3129 }, { "epoch": 0.1869103069389705, "grad_norm": 2.9108238220214844, "learning_rate": 9.035233229380932e-06, "loss": 0.9389, "step": 3130 }, { "epoch": 0.18697002269198615, "grad_norm": 8.11083984375, "learning_rate": 9.03456970340389e-06, "loss": 0.9847, "step": 3131 }, { "epoch": 0.1870297384450018, "grad_norm": 2.603393077850342, "learning_rate": 9.033906177426846e-06, "loss": 0.9642, "step": 3132 }, { "epoch": 0.18708945419801742, "grad_norm": 2.350829839706421, "learning_rate": 9.033242651449806e-06, "loss": 0.9262, "step": 3133 }, { "epoch": 0.1871491699510331, "grad_norm": 1.9235342741012573, "learning_rate": 9.032579125472764e-06, "loss": 0.8945, "step": 3134 }, { "epoch": 0.18720888570404873, "grad_norm": 1.671744704246521, "learning_rate": 9.03191559949572e-06, "loss": 0.9171, "step": 3135 }, { "epoch": 0.18726860145706437, "grad_norm": 2.1937053203582764, "learning_rate": 9.031252073518678e-06, "loss": 0.9275, "step": 3136 }, { "epoch": 0.18732831721008003, "grad_norm": 2.0886502265930176, "learning_rate": 9.030588547541637e-06, "loss": 0.8855, "step": 3137 }, { "epoch": 0.18738803296309567, "grad_norm": 1.7454450130462646, "learning_rate": 9.029925021564595e-06, "loss": 0.9379, "step": 3138 }, { "epoch": 0.1874477487161113, "grad_norm": 2.296339988708496, "learning_rate": 9.029261495587553e-06, "loss": 0.9003, "step": 3139 }, { "epoch": 0.18750746446912694, "grad_norm": 2.225963830947876, "learning_rate": 9.02859796961051e-06, "loss": 0.9384, "step": 3140 }, { "epoch": 0.1875671802221426, "grad_norm": 2.3598973751068115, "learning_rate": 9.027934443633469e-06, "loss": 0.915, "step": 3141 }, { "epoch": 0.18762689597515825, "grad_norm": 2.595510244369507, "learning_rate": 9.027270917656427e-06, "loss": 0.8895, "step": 3142 }, { "epoch": 0.18768661172817389, "grad_norm": 2.5722484588623047, "learning_rate": 9.026607391679385e-06, "loss": 0.9401, "step": 3143 }, { "epoch": 0.18774632748118955, "grad_norm": 2.165654182434082, "learning_rate": 9.025943865702343e-06, "loss": 0.9287, "step": 3144 }, { "epoch": 0.1878060432342052, "grad_norm": 1.8270081281661987, "learning_rate": 9.025280339725301e-06, "loss": 0.9655, "step": 3145 }, { "epoch": 0.18786575898722083, "grad_norm": 1.8382525444030762, "learning_rate": 9.02461681374826e-06, "loss": 0.9648, "step": 3146 }, { "epoch": 0.18792547474023646, "grad_norm": 2.1212055683135986, "learning_rate": 9.023953287771216e-06, "loss": 0.9376, "step": 3147 }, { "epoch": 0.18798519049325213, "grad_norm": 2.9518537521362305, "learning_rate": 9.023289761794176e-06, "loss": 0.9105, "step": 3148 }, { "epoch": 0.18804490624626777, "grad_norm": 1.5546094179153442, "learning_rate": 9.022626235817134e-06, "loss": 0.9055, "step": 3149 }, { "epoch": 0.1881046219992834, "grad_norm": 1.7707730531692505, "learning_rate": 9.02196270984009e-06, "loss": 0.9185, "step": 3150 }, { "epoch": 0.18816433775229904, "grad_norm": 2.006234645843506, "learning_rate": 9.02129918386305e-06, "loss": 0.9171, "step": 3151 }, { "epoch": 0.1882240535053147, "grad_norm": 2.297419309616089, "learning_rate": 9.020635657886006e-06, "loss": 0.8941, "step": 3152 }, { "epoch": 0.18828376925833035, "grad_norm": 2.0092856884002686, "learning_rate": 9.019972131908964e-06, "loss": 0.9485, "step": 3153 }, { "epoch": 0.18834348501134598, "grad_norm": 2.8092658519744873, "learning_rate": 9.019308605931924e-06, "loss": 0.9637, "step": 3154 }, { "epoch": 0.18840320076436165, "grad_norm": 3.3733129501342773, "learning_rate": 9.01864507995488e-06, "loss": 0.9606, "step": 3155 }, { "epoch": 0.1884629165173773, "grad_norm": 2.0855507850646973, "learning_rate": 9.017981553977839e-06, "loss": 0.939, "step": 3156 }, { "epoch": 0.18852263227039293, "grad_norm": 2.944932222366333, "learning_rate": 9.017318028000797e-06, "loss": 0.8958, "step": 3157 }, { "epoch": 0.18858234802340856, "grad_norm": 2.12363862991333, "learning_rate": 9.016654502023755e-06, "loss": 0.936, "step": 3158 }, { "epoch": 0.18864206377642423, "grad_norm": 2.101557731628418, "learning_rate": 9.015990976046713e-06, "loss": 0.9059, "step": 3159 }, { "epoch": 0.18870177952943987, "grad_norm": 3.0544941425323486, "learning_rate": 9.015327450069671e-06, "loss": 0.894, "step": 3160 }, { "epoch": 0.1887614952824555, "grad_norm": 3.416999578475952, "learning_rate": 9.01466392409263e-06, "loss": 0.8892, "step": 3161 }, { "epoch": 0.18882121103547117, "grad_norm": 2.4099228382110596, "learning_rate": 9.014000398115587e-06, "loss": 0.9349, "step": 3162 }, { "epoch": 0.1888809267884868, "grad_norm": 2.5914413928985596, "learning_rate": 9.013336872138545e-06, "loss": 0.9199, "step": 3163 }, { "epoch": 0.18894064254150245, "grad_norm": 3.892718553543091, "learning_rate": 9.012673346161504e-06, "loss": 0.9518, "step": 3164 }, { "epoch": 0.18900035829451808, "grad_norm": 2.4993295669555664, "learning_rate": 9.01200982018446e-06, "loss": 0.9244, "step": 3165 }, { "epoch": 0.18906007404753375, "grad_norm": 2.3775265216827393, "learning_rate": 9.01134629420742e-06, "loss": 0.983, "step": 3166 }, { "epoch": 0.1891197898005494, "grad_norm": 2.372338056564331, "learning_rate": 9.010682768230376e-06, "loss": 0.9673, "step": 3167 }, { "epoch": 0.18917950555356502, "grad_norm": 2.3798325061798096, "learning_rate": 9.010019242253334e-06, "loss": 0.8966, "step": 3168 }, { "epoch": 0.18923922130658066, "grad_norm": 3.12447190284729, "learning_rate": 9.009355716276294e-06, "loss": 0.9722, "step": 3169 }, { "epoch": 0.18929893705959633, "grad_norm": 2.611337423324585, "learning_rate": 9.00869219029925e-06, "loss": 0.9218, "step": 3170 }, { "epoch": 0.18935865281261197, "grad_norm": 2.423475742340088, "learning_rate": 9.008028664322209e-06, "loss": 0.9435, "step": 3171 }, { "epoch": 0.1894183685656276, "grad_norm": 2.0932974815368652, "learning_rate": 9.007365138345167e-06, "loss": 0.9422, "step": 3172 }, { "epoch": 0.18947808431864327, "grad_norm": 2.093329429626465, "learning_rate": 9.006701612368125e-06, "loss": 0.9611, "step": 3173 }, { "epoch": 0.1895378000716589, "grad_norm": 2.6232845783233643, "learning_rate": 9.006038086391083e-06, "loss": 0.9417, "step": 3174 }, { "epoch": 0.18959751582467455, "grad_norm": 2.5876710414886475, "learning_rate": 9.005374560414041e-06, "loss": 0.9235, "step": 3175 }, { "epoch": 0.18965723157769018, "grad_norm": 1.9048247337341309, "learning_rate": 9.004711034436999e-06, "loss": 0.8927, "step": 3176 }, { "epoch": 0.18971694733070585, "grad_norm": 2.529872417449951, "learning_rate": 9.004047508459957e-06, "loss": 0.9034, "step": 3177 }, { "epoch": 0.1897766630837215, "grad_norm": 3.2408480644226074, "learning_rate": 9.003383982482915e-06, "loss": 0.9701, "step": 3178 }, { "epoch": 0.18983637883673712, "grad_norm": 3.2586543560028076, "learning_rate": 9.002720456505873e-06, "loss": 0.9395, "step": 3179 }, { "epoch": 0.1898960945897528, "grad_norm": 4.010632514953613, "learning_rate": 9.002056930528831e-06, "loss": 0.8806, "step": 3180 }, { "epoch": 0.18995581034276843, "grad_norm": 2.3573594093322754, "learning_rate": 9.00139340455179e-06, "loss": 0.9114, "step": 3181 }, { "epoch": 0.19001552609578407, "grad_norm": 2.550082206726074, "learning_rate": 9.000729878574746e-06, "loss": 0.9286, "step": 3182 }, { "epoch": 0.1900752418487997, "grad_norm": 2.2416129112243652, "learning_rate": 9.000066352597706e-06, "loss": 0.9027, "step": 3183 }, { "epoch": 0.19013495760181537, "grad_norm": 2.0289371013641357, "learning_rate": 8.999402826620664e-06, "loss": 0.9279, "step": 3184 }, { "epoch": 0.190194673354831, "grad_norm": 2.141396999359131, "learning_rate": 8.99873930064362e-06, "loss": 0.9333, "step": 3185 }, { "epoch": 0.19025438910784664, "grad_norm": 3.7326819896698, "learning_rate": 8.998075774666578e-06, "loss": 0.9104, "step": 3186 }, { "epoch": 0.19031410486086228, "grad_norm": 2.1914029121398926, "learning_rate": 8.997412248689536e-06, "loss": 0.9401, "step": 3187 }, { "epoch": 0.19037382061387795, "grad_norm": 1.9939093589782715, "learning_rate": 8.996748722712495e-06, "loss": 0.9512, "step": 3188 }, { "epoch": 0.19043353636689359, "grad_norm": 1.7227948904037476, "learning_rate": 8.996085196735453e-06, "loss": 0.8992, "step": 3189 }, { "epoch": 0.19049325211990922, "grad_norm": 2.659865617752075, "learning_rate": 8.99542167075841e-06, "loss": 0.9073, "step": 3190 }, { "epoch": 0.1905529678729249, "grad_norm": 2.5695080757141113, "learning_rate": 8.994758144781369e-06, "loss": 0.9016, "step": 3191 }, { "epoch": 0.19061268362594053, "grad_norm": 2.443331480026245, "learning_rate": 8.994094618804327e-06, "loss": 0.9002, "step": 3192 }, { "epoch": 0.19067239937895616, "grad_norm": 2.3638052940368652, "learning_rate": 8.993431092827285e-06, "loss": 0.9264, "step": 3193 }, { "epoch": 0.1907321151319718, "grad_norm": 2.346202850341797, "learning_rate": 8.992767566850243e-06, "loss": 0.9566, "step": 3194 }, { "epoch": 0.19079183088498747, "grad_norm": 2.16363525390625, "learning_rate": 8.992104040873201e-06, "loss": 0.9374, "step": 3195 }, { "epoch": 0.1908515466380031, "grad_norm": 2.320039987564087, "learning_rate": 8.99144051489616e-06, "loss": 0.9009, "step": 3196 }, { "epoch": 0.19091126239101874, "grad_norm": 1.8780782222747803, "learning_rate": 8.990776988919116e-06, "loss": 0.928, "step": 3197 }, { "epoch": 0.1909709781440344, "grad_norm": 2.519815683364868, "learning_rate": 8.990113462942076e-06, "loss": 0.9378, "step": 3198 }, { "epoch": 0.19103069389705005, "grad_norm": 2.0859756469726562, "learning_rate": 8.989449936965034e-06, "loss": 0.8829, "step": 3199 }, { "epoch": 0.19109040965006568, "grad_norm": 2.013587236404419, "learning_rate": 8.98878641098799e-06, "loss": 0.9297, "step": 3200 }, { "epoch": 0.19109040965006568, "eval_text_loss": 0.9539316892623901, "eval_text_runtime": 15.2206, "eval_text_samples_per_second": 262.801, "eval_text_steps_per_second": 0.526, "step": 3200 }, { "epoch": 0.19109040965006568, "eval_image_loss": 0.687603235244751, "eval_image_runtime": 4.9597, "eval_image_samples_per_second": 806.502, "eval_image_steps_per_second": 1.613, "step": 3200 }, { "epoch": 0.19109040965006568, "eval_video_loss": 1.1583529710769653, "eval_video_runtime": 76.2239, "eval_video_samples_per_second": 52.477, "eval_video_steps_per_second": 0.105, "step": 3200 }, { "epoch": 0.19115012540308132, "grad_norm": 1.8550992012023926, "learning_rate": 8.98812288501095e-06, "loss": 0.9366, "step": 3201 }, { "epoch": 0.191209841156097, "grad_norm": 3.3500425815582275, "learning_rate": 8.987459359033906e-06, "loss": 0.9375, "step": 3202 }, { "epoch": 0.19126955690911263, "grad_norm": 3.230117082595825, "learning_rate": 8.986795833056864e-06, "loss": 0.9486, "step": 3203 }, { "epoch": 0.19132927266212826, "grad_norm": 4.111449718475342, "learning_rate": 8.986132307079824e-06, "loss": 0.9206, "step": 3204 }, { "epoch": 0.1913889884151439, "grad_norm": 2.6111180782318115, "learning_rate": 8.98546878110278e-06, "loss": 0.94, "step": 3205 }, { "epoch": 0.19144870416815957, "grad_norm": 3.797515392303467, "learning_rate": 8.984805255125739e-06, "loss": 0.9327, "step": 3206 }, { "epoch": 0.1915084199211752, "grad_norm": 2.2833731174468994, "learning_rate": 8.984141729148697e-06, "loss": 0.9307, "step": 3207 }, { "epoch": 0.19156813567419084, "grad_norm": 2.2634212970733643, "learning_rate": 8.983478203171655e-06, "loss": 0.9326, "step": 3208 }, { "epoch": 0.1916278514272065, "grad_norm": 2.2744688987731934, "learning_rate": 8.982814677194613e-06, "loss": 0.9101, "step": 3209 }, { "epoch": 0.19168756718022215, "grad_norm": 2.3616654872894287, "learning_rate": 8.982151151217571e-06, "loss": 0.8781, "step": 3210 }, { "epoch": 0.19174728293323778, "grad_norm": 1.9998209476470947, "learning_rate": 8.98148762524053e-06, "loss": 0.9672, "step": 3211 }, { "epoch": 0.19180699868625342, "grad_norm": 2.7433764934539795, "learning_rate": 8.980824099263487e-06, "loss": 0.9251, "step": 3212 }, { "epoch": 0.1918667144392691, "grad_norm": 2.6133806705474854, "learning_rate": 8.980160573286445e-06, "loss": 0.9584, "step": 3213 }, { "epoch": 0.19192643019228472, "grad_norm": 3.0228583812713623, "learning_rate": 8.979497047309403e-06, "loss": 0.9343, "step": 3214 }, { "epoch": 0.19198614594530036, "grad_norm": 2.2328386306762695, "learning_rate": 8.97883352133236e-06, "loss": 0.908, "step": 3215 }, { "epoch": 0.19204586169831603, "grad_norm": 2.120366096496582, "learning_rate": 8.97816999535532e-06, "loss": 0.9163, "step": 3216 }, { "epoch": 0.19210557745133167, "grad_norm": 3.1703083515167236, "learning_rate": 8.977506469378276e-06, "loss": 0.9411, "step": 3217 }, { "epoch": 0.1921652932043473, "grad_norm": 2.380579710006714, "learning_rate": 8.976842943401234e-06, "loss": 0.9282, "step": 3218 }, { "epoch": 0.19222500895736294, "grad_norm": 2.0447418689727783, "learning_rate": 8.976179417424194e-06, "loss": 0.9168, "step": 3219 }, { "epoch": 0.1922847247103786, "grad_norm": 3.567750930786133, "learning_rate": 8.97551589144715e-06, "loss": 0.9328, "step": 3220 }, { "epoch": 0.19234444046339425, "grad_norm": 7.813692092895508, "learning_rate": 8.974852365470109e-06, "loss": 0.9129, "step": 3221 }, { "epoch": 0.19240415621640988, "grad_norm": 2.0921761989593506, "learning_rate": 8.974188839493067e-06, "loss": 0.9151, "step": 3222 }, { "epoch": 0.19246387196942552, "grad_norm": 3.4487133026123047, "learning_rate": 8.973525313516025e-06, "loss": 0.9612, "step": 3223 }, { "epoch": 0.1925235877224412, "grad_norm": 2.8912301063537598, "learning_rate": 8.972861787538983e-06, "loss": 0.9276, "step": 3224 }, { "epoch": 0.19258330347545682, "grad_norm": 2.6648495197296143, "learning_rate": 8.972198261561941e-06, "loss": 0.9095, "step": 3225 }, { "epoch": 0.19264301922847246, "grad_norm": 2.3635926246643066, "learning_rate": 8.971534735584899e-06, "loss": 0.9295, "step": 3226 }, { "epoch": 0.19270273498148813, "grad_norm": 2.122685670852661, "learning_rate": 8.970871209607857e-06, "loss": 0.9479, "step": 3227 }, { "epoch": 0.19276245073450377, "grad_norm": 1.7140756845474243, "learning_rate": 8.970207683630815e-06, "loss": 0.9114, "step": 3228 }, { "epoch": 0.1928221664875194, "grad_norm": 2.37955904006958, "learning_rate": 8.969544157653773e-06, "loss": 0.9316, "step": 3229 }, { "epoch": 0.19288188224053504, "grad_norm": 3.3083786964416504, "learning_rate": 8.968880631676731e-06, "loss": 0.9074, "step": 3230 }, { "epoch": 0.1929415979935507, "grad_norm": 2.4805808067321777, "learning_rate": 8.96821710569969e-06, "loss": 0.9381, "step": 3231 }, { "epoch": 0.19300131374656634, "grad_norm": 1.7345174551010132, "learning_rate": 8.967553579722646e-06, "loss": 0.9044, "step": 3232 }, { "epoch": 0.19306102949958198, "grad_norm": 6.977373123168945, "learning_rate": 8.966890053745606e-06, "loss": 0.9182, "step": 3233 }, { "epoch": 0.19312074525259765, "grad_norm": 2.3878700733184814, "learning_rate": 8.966226527768564e-06, "loss": 0.9162, "step": 3234 }, { "epoch": 0.19318046100561329, "grad_norm": 1.9443367719650269, "learning_rate": 8.96556300179152e-06, "loss": 0.9024, "step": 3235 }, { "epoch": 0.19324017675862892, "grad_norm": 4.586809158325195, "learning_rate": 8.964899475814478e-06, "loss": 0.907, "step": 3236 }, { "epoch": 0.19329989251164456, "grad_norm": 3.50197434425354, "learning_rate": 8.964235949837436e-06, "loss": 0.9174, "step": 3237 }, { "epoch": 0.19335960826466023, "grad_norm": 2.51914644241333, "learning_rate": 8.963572423860395e-06, "loss": 0.9399, "step": 3238 }, { "epoch": 0.19341932401767586, "grad_norm": 1.9355599880218506, "learning_rate": 8.962908897883353e-06, "loss": 0.9046, "step": 3239 }, { "epoch": 0.1934790397706915, "grad_norm": 3.801457643508911, "learning_rate": 8.96224537190631e-06, "loss": 0.9358, "step": 3240 }, { "epoch": 0.19353875552370717, "grad_norm": 3.3705687522888184, "learning_rate": 8.961581845929269e-06, "loss": 0.9457, "step": 3241 }, { "epoch": 0.1935984712767228, "grad_norm": 4.8189616203308105, "learning_rate": 8.960918319952227e-06, "loss": 0.9101, "step": 3242 }, { "epoch": 0.19365818702973844, "grad_norm": 2.7183592319488525, "learning_rate": 8.960254793975185e-06, "loss": 0.8599, "step": 3243 }, { "epoch": 0.19371790278275408, "grad_norm": 4.488056659698486, "learning_rate": 8.959591267998143e-06, "loss": 0.9228, "step": 3244 }, { "epoch": 0.19377761853576975, "grad_norm": 1.7417351007461548, "learning_rate": 8.958927742021101e-06, "loss": 0.8618, "step": 3245 }, { "epoch": 0.19383733428878538, "grad_norm": 2.4717249870300293, "learning_rate": 8.95826421604406e-06, "loss": 0.9482, "step": 3246 }, { "epoch": 0.19389705004180102, "grad_norm": 3.2188315391540527, "learning_rate": 8.957600690067016e-06, "loss": 0.9379, "step": 3247 }, { "epoch": 0.19395676579481666, "grad_norm": 3.3168559074401855, "learning_rate": 8.956937164089976e-06, "loss": 0.9343, "step": 3248 }, { "epoch": 0.19401648154783233, "grad_norm": 2.541382312774658, "learning_rate": 8.956273638112934e-06, "loss": 0.9231, "step": 3249 }, { "epoch": 0.19407619730084796, "grad_norm": 3.34621524810791, "learning_rate": 8.95561011213589e-06, "loss": 0.9134, "step": 3250 }, { "epoch": 0.1941359130538636, "grad_norm": 2.1328203678131104, "learning_rate": 8.95494658615885e-06, "loss": 0.9472, "step": 3251 }, { "epoch": 0.19419562880687927, "grad_norm": 3.201634168624878, "learning_rate": 8.954283060181806e-06, "loss": 0.9727, "step": 3252 }, { "epoch": 0.1942553445598949, "grad_norm": 3.59269118309021, "learning_rate": 8.953619534204764e-06, "loss": 0.897, "step": 3253 }, { "epoch": 0.19431506031291054, "grad_norm": 3.3035874366760254, "learning_rate": 8.952956008227724e-06, "loss": 0.9458, "step": 3254 }, { "epoch": 0.19437477606592618, "grad_norm": 3.5684409141540527, "learning_rate": 8.95229248225068e-06, "loss": 0.9277, "step": 3255 }, { "epoch": 0.19443449181894185, "grad_norm": 2.1574926376342773, "learning_rate": 8.951628956273639e-06, "loss": 0.9207, "step": 3256 }, { "epoch": 0.19449420757195748, "grad_norm": 2.5121190547943115, "learning_rate": 8.950965430296597e-06, "loss": 0.9304, "step": 3257 }, { "epoch": 0.19455392332497312, "grad_norm": 2.9619646072387695, "learning_rate": 8.950301904319555e-06, "loss": 0.9355, "step": 3258 }, { "epoch": 0.1946136390779888, "grad_norm": 1.8969885110855103, "learning_rate": 8.949638378342513e-06, "loss": 0.9257, "step": 3259 }, { "epoch": 0.19467335483100442, "grad_norm": 2.6609318256378174, "learning_rate": 8.948974852365471e-06, "loss": 0.9017, "step": 3260 }, { "epoch": 0.19473307058402006, "grad_norm": 2.8278133869171143, "learning_rate": 8.948311326388429e-06, "loss": 0.9228, "step": 3261 }, { "epoch": 0.1947927863370357, "grad_norm": 2.493496894836426, "learning_rate": 8.947647800411387e-06, "loss": 0.8777, "step": 3262 }, { "epoch": 0.19485250209005137, "grad_norm": 1.8900476694107056, "learning_rate": 8.946984274434345e-06, "loss": 0.8933, "step": 3263 }, { "epoch": 0.194912217843067, "grad_norm": 3.7199268341064453, "learning_rate": 8.946320748457303e-06, "loss": 0.9204, "step": 3264 }, { "epoch": 0.19497193359608264, "grad_norm": 2.3370587825775146, "learning_rate": 8.94565722248026e-06, "loss": 0.8995, "step": 3265 }, { "epoch": 0.19503164934909828, "grad_norm": 4.5803303718566895, "learning_rate": 8.94499369650322e-06, "loss": 0.9107, "step": 3266 }, { "epoch": 0.19509136510211394, "grad_norm": 2.7158827781677246, "learning_rate": 8.944330170526176e-06, "loss": 0.9187, "step": 3267 }, { "epoch": 0.19515108085512958, "grad_norm": 3.4932727813720703, "learning_rate": 8.943666644549134e-06, "loss": 0.8792, "step": 3268 }, { "epoch": 0.19521079660814522, "grad_norm": 2.201427698135376, "learning_rate": 8.943003118572094e-06, "loss": 0.9631, "step": 3269 }, { "epoch": 0.1952705123611609, "grad_norm": 3.4391555786132812, "learning_rate": 8.94233959259505e-06, "loss": 0.9048, "step": 3270 }, { "epoch": 0.19533022811417652, "grad_norm": 3.7274773120880127, "learning_rate": 8.941676066618008e-06, "loss": 0.9295, "step": 3271 }, { "epoch": 0.19538994386719216, "grad_norm": 2.2427103519439697, "learning_rate": 8.941012540640967e-06, "loss": 0.9044, "step": 3272 }, { "epoch": 0.1954496596202078, "grad_norm": 1.5048335790634155, "learning_rate": 8.940349014663925e-06, "loss": 0.9321, "step": 3273 }, { "epoch": 0.19550937537322347, "grad_norm": 5.0823893547058105, "learning_rate": 8.939685488686883e-06, "loss": 0.9509, "step": 3274 }, { "epoch": 0.1955690911262391, "grad_norm": 2.580021381378174, "learning_rate": 8.939021962709841e-06, "loss": 0.868, "step": 3275 }, { "epoch": 0.19562880687925474, "grad_norm": 14.79921817779541, "learning_rate": 8.938358436732799e-06, "loss": 0.9426, "step": 3276 }, { "epoch": 0.1956885226322704, "grad_norm": 1.8933038711547852, "learning_rate": 8.937694910755757e-06, "loss": 0.9016, "step": 3277 }, { "epoch": 0.19574823838528604, "grad_norm": 3.804062843322754, "learning_rate": 8.937031384778715e-06, "loss": 0.9328, "step": 3278 }, { "epoch": 0.19580795413830168, "grad_norm": 2.55600643157959, "learning_rate": 8.936367858801673e-06, "loss": 0.9421, "step": 3279 }, { "epoch": 0.19586766989131732, "grad_norm": 2.497607946395874, "learning_rate": 8.935704332824631e-06, "loss": 0.9298, "step": 3280 }, { "epoch": 0.19592738564433299, "grad_norm": 1.8930959701538086, "learning_rate": 8.93504080684759e-06, "loss": 0.9563, "step": 3281 }, { "epoch": 0.19598710139734862, "grad_norm": 3.046433210372925, "learning_rate": 8.934377280870546e-06, "loss": 0.9054, "step": 3282 }, { "epoch": 0.19604681715036426, "grad_norm": 2.590923547744751, "learning_rate": 8.933713754893506e-06, "loss": 0.9311, "step": 3283 }, { "epoch": 0.1961065329033799, "grad_norm": 2.002530097961426, "learning_rate": 8.933050228916464e-06, "loss": 0.9276, "step": 3284 }, { "epoch": 0.19616624865639556, "grad_norm": 1.8921226263046265, "learning_rate": 8.93238670293942e-06, "loss": 0.9228, "step": 3285 }, { "epoch": 0.1962259644094112, "grad_norm": 2.4231116771698, "learning_rate": 8.93172317696238e-06, "loss": 0.9057, "step": 3286 }, { "epoch": 0.19628568016242684, "grad_norm": 2.090277910232544, "learning_rate": 8.931059650985336e-06, "loss": 0.8837, "step": 3287 }, { "epoch": 0.1963453959154425, "grad_norm": 1.6943961381912231, "learning_rate": 8.930396125008294e-06, "loss": 0.9029, "step": 3288 }, { "epoch": 0.19640511166845814, "grad_norm": 2.3685007095336914, "learning_rate": 8.929732599031253e-06, "loss": 0.9039, "step": 3289 }, { "epoch": 0.19646482742147378, "grad_norm": 2.1684815883636475, "learning_rate": 8.92906907305421e-06, "loss": 0.9134, "step": 3290 }, { "epoch": 0.19652454317448942, "grad_norm": 1.7893949747085571, "learning_rate": 8.928405547077169e-06, "loss": 0.8807, "step": 3291 }, { "epoch": 0.19658425892750508, "grad_norm": 3.37268328666687, "learning_rate": 8.927742021100127e-06, "loss": 0.9501, "step": 3292 }, { "epoch": 0.19664397468052072, "grad_norm": 2.6071815490722656, "learning_rate": 8.927078495123085e-06, "loss": 0.9606, "step": 3293 }, { "epoch": 0.19670369043353636, "grad_norm": 2.213508129119873, "learning_rate": 8.926414969146043e-06, "loss": 0.9302, "step": 3294 }, { "epoch": 0.19676340618655203, "grad_norm": 2.144961357116699, "learning_rate": 8.925751443169001e-06, "loss": 0.8919, "step": 3295 }, { "epoch": 0.19682312193956766, "grad_norm": 2.969162702560425, "learning_rate": 8.92508791719196e-06, "loss": 0.95, "step": 3296 }, { "epoch": 0.1968828376925833, "grad_norm": 2.323972225189209, "learning_rate": 8.924424391214916e-06, "loss": 0.9191, "step": 3297 }, { "epoch": 0.19694255344559894, "grad_norm": 2.9398484230041504, "learning_rate": 8.923760865237875e-06, "loss": 0.9246, "step": 3298 }, { "epoch": 0.1970022691986146, "grad_norm": 4.112067222595215, "learning_rate": 8.923097339260834e-06, "loss": 0.9332, "step": 3299 }, { "epoch": 0.19706198495163024, "grad_norm": 1.806479811668396, "learning_rate": 8.92243381328379e-06, "loss": 0.874, "step": 3300 }, { "epoch": 0.19712170070464588, "grad_norm": 1.945587158203125, "learning_rate": 8.92177028730675e-06, "loss": 0.9147, "step": 3301 }, { "epoch": 0.19718141645766152, "grad_norm": 3.0966484546661377, "learning_rate": 8.921106761329706e-06, "loss": 0.8984, "step": 3302 }, { "epoch": 0.19724113221067718, "grad_norm": 1.8168975114822388, "learning_rate": 8.920443235352664e-06, "loss": 0.9012, "step": 3303 }, { "epoch": 0.19730084796369282, "grad_norm": 2.467531204223633, "learning_rate": 8.919779709375624e-06, "loss": 0.9246, "step": 3304 }, { "epoch": 0.19736056371670846, "grad_norm": 1.5517065525054932, "learning_rate": 8.91911618339858e-06, "loss": 0.9157, "step": 3305 }, { "epoch": 0.19742027946972412, "grad_norm": 2.398730993270874, "learning_rate": 8.918452657421539e-06, "loss": 0.9185, "step": 3306 }, { "epoch": 0.19747999522273976, "grad_norm": 1.7818669080734253, "learning_rate": 8.917789131444497e-06, "loss": 0.8887, "step": 3307 }, { "epoch": 0.1975397109757554, "grad_norm": 2.691899061203003, "learning_rate": 8.917125605467455e-06, "loss": 0.9246, "step": 3308 }, { "epoch": 0.19759942672877104, "grad_norm": 2.8124923706054688, "learning_rate": 8.916462079490413e-06, "loss": 0.9435, "step": 3309 }, { "epoch": 0.1976591424817867, "grad_norm": 1.9348177909851074, "learning_rate": 8.915798553513371e-06, "loss": 0.9401, "step": 3310 }, { "epoch": 0.19771885823480234, "grad_norm": 3.260023355484009, "learning_rate": 8.915135027536329e-06, "loss": 0.9099, "step": 3311 }, { "epoch": 0.19777857398781798, "grad_norm": 1.719078540802002, "learning_rate": 8.914471501559287e-06, "loss": 0.925, "step": 3312 }, { "epoch": 0.19783828974083364, "grad_norm": 2.1873669624328613, "learning_rate": 8.913807975582245e-06, "loss": 0.9173, "step": 3313 }, { "epoch": 0.19789800549384928, "grad_norm": 1.5969825983047485, "learning_rate": 8.913144449605203e-06, "loss": 0.8964, "step": 3314 }, { "epoch": 0.19795772124686492, "grad_norm": 1.9972409009933472, "learning_rate": 8.912480923628161e-06, "loss": 0.9316, "step": 3315 }, { "epoch": 0.19801743699988056, "grad_norm": 2.6418681144714355, "learning_rate": 8.91181739765112e-06, "loss": 0.9221, "step": 3316 }, { "epoch": 0.19807715275289622, "grad_norm": 2.9931764602661133, "learning_rate": 8.911153871674076e-06, "loss": 0.9542, "step": 3317 }, { "epoch": 0.19813686850591186, "grad_norm": 2.870511054992676, "learning_rate": 8.910490345697034e-06, "loss": 0.9476, "step": 3318 }, { "epoch": 0.1981965842589275, "grad_norm": 3.327174425125122, "learning_rate": 8.909826819719994e-06, "loss": 0.917, "step": 3319 }, { "epoch": 0.19825630001194314, "grad_norm": 8.794565200805664, "learning_rate": 8.90916329374295e-06, "loss": 0.928, "step": 3320 }, { "epoch": 0.1983160157649588, "grad_norm": 2.990234375, "learning_rate": 8.908499767765908e-06, "loss": 0.9498, "step": 3321 }, { "epoch": 0.19837573151797444, "grad_norm": 2.5087242126464844, "learning_rate": 8.907836241788866e-06, "loss": 0.9011, "step": 3322 }, { "epoch": 0.19843544727099008, "grad_norm": 1.8168305158615112, "learning_rate": 8.907172715811825e-06, "loss": 0.9225, "step": 3323 }, { "epoch": 0.19849516302400574, "grad_norm": 1.956884741783142, "learning_rate": 8.906509189834783e-06, "loss": 0.9203, "step": 3324 }, { "epoch": 0.19855487877702138, "grad_norm": 3.249246835708618, "learning_rate": 8.90584566385774e-06, "loss": 0.954, "step": 3325 }, { "epoch": 0.19861459453003702, "grad_norm": 2.165188789367676, "learning_rate": 8.905182137880699e-06, "loss": 0.9418, "step": 3326 }, { "epoch": 0.19867431028305266, "grad_norm": 2.3182201385498047, "learning_rate": 8.904518611903657e-06, "loss": 0.9308, "step": 3327 }, { "epoch": 0.19873402603606832, "grad_norm": 5.1954851150512695, "learning_rate": 8.903855085926615e-06, "loss": 0.9414, "step": 3328 }, { "epoch": 0.19879374178908396, "grad_norm": 2.1918904781341553, "learning_rate": 8.903191559949573e-06, "loss": 0.9267, "step": 3329 }, { "epoch": 0.1988534575420996, "grad_norm": 2.736314535140991, "learning_rate": 8.902528033972531e-06, "loss": 0.9107, "step": 3330 }, { "epoch": 0.19891317329511526, "grad_norm": 2.1920955181121826, "learning_rate": 8.90186450799549e-06, "loss": 0.9065, "step": 3331 }, { "epoch": 0.1989728890481309, "grad_norm": 1.4689584970474243, "learning_rate": 8.901200982018446e-06, "loss": 0.9039, "step": 3332 }, { "epoch": 0.19903260480114654, "grad_norm": 2.4931180477142334, "learning_rate": 8.900537456041406e-06, "loss": 0.9366, "step": 3333 }, { "epoch": 0.19909232055416218, "grad_norm": 6.243896961212158, "learning_rate": 8.899873930064364e-06, "loss": 0.9059, "step": 3334 }, { "epoch": 0.19915203630717784, "grad_norm": 1.8010313510894775, "learning_rate": 8.89921040408732e-06, "loss": 0.9063, "step": 3335 }, { "epoch": 0.19921175206019348, "grad_norm": 1.8680100440979004, "learning_rate": 8.89854687811028e-06, "loss": 0.9152, "step": 3336 }, { "epoch": 0.19927146781320912, "grad_norm": 1.9554872512817383, "learning_rate": 8.897883352133236e-06, "loss": 0.9052, "step": 3337 }, { "epoch": 0.19933118356622476, "grad_norm": 2.63260555267334, "learning_rate": 8.897219826156194e-06, "loss": 0.9249, "step": 3338 }, { "epoch": 0.19939089931924042, "grad_norm": 2.191436290740967, "learning_rate": 8.896556300179153e-06, "loss": 0.8917, "step": 3339 }, { "epoch": 0.19945061507225606, "grad_norm": 2.2073724269866943, "learning_rate": 8.89589277420211e-06, "loss": 0.9264, "step": 3340 }, { "epoch": 0.1995103308252717, "grad_norm": 5.57857608795166, "learning_rate": 8.895229248225069e-06, "loss": 0.9471, "step": 3341 }, { "epoch": 0.19957004657828736, "grad_norm": 5.665574073791504, "learning_rate": 8.894565722248027e-06, "loss": 0.9234, "step": 3342 }, { "epoch": 0.199629762331303, "grad_norm": 2.4347825050354004, "learning_rate": 8.893902196270985e-06, "loss": 0.9293, "step": 3343 }, { "epoch": 0.19968947808431864, "grad_norm": 2.3190114498138428, "learning_rate": 8.893238670293943e-06, "loss": 0.8938, "step": 3344 }, { "epoch": 0.19974919383733428, "grad_norm": 1.9059354066848755, "learning_rate": 8.892575144316901e-06, "loss": 0.9525, "step": 3345 }, { "epoch": 0.19980890959034994, "grad_norm": 2.296891927719116, "learning_rate": 8.89191161833986e-06, "loss": 0.8745, "step": 3346 }, { "epoch": 0.19986862534336558, "grad_norm": 3.3314030170440674, "learning_rate": 8.891248092362816e-06, "loss": 0.919, "step": 3347 }, { "epoch": 0.19992834109638122, "grad_norm": 1.9845548868179321, "learning_rate": 8.890584566385775e-06, "loss": 0.9435, "step": 3348 }, { "epoch": 0.19998805684939688, "grad_norm": 2.017897367477417, "learning_rate": 8.889921040408734e-06, "loss": 0.9297, "step": 3349 }, { "epoch": 0.20004777260241252, "grad_norm": 2.7166147232055664, "learning_rate": 8.88925751443169e-06, "loss": 0.9223, "step": 3350 }, { "epoch": 0.20010748835542816, "grad_norm": 2.465357542037964, "learning_rate": 8.88859398845465e-06, "loss": 0.9246, "step": 3351 }, { "epoch": 0.2001672041084438, "grad_norm": 1.5845870971679688, "learning_rate": 8.887930462477606e-06, "loss": 0.9501, "step": 3352 }, { "epoch": 0.20022691986145946, "grad_norm": 2.123839855194092, "learning_rate": 8.887266936500564e-06, "loss": 0.941, "step": 3353 }, { "epoch": 0.2002866356144751, "grad_norm": 1.9751282930374146, "learning_rate": 8.886603410523524e-06, "loss": 0.9172, "step": 3354 }, { "epoch": 0.20034635136749074, "grad_norm": 2.1994664669036865, "learning_rate": 8.88593988454648e-06, "loss": 0.8797, "step": 3355 }, { "epoch": 0.20040606712050638, "grad_norm": 2.4357242584228516, "learning_rate": 8.885276358569439e-06, "loss": 0.9266, "step": 3356 }, { "epoch": 0.20046578287352204, "grad_norm": 4.976998805999756, "learning_rate": 8.884612832592397e-06, "loss": 0.8835, "step": 3357 }, { "epoch": 0.20052549862653768, "grad_norm": 2.2554900646209717, "learning_rate": 8.883949306615355e-06, "loss": 0.9266, "step": 3358 }, { "epoch": 0.20058521437955332, "grad_norm": 3.6387343406677246, "learning_rate": 8.883285780638313e-06, "loss": 0.9281, "step": 3359 }, { "epoch": 0.20064493013256898, "grad_norm": 3.699528694152832, "learning_rate": 8.882622254661271e-06, "loss": 0.9656, "step": 3360 }, { "epoch": 0.20070464588558462, "grad_norm": 2.056601047515869, "learning_rate": 8.881958728684229e-06, "loss": 0.9163, "step": 3361 }, { "epoch": 0.20076436163860026, "grad_norm": 3.458517074584961, "learning_rate": 8.881295202707187e-06, "loss": 0.9191, "step": 3362 }, { "epoch": 0.2008240773916159, "grad_norm": 1.8905346393585205, "learning_rate": 8.880631676730145e-06, "loss": 0.905, "step": 3363 }, { "epoch": 0.20088379314463156, "grad_norm": 3.7912020683288574, "learning_rate": 8.879968150753103e-06, "loss": 0.9384, "step": 3364 }, { "epoch": 0.2009435088976472, "grad_norm": 2.2298505306243896, "learning_rate": 8.879304624776061e-06, "loss": 0.9334, "step": 3365 }, { "epoch": 0.20100322465066284, "grad_norm": 2.3068222999572754, "learning_rate": 8.87864109879902e-06, "loss": 0.8765, "step": 3366 }, { "epoch": 0.2010629404036785, "grad_norm": 1.8179843425750732, "learning_rate": 8.877977572821976e-06, "loss": 0.9003, "step": 3367 }, { "epoch": 0.20112265615669414, "grad_norm": 2.7003769874572754, "learning_rate": 8.877314046844934e-06, "loss": 0.8994, "step": 3368 }, { "epoch": 0.20118237190970978, "grad_norm": 1.9298497438430786, "learning_rate": 8.876650520867894e-06, "loss": 0.9065, "step": 3369 }, { "epoch": 0.20124208766272542, "grad_norm": 2.227151393890381, "learning_rate": 8.87598699489085e-06, "loss": 0.9392, "step": 3370 }, { "epoch": 0.20130180341574108, "grad_norm": 2.730879306793213, "learning_rate": 8.875323468913808e-06, "loss": 0.9233, "step": 3371 }, { "epoch": 0.20136151916875672, "grad_norm": 2.054563045501709, "learning_rate": 8.874659942936766e-06, "loss": 0.9408, "step": 3372 }, { "epoch": 0.20142123492177236, "grad_norm": 2.081146001815796, "learning_rate": 8.873996416959725e-06, "loss": 0.8938, "step": 3373 }, { "epoch": 0.201480950674788, "grad_norm": 2.4980921745300293, "learning_rate": 8.873332890982683e-06, "loss": 0.9341, "step": 3374 }, { "epoch": 0.20154066642780366, "grad_norm": 2.1734485626220703, "learning_rate": 8.87266936500564e-06, "loss": 0.9488, "step": 3375 }, { "epoch": 0.2016003821808193, "grad_norm": 2.2924580574035645, "learning_rate": 8.872005839028599e-06, "loss": 0.9021, "step": 3376 }, { "epoch": 0.20166009793383494, "grad_norm": 2.2276084423065186, "learning_rate": 8.871342313051557e-06, "loss": 0.9268, "step": 3377 }, { "epoch": 0.2017198136868506, "grad_norm": 3.5791735649108887, "learning_rate": 8.870678787074515e-06, "loss": 0.9162, "step": 3378 }, { "epoch": 0.20177952943986624, "grad_norm": 2.313228130340576, "learning_rate": 8.870015261097473e-06, "loss": 0.9101, "step": 3379 }, { "epoch": 0.20183924519288188, "grad_norm": 9.215046882629395, "learning_rate": 8.869351735120431e-06, "loss": 0.9365, "step": 3380 }, { "epoch": 0.20189896094589752, "grad_norm": 1.9466801881790161, "learning_rate": 8.86868820914339e-06, "loss": 0.9407, "step": 3381 }, { "epoch": 0.20195867669891318, "grad_norm": 1.8612056970596313, "learning_rate": 8.868024683166346e-06, "loss": 0.9103, "step": 3382 }, { "epoch": 0.20201839245192882, "grad_norm": 1.9457859992980957, "learning_rate": 8.867361157189306e-06, "loss": 0.8906, "step": 3383 }, { "epoch": 0.20207810820494446, "grad_norm": 3.1250100135803223, "learning_rate": 8.866697631212264e-06, "loss": 0.9578, "step": 3384 }, { "epoch": 0.20213782395796012, "grad_norm": 1.954760193824768, "learning_rate": 8.86603410523522e-06, "loss": 0.8848, "step": 3385 }, { "epoch": 0.20219753971097576, "grad_norm": 2.3090288639068604, "learning_rate": 8.86537057925818e-06, "loss": 0.8966, "step": 3386 }, { "epoch": 0.2022572554639914, "grad_norm": 2.0011141300201416, "learning_rate": 8.864707053281136e-06, "loss": 0.9297, "step": 3387 }, { "epoch": 0.20231697121700704, "grad_norm": 3.706047773361206, "learning_rate": 8.864043527304094e-06, "loss": 0.9331, "step": 3388 }, { "epoch": 0.2023766869700227, "grad_norm": 2.0224742889404297, "learning_rate": 8.863380001327052e-06, "loss": 0.9141, "step": 3389 }, { "epoch": 0.20243640272303834, "grad_norm": 2.406984806060791, "learning_rate": 8.86271647535001e-06, "loss": 0.9205, "step": 3390 }, { "epoch": 0.20249611847605398, "grad_norm": 4.316741466522217, "learning_rate": 8.862052949372969e-06, "loss": 0.92, "step": 3391 }, { "epoch": 0.20255583422906964, "grad_norm": 2.470766544342041, "learning_rate": 8.861389423395927e-06, "loss": 0.9291, "step": 3392 }, { "epoch": 0.20261554998208528, "grad_norm": 2.018563985824585, "learning_rate": 8.860725897418885e-06, "loss": 0.9091, "step": 3393 }, { "epoch": 0.20267526573510092, "grad_norm": 1.8882694244384766, "learning_rate": 8.860062371441843e-06, "loss": 0.9356, "step": 3394 }, { "epoch": 0.20273498148811656, "grad_norm": 2.0854668617248535, "learning_rate": 8.859398845464801e-06, "loss": 0.9353, "step": 3395 }, { "epoch": 0.20279469724113222, "grad_norm": 2.1037871837615967, "learning_rate": 8.858735319487759e-06, "loss": 0.9263, "step": 3396 }, { "epoch": 0.20285441299414786, "grad_norm": 1.8034480810165405, "learning_rate": 8.858071793510716e-06, "loss": 0.9196, "step": 3397 }, { "epoch": 0.2029141287471635, "grad_norm": 2.686551332473755, "learning_rate": 8.857408267533675e-06, "loss": 0.9054, "step": 3398 }, { "epoch": 0.20297384450017913, "grad_norm": 2.739936351776123, "learning_rate": 8.856744741556633e-06, "loss": 0.9647, "step": 3399 }, { "epoch": 0.2030335602531948, "grad_norm": 2.181790351867676, "learning_rate": 8.85608121557959e-06, "loss": 0.9114, "step": 3400 }, { "epoch": 0.2030335602531948, "eval_text_loss": 0.9542568922042847, "eval_text_runtime": 15.515, "eval_text_samples_per_second": 257.815, "eval_text_steps_per_second": 0.516, "step": 3400 }, { "epoch": 0.2030335602531948, "eval_image_loss": 0.6829699277877808, "eval_image_runtime": 5.4606, "eval_image_samples_per_second": 732.516, "eval_image_steps_per_second": 1.465, "step": 3400 }, { "epoch": 0.2030335602531948, "eval_video_loss": 1.155517339706421, "eval_video_runtime": 76.4153, "eval_video_samples_per_second": 52.346, "eval_video_steps_per_second": 0.105, "step": 3400 }, { "epoch": 0.20309327600621044, "grad_norm": 1.8753149509429932, "learning_rate": 8.85541768960255e-06, "loss": 0.9108, "step": 3401 }, { "epoch": 0.20315299175922608, "grad_norm": 2.757599353790283, "learning_rate": 8.854754163625506e-06, "loss": 0.9221, "step": 3402 }, { "epoch": 0.20321270751224174, "grad_norm": 2.188488245010376, "learning_rate": 8.854090637648464e-06, "loss": 0.919, "step": 3403 }, { "epoch": 0.20327242326525738, "grad_norm": 2.1181209087371826, "learning_rate": 8.853427111671424e-06, "loss": 0.8979, "step": 3404 }, { "epoch": 0.20333213901827302, "grad_norm": 1.7904917001724243, "learning_rate": 8.85276358569438e-06, "loss": 0.9336, "step": 3405 }, { "epoch": 0.20339185477128865, "grad_norm": 1.9242708683013916, "learning_rate": 8.852100059717338e-06, "loss": 0.9121, "step": 3406 }, { "epoch": 0.20345157052430432, "grad_norm": 4.025369644165039, "learning_rate": 8.851436533740297e-06, "loss": 0.9219, "step": 3407 }, { "epoch": 0.20351128627731996, "grad_norm": 2.61184024810791, "learning_rate": 8.850773007763255e-06, "loss": 0.9407, "step": 3408 }, { "epoch": 0.2035710020303356, "grad_norm": 1.6736689805984497, "learning_rate": 8.850109481786213e-06, "loss": 0.9403, "step": 3409 }, { "epoch": 0.20363071778335126, "grad_norm": 2.210963487625122, "learning_rate": 8.849445955809171e-06, "loss": 0.9475, "step": 3410 }, { "epoch": 0.2036904335363669, "grad_norm": 2.1761202812194824, "learning_rate": 8.848782429832129e-06, "loss": 0.9396, "step": 3411 }, { "epoch": 0.20375014928938254, "grad_norm": 2.174355983734131, "learning_rate": 8.848118903855087e-06, "loss": 0.9026, "step": 3412 }, { "epoch": 0.20380986504239818, "grad_norm": 2.4191462993621826, "learning_rate": 8.847455377878045e-06, "loss": 0.9463, "step": 3413 }, { "epoch": 0.20386958079541384, "grad_norm": 1.9068602323532104, "learning_rate": 8.846791851901003e-06, "loss": 0.9161, "step": 3414 }, { "epoch": 0.20392929654842948, "grad_norm": 2.577453136444092, "learning_rate": 8.846128325923961e-06, "loss": 0.9366, "step": 3415 }, { "epoch": 0.20398901230144512, "grad_norm": 2.739687442779541, "learning_rate": 8.84546479994692e-06, "loss": 0.8919, "step": 3416 }, { "epoch": 0.20404872805446075, "grad_norm": 4.687203884124756, "learning_rate": 8.844801273969876e-06, "loss": 0.9039, "step": 3417 }, { "epoch": 0.20410844380747642, "grad_norm": 2.66349458694458, "learning_rate": 8.844137747992834e-06, "loss": 0.9051, "step": 3418 }, { "epoch": 0.20416815956049206, "grad_norm": 1.8272478580474854, "learning_rate": 8.843474222015792e-06, "loss": 0.9168, "step": 3419 }, { "epoch": 0.2042278753135077, "grad_norm": 2.2152130603790283, "learning_rate": 8.84281069603875e-06, "loss": 0.9001, "step": 3420 }, { "epoch": 0.20428759106652336, "grad_norm": 1.7786972522735596, "learning_rate": 8.842147170061708e-06, "loss": 0.9346, "step": 3421 }, { "epoch": 0.204347306819539, "grad_norm": 2.3764865398406982, "learning_rate": 8.841483644084666e-06, "loss": 0.8843, "step": 3422 }, { "epoch": 0.20440702257255464, "grad_norm": 3.0806076526641846, "learning_rate": 8.840820118107624e-06, "loss": 0.9038, "step": 3423 }, { "epoch": 0.20446673832557027, "grad_norm": 1.9502286911010742, "learning_rate": 8.840156592130583e-06, "loss": 0.9186, "step": 3424 }, { "epoch": 0.20452645407858594, "grad_norm": 2.03416109085083, "learning_rate": 8.83949306615354e-06, "loss": 0.9157, "step": 3425 }, { "epoch": 0.20458616983160158, "grad_norm": 1.8191989660263062, "learning_rate": 8.838829540176499e-06, "loss": 0.9493, "step": 3426 }, { "epoch": 0.20464588558461722, "grad_norm": 3.391930103302002, "learning_rate": 8.838166014199457e-06, "loss": 0.9172, "step": 3427 }, { "epoch": 0.20470560133763288, "grad_norm": 2.8187947273254395, "learning_rate": 8.837502488222415e-06, "loss": 0.9066, "step": 3428 }, { "epoch": 0.20476531709064852, "grad_norm": 1.8525573015213013, "learning_rate": 8.836838962245371e-06, "loss": 0.932, "step": 3429 }, { "epoch": 0.20482503284366416, "grad_norm": 1.9922094345092773, "learning_rate": 8.836175436268331e-06, "loss": 0.9146, "step": 3430 }, { "epoch": 0.2048847485966798, "grad_norm": 1.6509573459625244, "learning_rate": 8.83551191029129e-06, "loss": 0.8932, "step": 3431 }, { "epoch": 0.20494446434969546, "grad_norm": 2.231926441192627, "learning_rate": 8.834848384314246e-06, "loss": 0.8841, "step": 3432 }, { "epoch": 0.2050041801027111, "grad_norm": 2.0599606037139893, "learning_rate": 8.834184858337205e-06, "loss": 0.929, "step": 3433 }, { "epoch": 0.20506389585572674, "grad_norm": 2.2165684700012207, "learning_rate": 8.833521332360162e-06, "loss": 0.8598, "step": 3434 }, { "epoch": 0.20512361160874237, "grad_norm": 3.176384449005127, "learning_rate": 8.83285780638312e-06, "loss": 0.9187, "step": 3435 }, { "epoch": 0.20518332736175804, "grad_norm": 2.158262014389038, "learning_rate": 8.83219428040608e-06, "loss": 0.9306, "step": 3436 }, { "epoch": 0.20524304311477368, "grad_norm": 2.972728967666626, "learning_rate": 8.831530754429036e-06, "loss": 0.9565, "step": 3437 }, { "epoch": 0.20530275886778931, "grad_norm": 1.8108985424041748, "learning_rate": 8.830867228451994e-06, "loss": 0.885, "step": 3438 }, { "epoch": 0.20536247462080498, "grad_norm": 2.1002228260040283, "learning_rate": 8.830203702474952e-06, "loss": 0.949, "step": 3439 }, { "epoch": 0.20542219037382062, "grad_norm": 3.600041151046753, "learning_rate": 8.82954017649791e-06, "loss": 0.9508, "step": 3440 }, { "epoch": 0.20548190612683626, "grad_norm": 2.265467643737793, "learning_rate": 8.828876650520869e-06, "loss": 0.8948, "step": 3441 }, { "epoch": 0.2055416218798519, "grad_norm": 3.52010440826416, "learning_rate": 8.828213124543827e-06, "loss": 0.9127, "step": 3442 }, { "epoch": 0.20560133763286756, "grad_norm": 3.0910418033599854, "learning_rate": 8.827549598566785e-06, "loss": 0.8881, "step": 3443 }, { "epoch": 0.2056610533858832, "grad_norm": 2.0703563690185547, "learning_rate": 8.826886072589743e-06, "loss": 0.8962, "step": 3444 }, { "epoch": 0.20572076913889883, "grad_norm": 3.5919110774993896, "learning_rate": 8.826222546612701e-06, "loss": 0.9195, "step": 3445 }, { "epoch": 0.2057804848919145, "grad_norm": 2.9356303215026855, "learning_rate": 8.825559020635659e-06, "loss": 0.9745, "step": 3446 }, { "epoch": 0.20584020064493014, "grad_norm": 2.4693262577056885, "learning_rate": 8.824895494658616e-06, "loss": 0.8887, "step": 3447 }, { "epoch": 0.20589991639794578, "grad_norm": 2.0317599773406982, "learning_rate": 8.824231968681575e-06, "loss": 0.9596, "step": 3448 }, { "epoch": 0.2059596321509614, "grad_norm": 2.0929315090179443, "learning_rate": 8.823568442704532e-06, "loss": 0.9406, "step": 3449 }, { "epoch": 0.20601934790397708, "grad_norm": 5.151556015014648, "learning_rate": 8.82290491672749e-06, "loss": 0.9126, "step": 3450 }, { "epoch": 0.20607906365699272, "grad_norm": 4.467487335205078, "learning_rate": 8.82224139075045e-06, "loss": 0.9086, "step": 3451 }, { "epoch": 0.20613877941000835, "grad_norm": 1.7760660648345947, "learning_rate": 8.821577864773406e-06, "loss": 0.9352, "step": 3452 }, { "epoch": 0.206198495163024, "grad_norm": 2.9269731044769287, "learning_rate": 8.820914338796364e-06, "loss": 0.9297, "step": 3453 }, { "epoch": 0.20625821091603966, "grad_norm": 1.93399178981781, "learning_rate": 8.820250812819322e-06, "loss": 0.9303, "step": 3454 }, { "epoch": 0.2063179266690553, "grad_norm": 1.6664760112762451, "learning_rate": 8.81958728684228e-06, "loss": 0.9071, "step": 3455 }, { "epoch": 0.20637764242207093, "grad_norm": 2.3114678859710693, "learning_rate": 8.818923760865238e-06, "loss": 0.8927, "step": 3456 }, { "epoch": 0.2064373581750866, "grad_norm": 1.8462879657745361, "learning_rate": 8.818260234888197e-06, "loss": 0.9515, "step": 3457 }, { "epoch": 0.20649707392810224, "grad_norm": 1.780216932296753, "learning_rate": 8.817596708911155e-06, "loss": 0.917, "step": 3458 }, { "epoch": 0.20655678968111787, "grad_norm": 2.426666736602783, "learning_rate": 8.816933182934113e-06, "loss": 0.9136, "step": 3459 }, { "epoch": 0.2066165054341335, "grad_norm": 2.911868095397949, "learning_rate": 8.81626965695707e-06, "loss": 0.8879, "step": 3460 }, { "epoch": 0.20667622118714918, "grad_norm": 1.9792003631591797, "learning_rate": 8.815606130980029e-06, "loss": 0.9334, "step": 3461 }, { "epoch": 0.20673593694016482, "grad_norm": 2.7495789527893066, "learning_rate": 8.814942605002987e-06, "loss": 0.9321, "step": 3462 }, { "epoch": 0.20679565269318045, "grad_norm": 2.0329384803771973, "learning_rate": 8.814279079025945e-06, "loss": 0.9464, "step": 3463 }, { "epoch": 0.20685536844619612, "grad_norm": 3.253159761428833, "learning_rate": 8.813615553048902e-06, "loss": 0.9459, "step": 3464 }, { "epoch": 0.20691508419921176, "grad_norm": 3.7536187171936035, "learning_rate": 8.812952027071861e-06, "loss": 0.9821, "step": 3465 }, { "epoch": 0.2069747999522274, "grad_norm": 2.0251104831695557, "learning_rate": 8.81228850109482e-06, "loss": 0.9069, "step": 3466 }, { "epoch": 0.20703451570524303, "grad_norm": 5.045644760131836, "learning_rate": 8.811624975117776e-06, "loss": 0.9067, "step": 3467 }, { "epoch": 0.2070942314582587, "grad_norm": 2.319385290145874, "learning_rate": 8.810961449140734e-06, "loss": 0.9026, "step": 3468 }, { "epoch": 0.20715394721127434, "grad_norm": 3.0320470333099365, "learning_rate": 8.810297923163692e-06, "loss": 0.9167, "step": 3469 }, { "epoch": 0.20721366296428997, "grad_norm": 1.74042546749115, "learning_rate": 8.80963439718665e-06, "loss": 0.9112, "step": 3470 }, { "epoch": 0.2072733787173056, "grad_norm": 4.9096784591674805, "learning_rate": 8.808970871209608e-06, "loss": 0.9523, "step": 3471 }, { "epoch": 0.20733309447032128, "grad_norm": 4.867388725280762, "learning_rate": 8.808307345232566e-06, "loss": 0.9604, "step": 3472 }, { "epoch": 0.20739281022333692, "grad_norm": 1.848802089691162, "learning_rate": 8.807643819255524e-06, "loss": 0.9491, "step": 3473 }, { "epoch": 0.20745252597635255, "grad_norm": 2.630221128463745, "learning_rate": 8.806980293278483e-06, "loss": 0.9504, "step": 3474 }, { "epoch": 0.20751224172936822, "grad_norm": 2.458585739135742, "learning_rate": 8.80631676730144e-06, "loss": 0.9373, "step": 3475 }, { "epoch": 0.20757195748238386, "grad_norm": 9.17897891998291, "learning_rate": 8.805653241324399e-06, "loss": 0.9015, "step": 3476 }, { "epoch": 0.2076316732353995, "grad_norm": 2.26930570602417, "learning_rate": 8.804989715347357e-06, "loss": 0.9044, "step": 3477 }, { "epoch": 0.20769138898841513, "grad_norm": 2.185622453689575, "learning_rate": 8.804326189370315e-06, "loss": 0.9085, "step": 3478 }, { "epoch": 0.2077511047414308, "grad_norm": 2.794511318206787, "learning_rate": 8.803662663393271e-06, "loss": 0.9331, "step": 3479 }, { "epoch": 0.20781082049444644, "grad_norm": 3.8288493156433105, "learning_rate": 8.802999137416231e-06, "loss": 0.93, "step": 3480 }, { "epoch": 0.20787053624746207, "grad_norm": 3.9749748706817627, "learning_rate": 8.80233561143919e-06, "loss": 0.9226, "step": 3481 }, { "epoch": 0.20793025200047774, "grad_norm": 2.373164653778076, "learning_rate": 8.801672085462146e-06, "loss": 0.8854, "step": 3482 }, { "epoch": 0.20798996775349338, "grad_norm": 3.10569167137146, "learning_rate": 8.801008559485105e-06, "loss": 0.936, "step": 3483 }, { "epoch": 0.20804968350650901, "grad_norm": 4.439079284667969, "learning_rate": 8.800345033508062e-06, "loss": 0.936, "step": 3484 }, { "epoch": 0.20810939925952465, "grad_norm": 1.8738188743591309, "learning_rate": 8.79968150753102e-06, "loss": 0.8947, "step": 3485 }, { "epoch": 0.20816911501254032, "grad_norm": 2.401909589767456, "learning_rate": 8.79901798155398e-06, "loss": 0.9561, "step": 3486 }, { "epoch": 0.20822883076555596, "grad_norm": 2.2806925773620605, "learning_rate": 8.798354455576936e-06, "loss": 0.8743, "step": 3487 }, { "epoch": 0.2082885465185716, "grad_norm": 3.587486982345581, "learning_rate": 8.797690929599894e-06, "loss": 0.9182, "step": 3488 }, { "epoch": 0.20834826227158723, "grad_norm": 2.5260210037231445, "learning_rate": 8.797027403622852e-06, "loss": 0.885, "step": 3489 }, { "epoch": 0.2084079780246029, "grad_norm": 2.232379913330078, "learning_rate": 8.79636387764581e-06, "loss": 0.9139, "step": 3490 }, { "epoch": 0.20846769377761853, "grad_norm": 1.507000207901001, "learning_rate": 8.795700351668769e-06, "loss": 0.8765, "step": 3491 }, { "epoch": 0.20852740953063417, "grad_norm": 3.227332592010498, "learning_rate": 8.795036825691727e-06, "loss": 0.9045, "step": 3492 }, { "epoch": 0.20858712528364984, "grad_norm": 2.4545035362243652, "learning_rate": 8.794373299714685e-06, "loss": 0.9029, "step": 3493 }, { "epoch": 0.20864684103666548, "grad_norm": 2.350407123565674, "learning_rate": 8.793709773737643e-06, "loss": 0.9317, "step": 3494 }, { "epoch": 0.2087065567896811, "grad_norm": 1.947440266609192, "learning_rate": 8.793046247760601e-06, "loss": 0.9367, "step": 3495 }, { "epoch": 0.20876627254269675, "grad_norm": 4.481655120849609, "learning_rate": 8.792382721783559e-06, "loss": 0.9199, "step": 3496 }, { "epoch": 0.20882598829571242, "grad_norm": 4.315814971923828, "learning_rate": 8.791719195806515e-06, "loss": 0.9311, "step": 3497 }, { "epoch": 0.20888570404872805, "grad_norm": 2.0755960941314697, "learning_rate": 8.791055669829475e-06, "loss": 0.9184, "step": 3498 }, { "epoch": 0.2089454198017437, "grad_norm": 1.724367618560791, "learning_rate": 8.790392143852432e-06, "loss": 0.8927, "step": 3499 }, { "epoch": 0.20900513555475936, "grad_norm": 2.2936410903930664, "learning_rate": 8.78972861787539e-06, "loss": 0.9027, "step": 3500 }, { "epoch": 0.209064851307775, "grad_norm": 2.2564938068389893, "learning_rate": 8.78906509189835e-06, "loss": 0.9329, "step": 3501 }, { "epoch": 0.20912456706079063, "grad_norm": 2.497896671295166, "learning_rate": 8.788401565921306e-06, "loss": 0.9323, "step": 3502 }, { "epoch": 0.20918428281380627, "grad_norm": 2.2680606842041016, "learning_rate": 8.787738039944264e-06, "loss": 0.89, "step": 3503 }, { "epoch": 0.20924399856682194, "grad_norm": 2.3063900470733643, "learning_rate": 8.787074513967222e-06, "loss": 0.9116, "step": 3504 }, { "epoch": 0.20930371431983757, "grad_norm": 1.892916202545166, "learning_rate": 8.78641098799018e-06, "loss": 0.8946, "step": 3505 }, { "epoch": 0.2093634300728532, "grad_norm": 2.0650479793548584, "learning_rate": 8.785747462013138e-06, "loss": 0.9388, "step": 3506 }, { "epoch": 0.20942314582586885, "grad_norm": 2.770806312561035, "learning_rate": 8.785083936036096e-06, "loss": 0.9244, "step": 3507 }, { "epoch": 0.20948286157888452, "grad_norm": 3.1112923622131348, "learning_rate": 8.784420410059055e-06, "loss": 0.9447, "step": 3508 }, { "epoch": 0.20954257733190015, "grad_norm": 2.2833240032196045, "learning_rate": 8.783756884082013e-06, "loss": 0.9282, "step": 3509 }, { "epoch": 0.2096022930849158, "grad_norm": 2.623765468597412, "learning_rate": 8.78309335810497e-06, "loss": 0.9228, "step": 3510 }, { "epoch": 0.20966200883793146, "grad_norm": 1.688667893409729, "learning_rate": 8.782429832127929e-06, "loss": 0.908, "step": 3511 }, { "epoch": 0.2097217245909471, "grad_norm": 2.336528778076172, "learning_rate": 8.781766306150887e-06, "loss": 0.9622, "step": 3512 }, { "epoch": 0.20978144034396273, "grad_norm": 2.2380809783935547, "learning_rate": 8.781102780173845e-06, "loss": 0.9368, "step": 3513 }, { "epoch": 0.20984115609697837, "grad_norm": 2.2446224689483643, "learning_rate": 8.780439254196801e-06, "loss": 0.9433, "step": 3514 }, { "epoch": 0.20990087184999404, "grad_norm": 2.3820078372955322, "learning_rate": 8.779775728219761e-06, "loss": 0.9337, "step": 3515 }, { "epoch": 0.20996058760300967, "grad_norm": 2.314608573913574, "learning_rate": 8.77911220224272e-06, "loss": 0.9613, "step": 3516 }, { "epoch": 0.2100203033560253, "grad_norm": 2.2672038078308105, "learning_rate": 8.778448676265676e-06, "loss": 0.9079, "step": 3517 }, { "epoch": 0.21008001910904098, "grad_norm": 3.1981515884399414, "learning_rate": 8.777785150288634e-06, "loss": 0.8978, "step": 3518 }, { "epoch": 0.21013973486205662, "grad_norm": 2.543782949447632, "learning_rate": 8.777121624311592e-06, "loss": 0.9374, "step": 3519 }, { "epoch": 0.21019945061507225, "grad_norm": 2.2658331394195557, "learning_rate": 8.77645809833455e-06, "loss": 0.9105, "step": 3520 }, { "epoch": 0.2102591663680879, "grad_norm": 5.226182460784912, "learning_rate": 8.775794572357508e-06, "loss": 0.9367, "step": 3521 }, { "epoch": 0.21031888212110356, "grad_norm": 2.65838360786438, "learning_rate": 8.775131046380466e-06, "loss": 0.9325, "step": 3522 }, { "epoch": 0.2103785978741192, "grad_norm": 3.349828004837036, "learning_rate": 8.774467520403424e-06, "loss": 0.9287, "step": 3523 }, { "epoch": 0.21043831362713483, "grad_norm": 3.0039045810699463, "learning_rate": 8.773803994426382e-06, "loss": 0.9476, "step": 3524 }, { "epoch": 0.21049802938015047, "grad_norm": 2.616924285888672, "learning_rate": 8.77314046844934e-06, "loss": 0.9338, "step": 3525 }, { "epoch": 0.21055774513316614, "grad_norm": 1.5627344846725464, "learning_rate": 8.772476942472299e-06, "loss": 0.9244, "step": 3526 }, { "epoch": 0.21061746088618177, "grad_norm": 3.6670026779174805, "learning_rate": 8.771813416495257e-06, "loss": 0.9512, "step": 3527 }, { "epoch": 0.2106771766391974, "grad_norm": 2.440291404724121, "learning_rate": 8.771149890518215e-06, "loss": 0.9438, "step": 3528 }, { "epoch": 0.21073689239221308, "grad_norm": 2.217259407043457, "learning_rate": 8.770486364541171e-06, "loss": 0.9267, "step": 3529 }, { "epoch": 0.21079660814522871, "grad_norm": 2.5455994606018066, "learning_rate": 8.769822838564131e-06, "loss": 0.9014, "step": 3530 }, { "epoch": 0.21085632389824435, "grad_norm": 3.34116792678833, "learning_rate": 8.76915931258709e-06, "loss": 0.906, "step": 3531 }, { "epoch": 0.21091603965126, "grad_norm": 2.0898215770721436, "learning_rate": 8.768495786610046e-06, "loss": 0.9024, "step": 3532 }, { "epoch": 0.21097575540427566, "grad_norm": 1.829800009727478, "learning_rate": 8.767832260633005e-06, "loss": 0.9066, "step": 3533 }, { "epoch": 0.2110354711572913, "grad_norm": 12.394468307495117, "learning_rate": 8.767168734655962e-06, "loss": 0.9459, "step": 3534 }, { "epoch": 0.21109518691030693, "grad_norm": 2.264984607696533, "learning_rate": 8.76650520867892e-06, "loss": 0.9443, "step": 3535 }, { "epoch": 0.2111549026633226, "grad_norm": 3.09548020362854, "learning_rate": 8.76584168270188e-06, "loss": 0.9042, "step": 3536 }, { "epoch": 0.21121461841633823, "grad_norm": 2.031989812850952, "learning_rate": 8.765178156724836e-06, "loss": 0.8935, "step": 3537 }, { "epoch": 0.21127433416935387, "grad_norm": 1.8716837167739868, "learning_rate": 8.764514630747794e-06, "loss": 0.9211, "step": 3538 }, { "epoch": 0.2113340499223695, "grad_norm": 2.0436408519744873, "learning_rate": 8.763851104770752e-06, "loss": 0.9352, "step": 3539 }, { "epoch": 0.21139376567538518, "grad_norm": 2.479872703552246, "learning_rate": 8.76318757879371e-06, "loss": 0.899, "step": 3540 }, { "epoch": 0.2114534814284008, "grad_norm": 2.5099024772644043, "learning_rate": 8.762524052816668e-06, "loss": 0.8795, "step": 3541 }, { "epoch": 0.21151319718141645, "grad_norm": 3.434305429458618, "learning_rate": 8.761860526839627e-06, "loss": 0.8912, "step": 3542 }, { "epoch": 0.21157291293443212, "grad_norm": 1.9124001264572144, "learning_rate": 8.761197000862585e-06, "loss": 0.9339, "step": 3543 }, { "epoch": 0.21163262868744775, "grad_norm": 5.003149032592773, "learning_rate": 8.760533474885543e-06, "loss": 0.9816, "step": 3544 }, { "epoch": 0.2116923444404634, "grad_norm": 2.0069122314453125, "learning_rate": 8.759869948908501e-06, "loss": 0.9183, "step": 3545 }, { "epoch": 0.21175206019347903, "grad_norm": 2.7478582859039307, "learning_rate": 8.759206422931459e-06, "loss": 0.9471, "step": 3546 }, { "epoch": 0.2118117759464947, "grad_norm": 2.0503013134002686, "learning_rate": 8.758542896954415e-06, "loss": 0.8962, "step": 3547 }, { "epoch": 0.21187149169951033, "grad_norm": 1.7940266132354736, "learning_rate": 8.757879370977375e-06, "loss": 0.9224, "step": 3548 }, { "epoch": 0.21193120745252597, "grad_norm": 3.2481462955474854, "learning_rate": 8.757215845000332e-06, "loss": 0.9519, "step": 3549 }, { "epoch": 0.2119909232055416, "grad_norm": 2.1402721405029297, "learning_rate": 8.75655231902329e-06, "loss": 0.912, "step": 3550 }, { "epoch": 0.21205063895855727, "grad_norm": 3.3296852111816406, "learning_rate": 8.75588879304625e-06, "loss": 0.922, "step": 3551 }, { "epoch": 0.2121103547115729, "grad_norm": 3.1468698978424072, "learning_rate": 8.755225267069206e-06, "loss": 0.9137, "step": 3552 }, { "epoch": 0.21217007046458855, "grad_norm": 1.8578342199325562, "learning_rate": 8.754561741092164e-06, "loss": 0.8984, "step": 3553 }, { "epoch": 0.21222978621760422, "grad_norm": 3.971785068511963, "learning_rate": 8.753898215115122e-06, "loss": 0.9603, "step": 3554 }, { "epoch": 0.21228950197061985, "grad_norm": 2.6682605743408203, "learning_rate": 8.75323468913808e-06, "loss": 0.9466, "step": 3555 }, { "epoch": 0.2123492177236355, "grad_norm": 2.2378969192504883, "learning_rate": 8.752571163161038e-06, "loss": 0.9224, "step": 3556 }, { "epoch": 0.21240893347665113, "grad_norm": 2.84417724609375, "learning_rate": 8.751907637183996e-06, "loss": 0.9325, "step": 3557 }, { "epoch": 0.2124686492296668, "grad_norm": 2.2752866744995117, "learning_rate": 8.751244111206954e-06, "loss": 0.9097, "step": 3558 }, { "epoch": 0.21252836498268243, "grad_norm": 1.977906346321106, "learning_rate": 8.750580585229913e-06, "loss": 0.9196, "step": 3559 }, { "epoch": 0.21258808073569807, "grad_norm": 2.2271084785461426, "learning_rate": 8.74991705925287e-06, "loss": 0.9256, "step": 3560 }, { "epoch": 0.21264779648871374, "grad_norm": 2.0924606323242188, "learning_rate": 8.749253533275829e-06, "loss": 0.9183, "step": 3561 }, { "epoch": 0.21270751224172937, "grad_norm": 2.4386816024780273, "learning_rate": 8.748590007298787e-06, "loss": 0.9047, "step": 3562 }, { "epoch": 0.212767227994745, "grad_norm": 1.7371258735656738, "learning_rate": 8.747926481321745e-06, "loss": 0.9222, "step": 3563 }, { "epoch": 0.21282694374776065, "grad_norm": 2.611279010772705, "learning_rate": 8.747262955344701e-06, "loss": 0.9043, "step": 3564 }, { "epoch": 0.21288665950077632, "grad_norm": 2.4101743698120117, "learning_rate": 8.746599429367661e-06, "loss": 0.9348, "step": 3565 }, { "epoch": 0.21294637525379195, "grad_norm": 3.284423589706421, "learning_rate": 8.74593590339062e-06, "loss": 0.9414, "step": 3566 }, { "epoch": 0.2130060910068076, "grad_norm": 1.7805882692337036, "learning_rate": 8.745272377413576e-06, "loss": 0.8951, "step": 3567 }, { "epoch": 0.21306580675982323, "grad_norm": 4.064797401428223, "learning_rate": 8.744608851436534e-06, "loss": 0.9288, "step": 3568 }, { "epoch": 0.2131255225128389, "grad_norm": 2.17171573638916, "learning_rate": 8.743945325459492e-06, "loss": 0.9431, "step": 3569 }, { "epoch": 0.21318523826585453, "grad_norm": 2.218325614929199, "learning_rate": 8.74328179948245e-06, "loss": 0.916, "step": 3570 }, { "epoch": 0.21324495401887017, "grad_norm": 1.891235589981079, "learning_rate": 8.742618273505408e-06, "loss": 0.9314, "step": 3571 }, { "epoch": 0.21330466977188584, "grad_norm": 2.5301754474639893, "learning_rate": 8.741954747528366e-06, "loss": 0.9067, "step": 3572 }, { "epoch": 0.21336438552490147, "grad_norm": 2.2831854820251465, "learning_rate": 8.741291221551324e-06, "loss": 0.9596, "step": 3573 }, { "epoch": 0.2134241012779171, "grad_norm": 2.069218635559082, "learning_rate": 8.740627695574282e-06, "loss": 0.9015, "step": 3574 }, { "epoch": 0.21348381703093275, "grad_norm": 1.724441647529602, "learning_rate": 8.73996416959724e-06, "loss": 0.9187, "step": 3575 }, { "epoch": 0.21354353278394841, "grad_norm": 3.30486798286438, "learning_rate": 8.739300643620199e-06, "loss": 0.9128, "step": 3576 }, { "epoch": 0.21360324853696405, "grad_norm": 3.7985758781433105, "learning_rate": 8.738637117643157e-06, "loss": 0.9003, "step": 3577 }, { "epoch": 0.2136629642899797, "grad_norm": 2.02866530418396, "learning_rate": 8.737973591666115e-06, "loss": 0.9377, "step": 3578 }, { "epoch": 0.21372268004299536, "grad_norm": 2.2627978324890137, "learning_rate": 8.737310065689071e-06, "loss": 0.9164, "step": 3579 }, { "epoch": 0.213782395796011, "grad_norm": 2.452903985977173, "learning_rate": 8.736646539712031e-06, "loss": 0.9446, "step": 3580 }, { "epoch": 0.21384211154902663, "grad_norm": 3.0618982315063477, "learning_rate": 8.735983013734989e-06, "loss": 0.9051, "step": 3581 }, { "epoch": 0.21390182730204227, "grad_norm": 1.6189448833465576, "learning_rate": 8.735319487757946e-06, "loss": 0.9432, "step": 3582 }, { "epoch": 0.21396154305505793, "grad_norm": 2.7724413871765137, "learning_rate": 8.734655961780905e-06, "loss": 0.9571, "step": 3583 }, { "epoch": 0.21402125880807357, "grad_norm": 2.2884902954101562, "learning_rate": 8.733992435803862e-06, "loss": 0.8704, "step": 3584 }, { "epoch": 0.2140809745610892, "grad_norm": 3.0518429279327393, "learning_rate": 8.73332890982682e-06, "loss": 0.9142, "step": 3585 }, { "epoch": 0.21414069031410485, "grad_norm": 1.8089362382888794, "learning_rate": 8.73266538384978e-06, "loss": 0.8895, "step": 3586 }, { "epoch": 0.2142004060671205, "grad_norm": 2.2196602821350098, "learning_rate": 8.732001857872736e-06, "loss": 0.9206, "step": 3587 }, { "epoch": 0.21426012182013615, "grad_norm": 4.960068702697754, "learning_rate": 8.731338331895694e-06, "loss": 0.9298, "step": 3588 }, { "epoch": 0.2143198375731518, "grad_norm": 2.3237006664276123, "learning_rate": 8.730674805918652e-06, "loss": 0.9317, "step": 3589 }, { "epoch": 0.21437955332616745, "grad_norm": 2.34594464302063, "learning_rate": 8.73001127994161e-06, "loss": 0.9435, "step": 3590 }, { "epoch": 0.2144392690791831, "grad_norm": 2.2536206245422363, "learning_rate": 8.729347753964568e-06, "loss": 0.8882, "step": 3591 }, { "epoch": 0.21449898483219873, "grad_norm": 1.8483364582061768, "learning_rate": 8.728684227987527e-06, "loss": 0.9562, "step": 3592 }, { "epoch": 0.21455870058521437, "grad_norm": 1.8702794313430786, "learning_rate": 8.728020702010485e-06, "loss": 0.8995, "step": 3593 }, { "epoch": 0.21461841633823003, "grad_norm": 3.1678922176361084, "learning_rate": 8.727357176033443e-06, "loss": 0.9431, "step": 3594 }, { "epoch": 0.21467813209124567, "grad_norm": 3.072326183319092, "learning_rate": 8.7266936500564e-06, "loss": 0.8834, "step": 3595 }, { "epoch": 0.2147378478442613, "grad_norm": 4.781401634216309, "learning_rate": 8.726030124079359e-06, "loss": 0.9332, "step": 3596 }, { "epoch": 0.21479756359727697, "grad_norm": 2.1286613941192627, "learning_rate": 8.725366598102315e-06, "loss": 0.9122, "step": 3597 }, { "epoch": 0.2148572793502926, "grad_norm": 2.5020320415496826, "learning_rate": 8.724703072125275e-06, "loss": 0.9422, "step": 3598 }, { "epoch": 0.21491699510330825, "grad_norm": 1.9920940399169922, "learning_rate": 8.724039546148232e-06, "loss": 0.9225, "step": 3599 }, { "epoch": 0.2149767108563239, "grad_norm": 2.781158924102783, "learning_rate": 8.72337602017119e-06, "loss": 0.8927, "step": 3600 }, { "epoch": 0.2149767108563239, "eval_text_loss": 0.9532829523086548, "eval_text_runtime": 15.2286, "eval_text_samples_per_second": 262.663, "eval_text_steps_per_second": 0.525, "step": 3600 }, { "epoch": 0.2149767108563239, "eval_image_loss": 0.6824573278427124, "eval_image_runtime": 5.2036, "eval_image_samples_per_second": 768.693, "eval_image_steps_per_second": 1.537, "step": 3600 }, { "epoch": 0.2149767108563239, "eval_video_loss": 1.1526505947113037, "eval_video_runtime": 76.2333, "eval_video_samples_per_second": 52.471, "eval_video_steps_per_second": 0.105, "step": 3600 }, { "epoch": 0.21503642660933955, "grad_norm": 1.8630815744400024, "learning_rate": 8.72271249419415e-06, "loss": 0.9344, "step": 3601 }, { "epoch": 0.2150961423623552, "grad_norm": 2.3408586978912354, "learning_rate": 8.722048968217106e-06, "loss": 0.9172, "step": 3602 }, { "epoch": 0.21515585811537083, "grad_norm": 2.3214492797851562, "learning_rate": 8.721385442240064e-06, "loss": 0.9644, "step": 3603 }, { "epoch": 0.21521557386838647, "grad_norm": 4.759840965270996, "learning_rate": 8.720721916263022e-06, "loss": 0.8968, "step": 3604 }, { "epoch": 0.21527528962140213, "grad_norm": 1.9600502252578735, "learning_rate": 8.72005839028598e-06, "loss": 0.949, "step": 3605 }, { "epoch": 0.21533500537441777, "grad_norm": 2.155993700027466, "learning_rate": 8.719394864308938e-06, "loss": 0.9389, "step": 3606 }, { "epoch": 0.2153947211274334, "grad_norm": 2.036830186843872, "learning_rate": 8.718731338331896e-06, "loss": 0.9153, "step": 3607 }, { "epoch": 0.21545443688044907, "grad_norm": 2.0353078842163086, "learning_rate": 8.718067812354854e-06, "loss": 0.9284, "step": 3608 }, { "epoch": 0.2155141526334647, "grad_norm": 1.8002408742904663, "learning_rate": 8.717404286377813e-06, "loss": 0.9138, "step": 3609 }, { "epoch": 0.21557386838648035, "grad_norm": 3.6430745124816895, "learning_rate": 8.71674076040077e-06, "loss": 0.9319, "step": 3610 }, { "epoch": 0.215633584139496, "grad_norm": 2.3806445598602295, "learning_rate": 8.716077234423729e-06, "loss": 0.9064, "step": 3611 }, { "epoch": 0.21569329989251165, "grad_norm": 2.0722579956054688, "learning_rate": 8.715413708446687e-06, "loss": 0.9341, "step": 3612 }, { "epoch": 0.2157530156455273, "grad_norm": 4.489386081695557, "learning_rate": 8.714750182469645e-06, "loss": 0.9064, "step": 3613 }, { "epoch": 0.21581273139854293, "grad_norm": 1.6909310817718506, "learning_rate": 8.714086656492601e-06, "loss": 0.8718, "step": 3614 }, { "epoch": 0.2158724471515586, "grad_norm": 3.3405494689941406, "learning_rate": 8.713423130515561e-06, "loss": 0.9347, "step": 3615 }, { "epoch": 0.21593216290457423, "grad_norm": 2.5317306518554688, "learning_rate": 8.71275960453852e-06, "loss": 0.9026, "step": 3616 }, { "epoch": 0.21599187865758987, "grad_norm": 1.6859556436538696, "learning_rate": 8.712096078561476e-06, "loss": 0.9145, "step": 3617 }, { "epoch": 0.2160515944106055, "grad_norm": 2.1809449195861816, "learning_rate": 8.711432552584434e-06, "loss": 0.8952, "step": 3618 }, { "epoch": 0.21611131016362117, "grad_norm": 2.0304954051971436, "learning_rate": 8.710769026607392e-06, "loss": 0.9096, "step": 3619 }, { "epoch": 0.2161710259166368, "grad_norm": 2.086024761199951, "learning_rate": 8.71010550063035e-06, "loss": 0.8879, "step": 3620 }, { "epoch": 0.21623074166965245, "grad_norm": 2.206866502761841, "learning_rate": 8.709441974653308e-06, "loss": 0.9287, "step": 3621 }, { "epoch": 0.2162904574226681, "grad_norm": 1.7340528964996338, "learning_rate": 8.708778448676266e-06, "loss": 0.9326, "step": 3622 }, { "epoch": 0.21635017317568375, "grad_norm": 2.542088270187378, "learning_rate": 8.708114922699224e-06, "loss": 0.9242, "step": 3623 }, { "epoch": 0.2164098889286994, "grad_norm": 2.6744542121887207, "learning_rate": 8.707451396722182e-06, "loss": 0.9193, "step": 3624 }, { "epoch": 0.21646960468171503, "grad_norm": 2.2722573280334473, "learning_rate": 8.70678787074514e-06, "loss": 0.9231, "step": 3625 }, { "epoch": 0.2165293204347307, "grad_norm": 1.6297645568847656, "learning_rate": 8.706124344768099e-06, "loss": 0.9194, "step": 3626 }, { "epoch": 0.21658903618774633, "grad_norm": 3.754718065261841, "learning_rate": 8.705460818791057e-06, "loss": 0.9282, "step": 3627 }, { "epoch": 0.21664875194076197, "grad_norm": 5.423031806945801, "learning_rate": 8.704797292814015e-06, "loss": 0.9254, "step": 3628 }, { "epoch": 0.2167084676937776, "grad_norm": 1.893823266029358, "learning_rate": 8.704133766836971e-06, "loss": 0.919, "step": 3629 }, { "epoch": 0.21676818344679327, "grad_norm": 2.2344977855682373, "learning_rate": 8.703470240859931e-06, "loss": 0.9322, "step": 3630 }, { "epoch": 0.2168278991998089, "grad_norm": 2.444544792175293, "learning_rate": 8.702806714882889e-06, "loss": 0.9111, "step": 3631 }, { "epoch": 0.21688761495282455, "grad_norm": 1.809496283531189, "learning_rate": 8.702143188905845e-06, "loss": 0.9031, "step": 3632 }, { "epoch": 0.2169473307058402, "grad_norm": 1.9044755697250366, "learning_rate": 8.701479662928805e-06, "loss": 0.9251, "step": 3633 }, { "epoch": 0.21700704645885585, "grad_norm": 2.0594887733459473, "learning_rate": 8.700816136951762e-06, "loss": 0.9441, "step": 3634 }, { "epoch": 0.2170667622118715, "grad_norm": 1.7230592966079712, "learning_rate": 8.70015261097472e-06, "loss": 0.8466, "step": 3635 }, { "epoch": 0.21712647796488713, "grad_norm": 1.9179167747497559, "learning_rate": 8.69948908499768e-06, "loss": 0.8958, "step": 3636 }, { "epoch": 0.2171861937179028, "grad_norm": 1.73003089427948, "learning_rate": 8.698825559020636e-06, "loss": 0.9715, "step": 3637 }, { "epoch": 0.21724590947091843, "grad_norm": 1.8518801927566528, "learning_rate": 8.698162033043594e-06, "loss": 0.9065, "step": 3638 }, { "epoch": 0.21730562522393407, "grad_norm": 2.8396239280700684, "learning_rate": 8.697498507066552e-06, "loss": 0.8725, "step": 3639 }, { "epoch": 0.2173653409769497, "grad_norm": 2.204221248626709, "learning_rate": 8.69683498108951e-06, "loss": 0.9137, "step": 3640 }, { "epoch": 0.21742505672996537, "grad_norm": 1.9571921825408936, "learning_rate": 8.696171455112468e-06, "loss": 0.9115, "step": 3641 }, { "epoch": 0.217484772482981, "grad_norm": 2.426815986633301, "learning_rate": 8.695507929135426e-06, "loss": 0.9217, "step": 3642 }, { "epoch": 0.21754448823599665, "grad_norm": 2.202968120574951, "learning_rate": 8.694844403158385e-06, "loss": 0.9647, "step": 3643 }, { "epoch": 0.2176042039890123, "grad_norm": 2.035059928894043, "learning_rate": 8.694180877181343e-06, "loss": 0.941, "step": 3644 }, { "epoch": 0.21766391974202795, "grad_norm": 2.022185802459717, "learning_rate": 8.6935173512043e-06, "loss": 0.9632, "step": 3645 }, { "epoch": 0.2177236354950436, "grad_norm": 3.0281901359558105, "learning_rate": 8.692853825227259e-06, "loss": 0.9109, "step": 3646 }, { "epoch": 0.21778335124805923, "grad_norm": 2.1885766983032227, "learning_rate": 8.692190299250215e-06, "loss": 0.9361, "step": 3647 }, { "epoch": 0.2178430670010749, "grad_norm": 2.185878038406372, "learning_rate": 8.691526773273175e-06, "loss": 0.9242, "step": 3648 }, { "epoch": 0.21790278275409053, "grad_norm": 2.5512800216674805, "learning_rate": 8.690863247296131e-06, "loss": 0.9342, "step": 3649 }, { "epoch": 0.21796249850710617, "grad_norm": 2.4132237434387207, "learning_rate": 8.69019972131909e-06, "loss": 0.9169, "step": 3650 }, { "epoch": 0.21802221426012183, "grad_norm": 1.8314898014068604, "learning_rate": 8.68953619534205e-06, "loss": 0.9056, "step": 3651 }, { "epoch": 0.21808193001313747, "grad_norm": 1.946905493736267, "learning_rate": 8.688872669365006e-06, "loss": 0.949, "step": 3652 }, { "epoch": 0.2181416457661531, "grad_norm": 2.296250820159912, "learning_rate": 8.688209143387964e-06, "loss": 0.8905, "step": 3653 }, { "epoch": 0.21820136151916875, "grad_norm": 3.3258492946624756, "learning_rate": 8.687545617410922e-06, "loss": 0.9574, "step": 3654 }, { "epoch": 0.2182610772721844, "grad_norm": 1.8004474639892578, "learning_rate": 8.68688209143388e-06, "loss": 0.8711, "step": 3655 }, { "epoch": 0.21832079302520005, "grad_norm": 2.509859800338745, "learning_rate": 8.686218565456838e-06, "loss": 0.9247, "step": 3656 }, { "epoch": 0.2183805087782157, "grad_norm": 1.5449382066726685, "learning_rate": 8.685555039479796e-06, "loss": 0.9047, "step": 3657 }, { "epoch": 0.21844022453123133, "grad_norm": 2.729734182357788, "learning_rate": 8.684891513502754e-06, "loss": 0.9448, "step": 3658 }, { "epoch": 0.218499940284247, "grad_norm": 2.3868956565856934, "learning_rate": 8.684227987525712e-06, "loss": 0.9151, "step": 3659 }, { "epoch": 0.21855965603726263, "grad_norm": 2.7422778606414795, "learning_rate": 8.68356446154867e-06, "loss": 0.9283, "step": 3660 }, { "epoch": 0.21861937179027827, "grad_norm": 1.8228679895401, "learning_rate": 8.682900935571629e-06, "loss": 0.8953, "step": 3661 }, { "epoch": 0.21867908754329393, "grad_norm": 1.8504854440689087, "learning_rate": 8.682237409594587e-06, "loss": 0.9281, "step": 3662 }, { "epoch": 0.21873880329630957, "grad_norm": 2.4965174198150635, "learning_rate": 8.681573883617545e-06, "loss": 0.9378, "step": 3663 }, { "epoch": 0.2187985190493252, "grad_norm": 1.7093349695205688, "learning_rate": 8.680910357640501e-06, "loss": 0.9128, "step": 3664 }, { "epoch": 0.21885823480234085, "grad_norm": 3.491276264190674, "learning_rate": 8.680246831663461e-06, "loss": 0.9445, "step": 3665 }, { "epoch": 0.2189179505553565, "grad_norm": 3.6469595432281494, "learning_rate": 8.67958330568642e-06, "loss": 0.9206, "step": 3666 }, { "epoch": 0.21897766630837215, "grad_norm": 3.6749532222747803, "learning_rate": 8.678919779709376e-06, "loss": 0.9249, "step": 3667 }, { "epoch": 0.2190373820613878, "grad_norm": 1.982113242149353, "learning_rate": 8.678256253732334e-06, "loss": 0.9185, "step": 3668 }, { "epoch": 0.21909709781440345, "grad_norm": 1.8477606773376465, "learning_rate": 8.677592727755292e-06, "loss": 0.9339, "step": 3669 }, { "epoch": 0.2191568135674191, "grad_norm": 2.7295920848846436, "learning_rate": 8.67692920177825e-06, "loss": 0.9025, "step": 3670 }, { "epoch": 0.21921652932043473, "grad_norm": 2.0894360542297363, "learning_rate": 8.676265675801208e-06, "loss": 0.9139, "step": 3671 }, { "epoch": 0.21927624507345037, "grad_norm": 2.236893653869629, "learning_rate": 8.675602149824166e-06, "loss": 0.9094, "step": 3672 }, { "epoch": 0.21933596082646603, "grad_norm": 3.3726394176483154, "learning_rate": 8.674938623847124e-06, "loss": 0.9436, "step": 3673 }, { "epoch": 0.21939567657948167, "grad_norm": 2.4121410846710205, "learning_rate": 8.674275097870082e-06, "loss": 0.8947, "step": 3674 }, { "epoch": 0.2194553923324973, "grad_norm": 2.252587080001831, "learning_rate": 8.67361157189304e-06, "loss": 0.9347, "step": 3675 }, { "epoch": 0.21951510808551297, "grad_norm": 1.9002084732055664, "learning_rate": 8.672948045915998e-06, "loss": 0.9389, "step": 3676 }, { "epoch": 0.2195748238385286, "grad_norm": 1.8460241556167603, "learning_rate": 8.672284519938957e-06, "loss": 0.9301, "step": 3677 }, { "epoch": 0.21963453959154425, "grad_norm": 1.8204398155212402, "learning_rate": 8.671620993961915e-06, "loss": 0.927, "step": 3678 }, { "epoch": 0.21969425534455989, "grad_norm": 2.321719169616699, "learning_rate": 8.670957467984871e-06, "loss": 0.8934, "step": 3679 }, { "epoch": 0.21975397109757555, "grad_norm": 2.1666595935821533, "learning_rate": 8.670293942007831e-06, "loss": 0.8759, "step": 3680 }, { "epoch": 0.2198136868505912, "grad_norm": 2.606754779815674, "learning_rate": 8.669630416030789e-06, "loss": 0.977, "step": 3681 }, { "epoch": 0.21987340260360683, "grad_norm": 2.292293071746826, "learning_rate": 8.668966890053745e-06, "loss": 0.9147, "step": 3682 }, { "epoch": 0.21993311835662246, "grad_norm": 2.5587480068206787, "learning_rate": 8.668303364076705e-06, "loss": 0.9146, "step": 3683 }, { "epoch": 0.21999283410963813, "grad_norm": 2.6685898303985596, "learning_rate": 8.667639838099662e-06, "loss": 0.8964, "step": 3684 }, { "epoch": 0.22005254986265377, "grad_norm": 2.1959598064422607, "learning_rate": 8.66697631212262e-06, "loss": 0.9168, "step": 3685 }, { "epoch": 0.2201122656156694, "grad_norm": 2.350358009338379, "learning_rate": 8.66631278614558e-06, "loss": 0.9394, "step": 3686 }, { "epoch": 0.22017198136868507, "grad_norm": 2.368039131164551, "learning_rate": 8.665649260168536e-06, "loss": 0.9691, "step": 3687 }, { "epoch": 0.2202316971217007, "grad_norm": 1.6605453491210938, "learning_rate": 8.664985734191494e-06, "loss": 0.9245, "step": 3688 }, { "epoch": 0.22029141287471635, "grad_norm": 1.924027681350708, "learning_rate": 8.664322208214452e-06, "loss": 0.8796, "step": 3689 }, { "epoch": 0.22035112862773198, "grad_norm": 2.3569858074188232, "learning_rate": 8.66365868223741e-06, "loss": 0.9157, "step": 3690 }, { "epoch": 0.22041084438074765, "grad_norm": 3.1232852935791016, "learning_rate": 8.662995156260368e-06, "loss": 0.8946, "step": 3691 }, { "epoch": 0.2204705601337633, "grad_norm": 2.2750065326690674, "learning_rate": 8.662331630283326e-06, "loss": 0.8937, "step": 3692 }, { "epoch": 0.22053027588677893, "grad_norm": 2.6395435333251953, "learning_rate": 8.661668104306285e-06, "loss": 0.9473, "step": 3693 }, { "epoch": 0.2205899916397946, "grad_norm": 2.98100209236145, "learning_rate": 8.661004578329243e-06, "loss": 0.9114, "step": 3694 }, { "epoch": 0.22064970739281023, "grad_norm": 3.021183490753174, "learning_rate": 8.6603410523522e-06, "loss": 0.9495, "step": 3695 }, { "epoch": 0.22070942314582587, "grad_norm": 1.9518334865570068, "learning_rate": 8.659677526375159e-06, "loss": 0.919, "step": 3696 }, { "epoch": 0.2207691388988415, "grad_norm": 2.2939136028289795, "learning_rate": 8.659014000398115e-06, "loss": 0.9054, "step": 3697 }, { "epoch": 0.22082885465185717, "grad_norm": 3.2958486080169678, "learning_rate": 8.658350474421075e-06, "loss": 0.9419, "step": 3698 }, { "epoch": 0.2208885704048728, "grad_norm": 1.8146201372146606, "learning_rate": 8.657686948444031e-06, "loss": 0.896, "step": 3699 }, { "epoch": 0.22094828615788845, "grad_norm": 3.5259811878204346, "learning_rate": 8.65702342246699e-06, "loss": 0.9207, "step": 3700 }, { "epoch": 0.22100800191090408, "grad_norm": 2.0374155044555664, "learning_rate": 8.65635989648995e-06, "loss": 0.8928, "step": 3701 }, { "epoch": 0.22106771766391975, "grad_norm": 3.0228421688079834, "learning_rate": 8.655696370512906e-06, "loss": 0.8932, "step": 3702 }, { "epoch": 0.2211274334169354, "grad_norm": 2.072169065475464, "learning_rate": 8.655032844535864e-06, "loss": 0.9269, "step": 3703 }, { "epoch": 0.22118714916995103, "grad_norm": 2.4231910705566406, "learning_rate": 8.654369318558822e-06, "loss": 0.9082, "step": 3704 }, { "epoch": 0.2212468649229667, "grad_norm": 2.203357219696045, "learning_rate": 8.65370579258178e-06, "loss": 0.9265, "step": 3705 }, { "epoch": 0.22130658067598233, "grad_norm": 1.9114508628845215, "learning_rate": 8.653042266604738e-06, "loss": 0.9193, "step": 3706 }, { "epoch": 0.22136629642899797, "grad_norm": 1.9483927488327026, "learning_rate": 8.652378740627696e-06, "loss": 0.9322, "step": 3707 }, { "epoch": 0.2214260121820136, "grad_norm": 2.240621566772461, "learning_rate": 8.651715214650654e-06, "loss": 0.9015, "step": 3708 }, { "epoch": 0.22148572793502927, "grad_norm": 2.381650447845459, "learning_rate": 8.651051688673612e-06, "loss": 0.9432, "step": 3709 }, { "epoch": 0.2215454436880449, "grad_norm": 2.373286247253418, "learning_rate": 8.65038816269657e-06, "loss": 0.9283, "step": 3710 }, { "epoch": 0.22160515944106055, "grad_norm": 2.653296709060669, "learning_rate": 8.649724636719529e-06, "loss": 0.9311, "step": 3711 }, { "epoch": 0.2216648751940762, "grad_norm": 2.2889041900634766, "learning_rate": 8.649061110742487e-06, "loss": 0.9166, "step": 3712 }, { "epoch": 0.22172459094709185, "grad_norm": 4.248009204864502, "learning_rate": 8.648397584765445e-06, "loss": 0.9217, "step": 3713 }, { "epoch": 0.2217843067001075, "grad_norm": 3.4889261722564697, "learning_rate": 8.647734058788401e-06, "loss": 0.9301, "step": 3714 }, { "epoch": 0.22184402245312312, "grad_norm": 2.3373982906341553, "learning_rate": 8.647070532811361e-06, "loss": 0.9156, "step": 3715 }, { "epoch": 0.2219037382061388, "grad_norm": 3.081831216812134, "learning_rate": 8.646407006834319e-06, "loss": 0.9235, "step": 3716 }, { "epoch": 0.22196345395915443, "grad_norm": 2.1973791122436523, "learning_rate": 8.645743480857276e-06, "loss": 0.93, "step": 3717 }, { "epoch": 0.22202316971217007, "grad_norm": 4.811466693878174, "learning_rate": 8.645079954880234e-06, "loss": 0.9638, "step": 3718 }, { "epoch": 0.2220828854651857, "grad_norm": 1.8837871551513672, "learning_rate": 8.644416428903192e-06, "loss": 0.8986, "step": 3719 }, { "epoch": 0.22214260121820137, "grad_norm": 2.50142240524292, "learning_rate": 8.64375290292615e-06, "loss": 0.904, "step": 3720 }, { "epoch": 0.222202316971217, "grad_norm": 3.0741477012634277, "learning_rate": 8.643089376949108e-06, "loss": 0.9251, "step": 3721 }, { "epoch": 0.22226203272423264, "grad_norm": 1.8169106245040894, "learning_rate": 8.642425850972066e-06, "loss": 0.944, "step": 3722 }, { "epoch": 0.2223217484772483, "grad_norm": 2.4356319904327393, "learning_rate": 8.641762324995024e-06, "loss": 0.8772, "step": 3723 }, { "epoch": 0.22238146423026395, "grad_norm": 2.701444625854492, "learning_rate": 8.641098799017982e-06, "loss": 0.9412, "step": 3724 }, { "epoch": 0.22244117998327959, "grad_norm": 3.133162498474121, "learning_rate": 8.64043527304094e-06, "loss": 0.9175, "step": 3725 }, { "epoch": 0.22250089573629522, "grad_norm": 3.6729910373687744, "learning_rate": 8.639771747063898e-06, "loss": 0.902, "step": 3726 }, { "epoch": 0.2225606114893109, "grad_norm": 2.643442392349243, "learning_rate": 8.639108221086857e-06, "loss": 0.9351, "step": 3727 }, { "epoch": 0.22262032724232653, "grad_norm": 1.9899367094039917, "learning_rate": 8.638444695109815e-06, "loss": 0.9171, "step": 3728 }, { "epoch": 0.22268004299534216, "grad_norm": 2.154292583465576, "learning_rate": 8.637781169132771e-06, "loss": 0.9215, "step": 3729 }, { "epoch": 0.22273975874835783, "grad_norm": 2.6300952434539795, "learning_rate": 8.63711764315573e-06, "loss": 0.8921, "step": 3730 }, { "epoch": 0.22279947450137347, "grad_norm": 2.0352509021759033, "learning_rate": 8.636454117178689e-06, "loss": 0.9825, "step": 3731 }, { "epoch": 0.2228591902543891, "grad_norm": 7.018664360046387, "learning_rate": 8.635790591201645e-06, "loss": 0.8998, "step": 3732 }, { "epoch": 0.22291890600740474, "grad_norm": 1.9549167156219482, "learning_rate": 8.635127065224605e-06, "loss": 0.8919, "step": 3733 }, { "epoch": 0.2229786217604204, "grad_norm": 5.196609973907471, "learning_rate": 8.634463539247562e-06, "loss": 0.9489, "step": 3734 }, { "epoch": 0.22303833751343605, "grad_norm": 3.6802186965942383, "learning_rate": 8.63380001327052e-06, "loss": 0.9166, "step": 3735 }, { "epoch": 0.22309805326645168, "grad_norm": 2.0810177326202393, "learning_rate": 8.63313648729348e-06, "loss": 0.8918, "step": 3736 }, { "epoch": 0.22315776901946732, "grad_norm": 1.947487711906433, "learning_rate": 8.632472961316436e-06, "loss": 0.9116, "step": 3737 }, { "epoch": 0.223217484772483, "grad_norm": 3.2986538410186768, "learning_rate": 8.631809435339394e-06, "loss": 0.9247, "step": 3738 }, { "epoch": 0.22327720052549863, "grad_norm": 2.391655683517456, "learning_rate": 8.631145909362352e-06, "loss": 0.896, "step": 3739 }, { "epoch": 0.22333691627851426, "grad_norm": 2.1472527980804443, "learning_rate": 8.63048238338531e-06, "loss": 0.9228, "step": 3740 }, { "epoch": 0.22339663203152993, "grad_norm": 2.901804208755493, "learning_rate": 8.629818857408268e-06, "loss": 0.9577, "step": 3741 }, { "epoch": 0.22345634778454557, "grad_norm": 2.7192728519439697, "learning_rate": 8.629155331431226e-06, "loss": 0.9228, "step": 3742 }, { "epoch": 0.2235160635375612, "grad_norm": 2.5715789794921875, "learning_rate": 8.628491805454184e-06, "loss": 0.9128, "step": 3743 }, { "epoch": 0.22357577929057684, "grad_norm": 3.0311858654022217, "learning_rate": 8.627828279477143e-06, "loss": 0.9531, "step": 3744 }, { "epoch": 0.2236354950435925, "grad_norm": 2.350482225418091, "learning_rate": 8.6271647535001e-06, "loss": 0.9144, "step": 3745 }, { "epoch": 0.22369521079660815, "grad_norm": 2.10604190826416, "learning_rate": 8.626501227523059e-06, "loss": 0.9362, "step": 3746 }, { "epoch": 0.22375492654962378, "grad_norm": 2.39747953414917, "learning_rate": 8.625837701546015e-06, "loss": 0.9179, "step": 3747 }, { "epoch": 0.22381464230263945, "grad_norm": 2.303709030151367, "learning_rate": 8.625174175568975e-06, "loss": 0.9267, "step": 3748 }, { "epoch": 0.2238743580556551, "grad_norm": 2.385793447494507, "learning_rate": 8.624510649591931e-06, "loss": 0.9374, "step": 3749 }, { "epoch": 0.22393407380867072, "grad_norm": 1.6978610754013062, "learning_rate": 8.62384712361489e-06, "loss": 0.8796, "step": 3750 }, { "epoch": 0.22399378956168636, "grad_norm": 1.7854889631271362, "learning_rate": 8.62318359763785e-06, "loss": 0.9181, "step": 3751 }, { "epoch": 0.22405350531470203, "grad_norm": 8.121033668518066, "learning_rate": 8.622520071660806e-06, "loss": 0.9044, "step": 3752 }, { "epoch": 0.22411322106771767, "grad_norm": 1.8350096940994263, "learning_rate": 8.621856545683764e-06, "loss": 0.9284, "step": 3753 }, { "epoch": 0.2241729368207333, "grad_norm": 2.065744400024414, "learning_rate": 8.621193019706722e-06, "loss": 0.9421, "step": 3754 }, { "epoch": 0.22423265257374894, "grad_norm": 1.8859412670135498, "learning_rate": 8.62052949372968e-06, "loss": 0.9195, "step": 3755 }, { "epoch": 0.2242923683267646, "grad_norm": 1.5838193893432617, "learning_rate": 8.619865967752638e-06, "loss": 0.8844, "step": 3756 }, { "epoch": 0.22435208407978025, "grad_norm": 2.4764201641082764, "learning_rate": 8.619202441775596e-06, "loss": 0.9003, "step": 3757 }, { "epoch": 0.22441179983279588, "grad_norm": 2.3820269107818604, "learning_rate": 8.618538915798554e-06, "loss": 0.9203, "step": 3758 }, { "epoch": 0.22447151558581155, "grad_norm": 2.0832481384277344, "learning_rate": 8.617875389821512e-06, "loss": 0.9221, "step": 3759 }, { "epoch": 0.2245312313388272, "grad_norm": 3.1883339881896973, "learning_rate": 8.61721186384447e-06, "loss": 0.8873, "step": 3760 }, { "epoch": 0.22459094709184282, "grad_norm": 2.3074660301208496, "learning_rate": 8.616548337867429e-06, "loss": 0.9338, "step": 3761 }, { "epoch": 0.22465066284485846, "grad_norm": 2.4257752895355225, "learning_rate": 8.615884811890387e-06, "loss": 0.9315, "step": 3762 }, { "epoch": 0.22471037859787413, "grad_norm": 4.120463848114014, "learning_rate": 8.615221285913345e-06, "loss": 0.9062, "step": 3763 }, { "epoch": 0.22477009435088977, "grad_norm": 5.651588439941406, "learning_rate": 8.614557759936301e-06, "loss": 0.893, "step": 3764 }, { "epoch": 0.2248298101039054, "grad_norm": 3.600226402282715, "learning_rate": 8.613894233959261e-06, "loss": 0.9177, "step": 3765 }, { "epoch": 0.22488952585692107, "grad_norm": 5.573716163635254, "learning_rate": 8.613230707982219e-06, "loss": 0.9138, "step": 3766 }, { "epoch": 0.2249492416099367, "grad_norm": 5.3217692375183105, "learning_rate": 8.612567182005175e-06, "loss": 0.9001, "step": 3767 }, { "epoch": 0.22500895736295234, "grad_norm": 2.0812182426452637, "learning_rate": 8.611903656028134e-06, "loss": 0.9493, "step": 3768 }, { "epoch": 0.22506867311596798, "grad_norm": 2.254469156265259, "learning_rate": 8.611240130051092e-06, "loss": 0.9269, "step": 3769 }, { "epoch": 0.22512838886898365, "grad_norm": 3.4803478717803955, "learning_rate": 8.61057660407405e-06, "loss": 0.926, "step": 3770 }, { "epoch": 0.22518810462199929, "grad_norm": 2.2140955924987793, "learning_rate": 8.609913078097008e-06, "loss": 0.8911, "step": 3771 }, { "epoch": 0.22524782037501492, "grad_norm": 3.11942195892334, "learning_rate": 8.609249552119966e-06, "loss": 0.9852, "step": 3772 }, { "epoch": 0.22530753612803056, "grad_norm": 2.3169126510620117, "learning_rate": 8.608586026142924e-06, "loss": 0.8857, "step": 3773 }, { "epoch": 0.22536725188104623, "grad_norm": 2.833340644836426, "learning_rate": 8.607922500165882e-06, "loss": 0.8981, "step": 3774 }, { "epoch": 0.22542696763406186, "grad_norm": 2.233628511428833, "learning_rate": 8.60725897418884e-06, "loss": 0.9329, "step": 3775 }, { "epoch": 0.2254866833870775, "grad_norm": 2.291363477706909, "learning_rate": 8.606595448211798e-06, "loss": 0.9846, "step": 3776 }, { "epoch": 0.22554639914009317, "grad_norm": 2.18833065032959, "learning_rate": 8.605931922234756e-06, "loss": 0.9244, "step": 3777 }, { "epoch": 0.2256061148931088, "grad_norm": 2.1182913780212402, "learning_rate": 8.605268396257715e-06, "loss": 0.9452, "step": 3778 }, { "epoch": 0.22566583064612444, "grad_norm": 1.9324109554290771, "learning_rate": 8.604604870280671e-06, "loss": 0.8708, "step": 3779 }, { "epoch": 0.22572554639914008, "grad_norm": 2.4324822425842285, "learning_rate": 8.60394134430363e-06, "loss": 0.8952, "step": 3780 }, { "epoch": 0.22578526215215575, "grad_norm": 2.2449519634246826, "learning_rate": 8.603277818326589e-06, "loss": 0.8889, "step": 3781 }, { "epoch": 0.22584497790517138, "grad_norm": 1.551552414894104, "learning_rate": 8.602614292349545e-06, "loss": 0.9188, "step": 3782 }, { "epoch": 0.22590469365818702, "grad_norm": 1.9731899499893188, "learning_rate": 8.601950766372505e-06, "loss": 0.9478, "step": 3783 }, { "epoch": 0.2259644094112027, "grad_norm": 1.9259130954742432, "learning_rate": 8.601287240395461e-06, "loss": 0.913, "step": 3784 }, { "epoch": 0.22602412516421833, "grad_norm": 2.362947940826416, "learning_rate": 8.60062371441842e-06, "loss": 0.9498, "step": 3785 }, { "epoch": 0.22608384091723396, "grad_norm": 2.201111316680908, "learning_rate": 8.59996018844138e-06, "loss": 0.8915, "step": 3786 }, { "epoch": 0.2261435566702496, "grad_norm": 2.4432027339935303, "learning_rate": 8.599296662464336e-06, "loss": 0.929, "step": 3787 }, { "epoch": 0.22620327242326527, "grad_norm": 1.9190267324447632, "learning_rate": 8.598633136487294e-06, "loss": 0.9214, "step": 3788 }, { "epoch": 0.2262629881762809, "grad_norm": 2.540498971939087, "learning_rate": 8.597969610510252e-06, "loss": 0.9436, "step": 3789 }, { "epoch": 0.22632270392929654, "grad_norm": 2.276827335357666, "learning_rate": 8.59730608453321e-06, "loss": 0.9256, "step": 3790 }, { "epoch": 0.22638241968231218, "grad_norm": 2.8679089546203613, "learning_rate": 8.596642558556168e-06, "loss": 0.9094, "step": 3791 }, { "epoch": 0.22644213543532785, "grad_norm": 1.9214835166931152, "learning_rate": 8.595979032579126e-06, "loss": 0.8947, "step": 3792 }, { "epoch": 0.22650185118834348, "grad_norm": 2.0831711292266846, "learning_rate": 8.595315506602084e-06, "loss": 0.8652, "step": 3793 }, { "epoch": 0.22656156694135912, "grad_norm": 2.0272152423858643, "learning_rate": 8.594651980625042e-06, "loss": 0.9227, "step": 3794 }, { "epoch": 0.2266212826943748, "grad_norm": 2.698496103286743, "learning_rate": 8.593988454648e-06, "loss": 0.9414, "step": 3795 }, { "epoch": 0.22668099844739042, "grad_norm": 2.4257652759552, "learning_rate": 8.593324928670959e-06, "loss": 0.933, "step": 3796 }, { "epoch": 0.22674071420040606, "grad_norm": 2.744385242462158, "learning_rate": 8.592661402693915e-06, "loss": 0.8992, "step": 3797 }, { "epoch": 0.2268004299534217, "grad_norm": 2.3717055320739746, "learning_rate": 8.591997876716875e-06, "loss": 0.909, "step": 3798 }, { "epoch": 0.22686014570643737, "grad_norm": 2.1422202587127686, "learning_rate": 8.591334350739831e-06, "loss": 0.938, "step": 3799 }, { "epoch": 0.226919861459453, "grad_norm": 2.513367176055908, "learning_rate": 8.59067082476279e-06, "loss": 0.9546, "step": 3800 }, { "epoch": 0.226919861459453, "eval_text_loss": 0.9509502053260803, "eval_text_runtime": 15.178, "eval_text_samples_per_second": 263.539, "eval_text_steps_per_second": 0.527, "step": 3800 }, { "epoch": 0.226919861459453, "eval_image_loss": 0.6807852387428284, "eval_image_runtime": 5.0061, "eval_image_samples_per_second": 799.026, "eval_image_steps_per_second": 1.598, "step": 3800 }, { "epoch": 0.226919861459453, "eval_video_loss": 1.1465256214141846, "eval_video_runtime": 75.9301, "eval_video_samples_per_second": 52.68, "eval_video_steps_per_second": 0.105, "step": 3800 }, { "epoch": 0.22697957721246864, "grad_norm": 2.104189872741699, "learning_rate": 8.59000729878575e-06, "loss": 0.9137, "step": 3801 }, { "epoch": 0.2270392929654843, "grad_norm": 2.4378345012664795, "learning_rate": 8.589343772808706e-06, "loss": 0.897, "step": 3802 }, { "epoch": 0.22709900871849995, "grad_norm": 3.245114326477051, "learning_rate": 8.588680246831664e-06, "loss": 0.878, "step": 3803 }, { "epoch": 0.22715872447151558, "grad_norm": 1.8373751640319824, "learning_rate": 8.588016720854622e-06, "loss": 0.9508, "step": 3804 }, { "epoch": 0.22721844022453122, "grad_norm": 1.7009754180908203, "learning_rate": 8.58735319487758e-06, "loss": 0.9045, "step": 3805 }, { "epoch": 0.2272781559775469, "grad_norm": 1.9909456968307495, "learning_rate": 8.586689668900538e-06, "loss": 0.9178, "step": 3806 }, { "epoch": 0.22733787173056252, "grad_norm": 1.8825534582138062, "learning_rate": 8.586026142923496e-06, "loss": 0.9072, "step": 3807 }, { "epoch": 0.22739758748357816, "grad_norm": 6.242610931396484, "learning_rate": 8.585362616946454e-06, "loss": 0.9551, "step": 3808 }, { "epoch": 0.2274573032365938, "grad_norm": 5.940858364105225, "learning_rate": 8.584699090969412e-06, "loss": 0.9096, "step": 3809 }, { "epoch": 0.22751701898960947, "grad_norm": 1.8697278499603271, "learning_rate": 8.58403556499237e-06, "loss": 0.8951, "step": 3810 }, { "epoch": 0.2275767347426251, "grad_norm": 2.342045783996582, "learning_rate": 8.583372039015328e-06, "loss": 0.9207, "step": 3811 }, { "epoch": 0.22763645049564074, "grad_norm": 2.6023454666137695, "learning_rate": 8.582708513038287e-06, "loss": 0.8769, "step": 3812 }, { "epoch": 0.2276961662486564, "grad_norm": 2.0877223014831543, "learning_rate": 8.582044987061245e-06, "loss": 0.8913, "step": 3813 }, { "epoch": 0.22775588200167204, "grad_norm": 2.7510874271392822, "learning_rate": 8.581381461084201e-06, "loss": 0.9181, "step": 3814 }, { "epoch": 0.22781559775468768, "grad_norm": 2.819563150405884, "learning_rate": 8.580717935107161e-06, "loss": 0.9229, "step": 3815 }, { "epoch": 0.22787531350770332, "grad_norm": 2.2242956161499023, "learning_rate": 8.580054409130119e-06, "loss": 0.9043, "step": 3816 }, { "epoch": 0.22793502926071899, "grad_norm": 1.6279488801956177, "learning_rate": 8.579390883153075e-06, "loss": 0.9258, "step": 3817 }, { "epoch": 0.22799474501373462, "grad_norm": 2.1288626194000244, "learning_rate": 8.578727357176034e-06, "loss": 0.9389, "step": 3818 }, { "epoch": 0.22805446076675026, "grad_norm": 2.5579521656036377, "learning_rate": 8.578063831198992e-06, "loss": 0.9123, "step": 3819 }, { "epoch": 0.22811417651976593, "grad_norm": 2.1368467807769775, "learning_rate": 8.57740030522195e-06, "loss": 0.8879, "step": 3820 }, { "epoch": 0.22817389227278156, "grad_norm": 2.3246030807495117, "learning_rate": 8.576736779244908e-06, "loss": 0.9126, "step": 3821 }, { "epoch": 0.2282336080257972, "grad_norm": 2.5938868522644043, "learning_rate": 8.576073253267866e-06, "loss": 0.9004, "step": 3822 }, { "epoch": 0.22829332377881284, "grad_norm": 1.79948091506958, "learning_rate": 8.575409727290824e-06, "loss": 0.9138, "step": 3823 }, { "epoch": 0.2283530395318285, "grad_norm": 1.9936105012893677, "learning_rate": 8.574746201313782e-06, "loss": 0.9298, "step": 3824 }, { "epoch": 0.22841275528484414, "grad_norm": 1.7799644470214844, "learning_rate": 8.57408267533674e-06, "loss": 0.9011, "step": 3825 }, { "epoch": 0.22847247103785978, "grad_norm": 4.757879734039307, "learning_rate": 8.573419149359698e-06, "loss": 0.9374, "step": 3826 }, { "epoch": 0.22853218679087545, "grad_norm": 3.1316754817962646, "learning_rate": 8.572755623382656e-06, "loss": 0.9185, "step": 3827 }, { "epoch": 0.22859190254389108, "grad_norm": 2.195378303527832, "learning_rate": 8.572092097405615e-06, "loss": 0.9016, "step": 3828 }, { "epoch": 0.22865161829690672, "grad_norm": 2.344299554824829, "learning_rate": 8.571428571428571e-06, "loss": 0.8983, "step": 3829 }, { "epoch": 0.22871133404992236, "grad_norm": 2.291727304458618, "learning_rate": 8.57076504545153e-06, "loss": 0.9279, "step": 3830 }, { "epoch": 0.22877104980293803, "grad_norm": 2.2289700508117676, "learning_rate": 8.570101519474489e-06, "loss": 0.9381, "step": 3831 }, { "epoch": 0.22883076555595366, "grad_norm": 14.281763076782227, "learning_rate": 8.569437993497445e-06, "loss": 0.935, "step": 3832 }, { "epoch": 0.2288904813089693, "grad_norm": 2.0512616634368896, "learning_rate": 8.568774467520405e-06, "loss": 0.9387, "step": 3833 }, { "epoch": 0.22895019706198494, "grad_norm": 2.1444242000579834, "learning_rate": 8.568110941543361e-06, "loss": 0.9324, "step": 3834 }, { "epoch": 0.2290099128150006, "grad_norm": 2.8823907375335693, "learning_rate": 8.56744741556632e-06, "loss": 0.903, "step": 3835 }, { "epoch": 0.22906962856801624, "grad_norm": 1.9563467502593994, "learning_rate": 8.56678388958928e-06, "loss": 0.9074, "step": 3836 }, { "epoch": 0.22912934432103188, "grad_norm": 2.3550362586975098, "learning_rate": 8.566120363612236e-06, "loss": 0.9242, "step": 3837 }, { "epoch": 0.22918906007404755, "grad_norm": 1.9360096454620361, "learning_rate": 8.565456837635194e-06, "loss": 0.9277, "step": 3838 }, { "epoch": 0.22924877582706318, "grad_norm": 1.8090754747390747, "learning_rate": 8.564793311658152e-06, "loss": 0.9095, "step": 3839 }, { "epoch": 0.22930849158007882, "grad_norm": 1.616589069366455, "learning_rate": 8.56412978568111e-06, "loss": 0.9291, "step": 3840 }, { "epoch": 0.22936820733309446, "grad_norm": 2.157435417175293, "learning_rate": 8.563466259704068e-06, "loss": 0.9, "step": 3841 }, { "epoch": 0.22942792308611012, "grad_norm": 1.945785641670227, "learning_rate": 8.562802733727026e-06, "loss": 0.8897, "step": 3842 }, { "epoch": 0.22948763883912576, "grad_norm": 4.474611759185791, "learning_rate": 8.562139207749984e-06, "loss": 0.9184, "step": 3843 }, { "epoch": 0.2295473545921414, "grad_norm": 3.3092803955078125, "learning_rate": 8.561475681772942e-06, "loss": 0.9082, "step": 3844 }, { "epoch": 0.22960707034515707, "grad_norm": 1.6236488819122314, "learning_rate": 8.5608121557959e-06, "loss": 0.8808, "step": 3845 }, { "epoch": 0.2296667860981727, "grad_norm": 8.714719772338867, "learning_rate": 8.560148629818859e-06, "loss": 0.9187, "step": 3846 }, { "epoch": 0.22972650185118834, "grad_norm": 2.4364874362945557, "learning_rate": 8.559485103841815e-06, "loss": 0.9092, "step": 3847 }, { "epoch": 0.22978621760420398, "grad_norm": 2.719362735748291, "learning_rate": 8.558821577864775e-06, "loss": 0.9045, "step": 3848 }, { "epoch": 0.22984593335721965, "grad_norm": 1.9977179765701294, "learning_rate": 8.558158051887731e-06, "loss": 0.9167, "step": 3849 }, { "epoch": 0.22990564911023528, "grad_norm": 1.6154652833938599, "learning_rate": 8.55749452591069e-06, "loss": 0.9128, "step": 3850 }, { "epoch": 0.22996536486325092, "grad_norm": 2.62996768951416, "learning_rate": 8.556830999933649e-06, "loss": 0.9282, "step": 3851 }, { "epoch": 0.23002508061626656, "grad_norm": 3.630361795425415, "learning_rate": 8.556167473956606e-06, "loss": 0.915, "step": 3852 }, { "epoch": 0.23008479636928222, "grad_norm": 1.8977100849151611, "learning_rate": 8.555503947979564e-06, "loss": 0.8955, "step": 3853 }, { "epoch": 0.23014451212229786, "grad_norm": 2.3814022541046143, "learning_rate": 8.554840422002522e-06, "loss": 0.9422, "step": 3854 }, { "epoch": 0.2302042278753135, "grad_norm": 2.0693585872650146, "learning_rate": 8.55417689602548e-06, "loss": 0.929, "step": 3855 }, { "epoch": 0.23026394362832917, "grad_norm": 1.7278692722320557, "learning_rate": 8.553513370048438e-06, "loss": 0.8993, "step": 3856 }, { "epoch": 0.2303236593813448, "grad_norm": 2.0972025394439697, "learning_rate": 8.552849844071396e-06, "loss": 0.9374, "step": 3857 }, { "epoch": 0.23038337513436044, "grad_norm": 2.423677921295166, "learning_rate": 8.552186318094354e-06, "loss": 0.9327, "step": 3858 }, { "epoch": 0.23044309088737608, "grad_norm": 2.5068740844726562, "learning_rate": 8.551522792117312e-06, "loss": 0.9211, "step": 3859 }, { "epoch": 0.23050280664039174, "grad_norm": 1.9936500787734985, "learning_rate": 8.55085926614027e-06, "loss": 0.9118, "step": 3860 }, { "epoch": 0.23056252239340738, "grad_norm": 3.0003087520599365, "learning_rate": 8.550195740163228e-06, "loss": 0.9471, "step": 3861 }, { "epoch": 0.23062223814642302, "grad_norm": 3.4433016777038574, "learning_rate": 8.549532214186187e-06, "loss": 0.8997, "step": 3862 }, { "epoch": 0.23068195389943869, "grad_norm": 2.0828444957733154, "learning_rate": 8.548868688209145e-06, "loss": 0.9334, "step": 3863 }, { "epoch": 0.23074166965245432, "grad_norm": 2.325364112854004, "learning_rate": 8.548205162232101e-06, "loss": 0.9177, "step": 3864 }, { "epoch": 0.23080138540546996, "grad_norm": 2.0004806518554688, "learning_rate": 8.54754163625506e-06, "loss": 0.9696, "step": 3865 }, { "epoch": 0.2308611011584856, "grad_norm": 2.7599265575408936, "learning_rate": 8.546878110278019e-06, "loss": 0.8791, "step": 3866 }, { "epoch": 0.23092081691150126, "grad_norm": 2.8971898555755615, "learning_rate": 8.546214584300975e-06, "loss": 0.9186, "step": 3867 }, { "epoch": 0.2309805326645169, "grad_norm": 3.4014225006103516, "learning_rate": 8.545551058323933e-06, "loss": 0.9375, "step": 3868 }, { "epoch": 0.23104024841753254, "grad_norm": 2.8678789138793945, "learning_rate": 8.544887532346892e-06, "loss": 0.8589, "step": 3869 }, { "epoch": 0.23109996417054818, "grad_norm": 2.055903434753418, "learning_rate": 8.54422400636985e-06, "loss": 0.9474, "step": 3870 }, { "epoch": 0.23115967992356384, "grad_norm": 1.950309157371521, "learning_rate": 8.543560480392808e-06, "loss": 0.9391, "step": 3871 }, { "epoch": 0.23121939567657948, "grad_norm": 2.7359580993652344, "learning_rate": 8.542896954415766e-06, "loss": 0.9376, "step": 3872 }, { "epoch": 0.23127911142959512, "grad_norm": 2.0089149475097656, "learning_rate": 8.542233428438724e-06, "loss": 0.896, "step": 3873 }, { "epoch": 0.23133882718261078, "grad_norm": 2.8971822261810303, "learning_rate": 8.541569902461682e-06, "loss": 0.953, "step": 3874 }, { "epoch": 0.23139854293562642, "grad_norm": 2.1313202381134033, "learning_rate": 8.54090637648464e-06, "loss": 0.8877, "step": 3875 }, { "epoch": 0.23145825868864206, "grad_norm": 1.9872139692306519, "learning_rate": 8.540242850507598e-06, "loss": 0.9022, "step": 3876 }, { "epoch": 0.2315179744416577, "grad_norm": 1.8560724258422852, "learning_rate": 8.539579324530556e-06, "loss": 0.9343, "step": 3877 }, { "epoch": 0.23157769019467336, "grad_norm": 1.9760996103286743, "learning_rate": 8.538915798553514e-06, "loss": 0.9102, "step": 3878 }, { "epoch": 0.231637405947689, "grad_norm": 1.9981762170791626, "learning_rate": 8.538252272576471e-06, "loss": 0.9294, "step": 3879 }, { "epoch": 0.23169712170070464, "grad_norm": 12.124011993408203, "learning_rate": 8.53758874659943e-06, "loss": 0.8764, "step": 3880 }, { "epoch": 0.2317568374537203, "grad_norm": 2.6902809143066406, "learning_rate": 8.536925220622389e-06, "loss": 0.9271, "step": 3881 }, { "epoch": 0.23181655320673594, "grad_norm": 2.184598922729492, "learning_rate": 8.536261694645345e-06, "loss": 0.9137, "step": 3882 }, { "epoch": 0.23187626895975158, "grad_norm": 2.34694504737854, "learning_rate": 8.535598168668305e-06, "loss": 0.932, "step": 3883 }, { "epoch": 0.23193598471276722, "grad_norm": 1.8080500364303589, "learning_rate": 8.534934642691261e-06, "loss": 0.929, "step": 3884 }, { "epoch": 0.23199570046578288, "grad_norm": 2.734832286834717, "learning_rate": 8.53427111671422e-06, "loss": 0.9134, "step": 3885 }, { "epoch": 0.23205541621879852, "grad_norm": 2.819216728210449, "learning_rate": 8.53360759073718e-06, "loss": 0.8923, "step": 3886 }, { "epoch": 0.23211513197181416, "grad_norm": 1.9595977067947388, "learning_rate": 8.532944064760136e-06, "loss": 0.8936, "step": 3887 }, { "epoch": 0.2321748477248298, "grad_norm": 2.2245514392852783, "learning_rate": 8.532280538783094e-06, "loss": 0.9432, "step": 3888 }, { "epoch": 0.23223456347784546, "grad_norm": 2.142216444015503, "learning_rate": 8.531617012806052e-06, "loss": 0.9131, "step": 3889 }, { "epoch": 0.2322942792308611, "grad_norm": 2.591555118560791, "learning_rate": 8.53095348682901e-06, "loss": 0.937, "step": 3890 }, { "epoch": 0.23235399498387674, "grad_norm": 3.018502950668335, "learning_rate": 8.530289960851968e-06, "loss": 0.8917, "step": 3891 }, { "epoch": 0.2324137107368924, "grad_norm": 2.3780031204223633, "learning_rate": 8.529626434874926e-06, "loss": 0.9052, "step": 3892 }, { "epoch": 0.23247342648990804, "grad_norm": 3.3743324279785156, "learning_rate": 8.528962908897884e-06, "loss": 0.9428, "step": 3893 }, { "epoch": 0.23253314224292368, "grad_norm": 2.336972713470459, "learning_rate": 8.528299382920842e-06, "loss": 0.9211, "step": 3894 }, { "epoch": 0.23259285799593932, "grad_norm": 2.2927699089050293, "learning_rate": 8.5276358569438e-06, "loss": 0.8991, "step": 3895 }, { "epoch": 0.23265257374895498, "grad_norm": 2.512561798095703, "learning_rate": 8.526972330966759e-06, "loss": 0.9067, "step": 3896 }, { "epoch": 0.23271228950197062, "grad_norm": 1.970997929573059, "learning_rate": 8.526308804989715e-06, "loss": 0.9291, "step": 3897 }, { "epoch": 0.23277200525498626, "grad_norm": 3.638460874557495, "learning_rate": 8.525645279012675e-06, "loss": 0.9358, "step": 3898 }, { "epoch": 0.23283172100800192, "grad_norm": 2.334749460220337, "learning_rate": 8.524981753035631e-06, "loss": 0.9481, "step": 3899 }, { "epoch": 0.23289143676101756, "grad_norm": 2.0079641342163086, "learning_rate": 8.52431822705859e-06, "loss": 0.918, "step": 3900 }, { "epoch": 0.2329511525140332, "grad_norm": 2.724045753479004, "learning_rate": 8.523654701081549e-06, "loss": 0.9115, "step": 3901 }, { "epoch": 0.23301086826704884, "grad_norm": 2.638070821762085, "learning_rate": 8.522991175104505e-06, "loss": 0.9499, "step": 3902 }, { "epoch": 0.2330705840200645, "grad_norm": 2.478621244430542, "learning_rate": 8.522327649127464e-06, "loss": 0.9204, "step": 3903 }, { "epoch": 0.23313029977308014, "grad_norm": 2.85166072845459, "learning_rate": 8.521664123150422e-06, "loss": 0.9348, "step": 3904 }, { "epoch": 0.23319001552609578, "grad_norm": 4.242486476898193, "learning_rate": 8.52100059717338e-06, "loss": 0.9155, "step": 3905 }, { "epoch": 0.23324973127911142, "grad_norm": 2.2156991958618164, "learning_rate": 8.520337071196338e-06, "loss": 0.8981, "step": 3906 }, { "epoch": 0.23330944703212708, "grad_norm": 2.64888858795166, "learning_rate": 8.519673545219296e-06, "loss": 0.924, "step": 3907 }, { "epoch": 0.23336916278514272, "grad_norm": 2.6450343132019043, "learning_rate": 8.519010019242254e-06, "loss": 0.953, "step": 3908 }, { "epoch": 0.23342887853815836, "grad_norm": 3.2199506759643555, "learning_rate": 8.518346493265212e-06, "loss": 0.8796, "step": 3909 }, { "epoch": 0.23348859429117402, "grad_norm": 2.744405508041382, "learning_rate": 8.51768296728817e-06, "loss": 0.8842, "step": 3910 }, { "epoch": 0.23354831004418966, "grad_norm": 1.8915985822677612, "learning_rate": 8.517019441311128e-06, "loss": 0.8968, "step": 3911 }, { "epoch": 0.2336080257972053, "grad_norm": 1.7881337404251099, "learning_rate": 8.516355915334086e-06, "loss": 0.936, "step": 3912 }, { "epoch": 0.23366774155022094, "grad_norm": 2.1972784996032715, "learning_rate": 8.515692389357045e-06, "loss": 0.9042, "step": 3913 }, { "epoch": 0.2337274573032366, "grad_norm": 1.9183008670806885, "learning_rate": 8.515028863380001e-06, "loss": 0.9219, "step": 3914 }, { "epoch": 0.23378717305625224, "grad_norm": 2.8507823944091797, "learning_rate": 8.51436533740296e-06, "loss": 0.9266, "step": 3915 }, { "epoch": 0.23384688880926788, "grad_norm": 2.3863728046417236, "learning_rate": 8.513701811425919e-06, "loss": 0.904, "step": 3916 }, { "epoch": 0.23390660456228354, "grad_norm": 2.905336380004883, "learning_rate": 8.513038285448875e-06, "loss": 0.8867, "step": 3917 }, { "epoch": 0.23396632031529918, "grad_norm": 2.4291391372680664, "learning_rate": 8.512374759471833e-06, "loss": 0.8921, "step": 3918 }, { "epoch": 0.23402603606831482, "grad_norm": 2.5824673175811768, "learning_rate": 8.511711233494791e-06, "loss": 0.9219, "step": 3919 }, { "epoch": 0.23408575182133046, "grad_norm": 1.7434848546981812, "learning_rate": 8.51104770751775e-06, "loss": 0.9135, "step": 3920 }, { "epoch": 0.23414546757434612, "grad_norm": 2.402561664581299, "learning_rate": 8.510384181540708e-06, "loss": 0.9001, "step": 3921 }, { "epoch": 0.23420518332736176, "grad_norm": 1.889604926109314, "learning_rate": 8.509720655563666e-06, "loss": 0.878, "step": 3922 }, { "epoch": 0.2342648990803774, "grad_norm": 2.2991323471069336, "learning_rate": 8.509057129586624e-06, "loss": 0.908, "step": 3923 }, { "epoch": 0.23432461483339304, "grad_norm": 3.2346320152282715, "learning_rate": 8.508393603609582e-06, "loss": 0.8908, "step": 3924 }, { "epoch": 0.2343843305864087, "grad_norm": 2.540469169616699, "learning_rate": 8.50773007763254e-06, "loss": 0.8955, "step": 3925 }, { "epoch": 0.23444404633942434, "grad_norm": 3.8800792694091797, "learning_rate": 8.507066551655498e-06, "loss": 0.9215, "step": 3926 }, { "epoch": 0.23450376209243998, "grad_norm": 2.89847731590271, "learning_rate": 8.506403025678456e-06, "loss": 0.9381, "step": 3927 }, { "epoch": 0.23456347784545564, "grad_norm": 3.759904146194458, "learning_rate": 8.505739499701414e-06, "loss": 0.9323, "step": 3928 }, { "epoch": 0.23462319359847128, "grad_norm": 2.7401955127716064, "learning_rate": 8.50507597372437e-06, "loss": 0.9292, "step": 3929 }, { "epoch": 0.23468290935148692, "grad_norm": 2.0670928955078125, "learning_rate": 8.50441244774733e-06, "loss": 0.9038, "step": 3930 }, { "epoch": 0.23474262510450256, "grad_norm": 4.1989922523498535, "learning_rate": 8.503748921770289e-06, "loss": 0.9219, "step": 3931 }, { "epoch": 0.23480234085751822, "grad_norm": 2.1983742713928223, "learning_rate": 8.503085395793245e-06, "loss": 0.9274, "step": 3932 }, { "epoch": 0.23486205661053386, "grad_norm": 2.661708354949951, "learning_rate": 8.502421869816205e-06, "loss": 0.9326, "step": 3933 }, { "epoch": 0.2349217723635495, "grad_norm": 2.2366490364074707, "learning_rate": 8.501758343839161e-06, "loss": 0.9382, "step": 3934 }, { "epoch": 0.23498148811656516, "grad_norm": 2.179382085800171, "learning_rate": 8.50109481786212e-06, "loss": 0.9452, "step": 3935 }, { "epoch": 0.2350412038695808, "grad_norm": 3.873222589492798, "learning_rate": 8.50043129188508e-06, "loss": 0.955, "step": 3936 }, { "epoch": 0.23510091962259644, "grad_norm": 2.200763463973999, "learning_rate": 8.499767765908036e-06, "loss": 0.8832, "step": 3937 }, { "epoch": 0.23516063537561208, "grad_norm": 2.1298532485961914, "learning_rate": 8.499104239930994e-06, "loss": 0.9266, "step": 3938 }, { "epoch": 0.23522035112862774, "grad_norm": 3.2511541843414307, "learning_rate": 8.498440713953952e-06, "loss": 0.8968, "step": 3939 }, { "epoch": 0.23528006688164338, "grad_norm": 2.040194272994995, "learning_rate": 8.49777718797691e-06, "loss": 0.9067, "step": 3940 }, { "epoch": 0.23533978263465902, "grad_norm": 3.7017123699188232, "learning_rate": 8.497113661999868e-06, "loss": 0.8697, "step": 3941 }, { "epoch": 0.23539949838767465, "grad_norm": 2.0601940155029297, "learning_rate": 8.496450136022826e-06, "loss": 0.9086, "step": 3942 }, { "epoch": 0.23545921414069032, "grad_norm": 1.874604344367981, "learning_rate": 8.495786610045784e-06, "loss": 0.9423, "step": 3943 }, { "epoch": 0.23551892989370596, "grad_norm": 2.779479742050171, "learning_rate": 8.495123084068742e-06, "loss": 0.9369, "step": 3944 }, { "epoch": 0.2355786456467216, "grad_norm": 3.237377643585205, "learning_rate": 8.4944595580917e-06, "loss": 0.9255, "step": 3945 }, { "epoch": 0.23563836139973726, "grad_norm": 2.6625521183013916, "learning_rate": 8.493796032114659e-06, "loss": 0.9043, "step": 3946 }, { "epoch": 0.2356980771527529, "grad_norm": 2.623164653778076, "learning_rate": 8.493132506137615e-06, "loss": 0.9346, "step": 3947 }, { "epoch": 0.23575779290576854, "grad_norm": 2.517009973526001, "learning_rate": 8.492468980160575e-06, "loss": 0.9211, "step": 3948 }, { "epoch": 0.23581750865878418, "grad_norm": 2.538654327392578, "learning_rate": 8.491805454183531e-06, "loss": 0.8693, "step": 3949 }, { "epoch": 0.23587722441179984, "grad_norm": 2.0417540073394775, "learning_rate": 8.49114192820649e-06, "loss": 0.9211, "step": 3950 }, { "epoch": 0.23593694016481548, "grad_norm": 2.527442693710327, "learning_rate": 8.490478402229449e-06, "loss": 0.924, "step": 3951 }, { "epoch": 0.23599665591783112, "grad_norm": 6.180232524871826, "learning_rate": 8.489814876252405e-06, "loss": 0.9138, "step": 3952 }, { "epoch": 0.23605637167084678, "grad_norm": 2.5731394290924072, "learning_rate": 8.489151350275364e-06, "loss": 0.917, "step": 3953 }, { "epoch": 0.23611608742386242, "grad_norm": 3.1079373359680176, "learning_rate": 8.488487824298322e-06, "loss": 0.9453, "step": 3954 }, { "epoch": 0.23617580317687806, "grad_norm": 2.5389420986175537, "learning_rate": 8.48782429832128e-06, "loss": 0.9404, "step": 3955 }, { "epoch": 0.2362355189298937, "grad_norm": 1.7702122926712036, "learning_rate": 8.487160772344238e-06, "loss": 0.8974, "step": 3956 }, { "epoch": 0.23629523468290936, "grad_norm": 2.1470870971679688, "learning_rate": 8.486497246367196e-06, "loss": 0.9305, "step": 3957 }, { "epoch": 0.236354950435925, "grad_norm": 3.535214424133301, "learning_rate": 8.485833720390154e-06, "loss": 0.9076, "step": 3958 }, { "epoch": 0.23641466618894064, "grad_norm": 2.5464320182800293, "learning_rate": 8.485170194413112e-06, "loss": 0.9051, "step": 3959 }, { "epoch": 0.23647438194195627, "grad_norm": 2.1850051879882812, "learning_rate": 8.48450666843607e-06, "loss": 0.9116, "step": 3960 }, { "epoch": 0.23653409769497194, "grad_norm": 2.223310947418213, "learning_rate": 8.483843142459028e-06, "loss": 0.9189, "step": 3961 }, { "epoch": 0.23659381344798758, "grad_norm": 4.554632186889648, "learning_rate": 8.483179616481986e-06, "loss": 0.9344, "step": 3962 }, { "epoch": 0.23665352920100322, "grad_norm": 2.306596279144287, "learning_rate": 8.482516090504945e-06, "loss": 0.9398, "step": 3963 }, { "epoch": 0.23671324495401888, "grad_norm": 2.201953411102295, "learning_rate": 8.481852564527901e-06, "loss": 0.9181, "step": 3964 }, { "epoch": 0.23677296070703452, "grad_norm": 7.808235168457031, "learning_rate": 8.48118903855086e-06, "loss": 0.9452, "step": 3965 }, { "epoch": 0.23683267646005016, "grad_norm": 2.2908644676208496, "learning_rate": 8.480525512573819e-06, "loss": 0.9093, "step": 3966 }, { "epoch": 0.2368923922130658, "grad_norm": 2.648311138153076, "learning_rate": 8.479861986596775e-06, "loss": 0.9136, "step": 3967 }, { "epoch": 0.23695210796608146, "grad_norm": 1.7274744510650635, "learning_rate": 8.479198460619733e-06, "loss": 0.8843, "step": 3968 }, { "epoch": 0.2370118237190971, "grad_norm": 2.4756064414978027, "learning_rate": 8.478534934642691e-06, "loss": 0.9083, "step": 3969 }, { "epoch": 0.23707153947211274, "grad_norm": 1.8685665130615234, "learning_rate": 8.47787140866565e-06, "loss": 0.9083, "step": 3970 }, { "epoch": 0.2371312552251284, "grad_norm": 1.991831660270691, "learning_rate": 8.477207882688608e-06, "loss": 0.9077, "step": 3971 }, { "epoch": 0.23719097097814404, "grad_norm": 2.7977211475372314, "learning_rate": 8.476544356711566e-06, "loss": 0.9147, "step": 3972 }, { "epoch": 0.23725068673115968, "grad_norm": 2.6434247493743896, "learning_rate": 8.475880830734524e-06, "loss": 0.9337, "step": 3973 }, { "epoch": 0.23731040248417531, "grad_norm": 2.21004056930542, "learning_rate": 8.475217304757482e-06, "loss": 0.8867, "step": 3974 }, { "epoch": 0.23737011823719098, "grad_norm": 2.0420596599578857, "learning_rate": 8.47455377878044e-06, "loss": 0.932, "step": 3975 }, { "epoch": 0.23742983399020662, "grad_norm": 2.8957679271698, "learning_rate": 8.473890252803398e-06, "loss": 0.9629, "step": 3976 }, { "epoch": 0.23748954974322226, "grad_norm": 4.0720295906066895, "learning_rate": 8.473226726826356e-06, "loss": 0.9406, "step": 3977 }, { "epoch": 0.23754926549623792, "grad_norm": 2.5593574047088623, "learning_rate": 8.472563200849314e-06, "loss": 0.901, "step": 3978 }, { "epoch": 0.23760898124925356, "grad_norm": 12.051453590393066, "learning_rate": 8.47189967487227e-06, "loss": 0.8797, "step": 3979 }, { "epoch": 0.2376686970022692, "grad_norm": 1.7443705797195435, "learning_rate": 8.47123614889523e-06, "loss": 0.8757, "step": 3980 }, { "epoch": 0.23772841275528483, "grad_norm": 1.764106273651123, "learning_rate": 8.470572622918189e-06, "loss": 0.8878, "step": 3981 }, { "epoch": 0.2377881285083005, "grad_norm": 4.137120723724365, "learning_rate": 8.469909096941145e-06, "loss": 0.9126, "step": 3982 }, { "epoch": 0.23784784426131614, "grad_norm": 3.5730133056640625, "learning_rate": 8.469245570964105e-06, "loss": 0.8983, "step": 3983 }, { "epoch": 0.23790756001433178, "grad_norm": 2.4529831409454346, "learning_rate": 8.468582044987061e-06, "loss": 0.892, "step": 3984 }, { "epoch": 0.2379672757673474, "grad_norm": 2.284665107727051, "learning_rate": 8.46791851901002e-06, "loss": 0.9347, "step": 3985 }, { "epoch": 0.23802699152036308, "grad_norm": 1.7034369707107544, "learning_rate": 8.467254993032979e-06, "loss": 0.8782, "step": 3986 }, { "epoch": 0.23808670727337872, "grad_norm": 1.9109994173049927, "learning_rate": 8.466591467055936e-06, "loss": 0.9597, "step": 3987 }, { "epoch": 0.23814642302639435, "grad_norm": 2.594341516494751, "learning_rate": 8.465927941078894e-06, "loss": 0.929, "step": 3988 }, { "epoch": 0.23820613877941002, "grad_norm": 2.085984230041504, "learning_rate": 8.465264415101852e-06, "loss": 0.9399, "step": 3989 }, { "epoch": 0.23826585453242566, "grad_norm": 1.8988425731658936, "learning_rate": 8.46460088912481e-06, "loss": 0.9335, "step": 3990 }, { "epoch": 0.2383255702854413, "grad_norm": 2.6257781982421875, "learning_rate": 8.463937363147768e-06, "loss": 0.9282, "step": 3991 }, { "epoch": 0.23838528603845693, "grad_norm": 2.1658620834350586, "learning_rate": 8.463273837170726e-06, "loss": 0.9033, "step": 3992 }, { "epoch": 0.2384450017914726, "grad_norm": 2.433739423751831, "learning_rate": 8.462610311193684e-06, "loss": 0.9167, "step": 3993 }, { "epoch": 0.23850471754448824, "grad_norm": 4.480224609375, "learning_rate": 8.461946785216642e-06, "loss": 0.9188, "step": 3994 }, { "epoch": 0.23856443329750388, "grad_norm": 1.8343786001205444, "learning_rate": 8.4612832592396e-06, "loss": 0.898, "step": 3995 }, { "epoch": 0.23862414905051954, "grad_norm": 2.5331735610961914, "learning_rate": 8.460619733262558e-06, "loss": 0.9436, "step": 3996 }, { "epoch": 0.23868386480353518, "grad_norm": 3.7857866287231445, "learning_rate": 8.459956207285515e-06, "loss": 0.9237, "step": 3997 }, { "epoch": 0.23874358055655082, "grad_norm": 3.3016974925994873, "learning_rate": 8.459292681308475e-06, "loss": 0.9307, "step": 3998 }, { "epoch": 0.23880329630956645, "grad_norm": 3.309695243835449, "learning_rate": 8.458629155331431e-06, "loss": 0.9462, "step": 3999 }, { "epoch": 0.23886301206258212, "grad_norm": 2.3063554763793945, "learning_rate": 8.45796562935439e-06, "loss": 0.8576, "step": 4000 }, { "epoch": 0.23886301206258212, "eval_text_loss": 0.9503079652786255, "eval_text_runtime": 15.1904, "eval_text_samples_per_second": 263.323, "eval_text_steps_per_second": 0.527, "step": 4000 }, { "epoch": 0.23886301206258212, "eval_image_loss": 0.6726723313331604, "eval_image_runtime": 5.0328, "eval_image_samples_per_second": 794.787, "eval_image_steps_per_second": 1.59, "step": 4000 }, { "epoch": 0.23886301206258212, "eval_video_loss": 1.1426520347595215, "eval_video_runtime": 76.9908, "eval_video_samples_per_second": 51.954, "eval_video_steps_per_second": 0.104, "step": 4000 }, { "epoch": 0.23892272781559776, "grad_norm": 3.1175272464752197, "learning_rate": 8.457302103377349e-06, "loss": 0.8949, "step": 4001 }, { "epoch": 0.2389824435686134, "grad_norm": 2.3879926204681396, "learning_rate": 8.456638577400305e-06, "loss": 0.9362, "step": 4002 }, { "epoch": 0.23904215932162903, "grad_norm": 2.393223762512207, "learning_rate": 8.455975051423263e-06, "loss": 0.8962, "step": 4003 }, { "epoch": 0.2391018750746447, "grad_norm": 2.323537588119507, "learning_rate": 8.455311525446222e-06, "loss": 0.888, "step": 4004 }, { "epoch": 0.23916159082766034, "grad_norm": 2.560628890991211, "learning_rate": 8.45464799946918e-06, "loss": 0.9323, "step": 4005 }, { "epoch": 0.23922130658067597, "grad_norm": 7.8647871017456055, "learning_rate": 8.453984473492138e-06, "loss": 0.9039, "step": 4006 }, { "epoch": 0.23928102233369164, "grad_norm": 2.091942548751831, "learning_rate": 8.453320947515096e-06, "loss": 0.9291, "step": 4007 }, { "epoch": 0.23934073808670728, "grad_norm": 2.3311309814453125, "learning_rate": 8.452657421538054e-06, "loss": 0.9435, "step": 4008 }, { "epoch": 0.23940045383972292, "grad_norm": 2.437546968460083, "learning_rate": 8.451993895561012e-06, "loss": 0.8796, "step": 4009 }, { "epoch": 0.23946016959273855, "grad_norm": 2.018327236175537, "learning_rate": 8.45133036958397e-06, "loss": 0.9464, "step": 4010 }, { "epoch": 0.23951988534575422, "grad_norm": 2.5238404273986816, "learning_rate": 8.450666843606928e-06, "loss": 0.9063, "step": 4011 }, { "epoch": 0.23957960109876986, "grad_norm": 2.07721209526062, "learning_rate": 8.450003317629886e-06, "loss": 0.9127, "step": 4012 }, { "epoch": 0.2396393168517855, "grad_norm": 2.1966633796691895, "learning_rate": 8.449339791652844e-06, "loss": 0.9335, "step": 4013 }, { "epoch": 0.23969903260480116, "grad_norm": 2.979215383529663, "learning_rate": 8.448676265675801e-06, "loss": 0.893, "step": 4014 }, { "epoch": 0.2397587483578168, "grad_norm": 1.962847352027893, "learning_rate": 8.44801273969876e-06, "loss": 0.9018, "step": 4015 }, { "epoch": 0.23981846411083244, "grad_norm": 2.818305492401123, "learning_rate": 8.447349213721719e-06, "loss": 0.9281, "step": 4016 }, { "epoch": 0.23987817986384807, "grad_norm": 3.944586753845215, "learning_rate": 8.446685687744675e-06, "loss": 0.9027, "step": 4017 }, { "epoch": 0.23993789561686374, "grad_norm": 2.343038558959961, "learning_rate": 8.446022161767633e-06, "loss": 0.9335, "step": 4018 }, { "epoch": 0.23999761136987938, "grad_norm": 1.9225448369979858, "learning_rate": 8.445358635790591e-06, "loss": 0.9175, "step": 4019 }, { "epoch": 0.24005732712289501, "grad_norm": 14.334506034851074, "learning_rate": 8.44469510981355e-06, "loss": 0.9127, "step": 4020 }, { "epoch": 0.24011704287591065, "grad_norm": 5.601776123046875, "learning_rate": 8.444031583836508e-06, "loss": 0.9163, "step": 4021 }, { "epoch": 0.24017675862892632, "grad_norm": 1.9636213779449463, "learning_rate": 8.443368057859466e-06, "loss": 0.8995, "step": 4022 }, { "epoch": 0.24023647438194196, "grad_norm": 5.5572943687438965, "learning_rate": 8.442704531882424e-06, "loss": 0.8992, "step": 4023 }, { "epoch": 0.2402961901349576, "grad_norm": 2.5328564643859863, "learning_rate": 8.442041005905382e-06, "loss": 0.9364, "step": 4024 }, { "epoch": 0.24035590588797326, "grad_norm": 2.146942377090454, "learning_rate": 8.44137747992834e-06, "loss": 0.947, "step": 4025 }, { "epoch": 0.2404156216409889, "grad_norm": 1.7909786701202393, "learning_rate": 8.440713953951298e-06, "loss": 0.8864, "step": 4026 }, { "epoch": 0.24047533739400453, "grad_norm": 2.872197151184082, "learning_rate": 8.440050427974256e-06, "loss": 0.9205, "step": 4027 }, { "epoch": 0.24053505314702017, "grad_norm": 3.0617291927337646, "learning_rate": 8.439386901997214e-06, "loss": 0.8844, "step": 4028 }, { "epoch": 0.24059476890003584, "grad_norm": 2.7671685218811035, "learning_rate": 8.43872337602017e-06, "loss": 0.8977, "step": 4029 }, { "epoch": 0.24065448465305148, "grad_norm": 1.9797435998916626, "learning_rate": 8.43805985004313e-06, "loss": 0.9437, "step": 4030 }, { "epoch": 0.2407142004060671, "grad_norm": 2.220741033554077, "learning_rate": 8.437396324066089e-06, "loss": 0.9242, "step": 4031 }, { "epoch": 0.24077391615908278, "grad_norm": 2.3530759811401367, "learning_rate": 8.436732798089045e-06, "loss": 0.914, "step": 4032 }, { "epoch": 0.24083363191209842, "grad_norm": 2.290536403656006, "learning_rate": 8.436069272112005e-06, "loss": 0.9266, "step": 4033 }, { "epoch": 0.24089334766511405, "grad_norm": 2.2698566913604736, "learning_rate": 8.435405746134961e-06, "loss": 0.8962, "step": 4034 }, { "epoch": 0.2409530634181297, "grad_norm": 2.0710692405700684, "learning_rate": 8.43474222015792e-06, "loss": 0.8979, "step": 4035 }, { "epoch": 0.24101277917114536, "grad_norm": 2.350168466567993, "learning_rate": 8.434078694180879e-06, "loss": 0.9098, "step": 4036 }, { "epoch": 0.241072494924161, "grad_norm": 1.7339708805084229, "learning_rate": 8.433415168203835e-06, "loss": 0.8835, "step": 4037 }, { "epoch": 0.24113221067717663, "grad_norm": 2.5150697231292725, "learning_rate": 8.432751642226794e-06, "loss": 0.941, "step": 4038 }, { "epoch": 0.24119192643019227, "grad_norm": 5.006139278411865, "learning_rate": 8.432088116249752e-06, "loss": 0.8921, "step": 4039 }, { "epoch": 0.24125164218320794, "grad_norm": 2.421999454498291, "learning_rate": 8.43142459027271e-06, "loss": 0.9064, "step": 4040 }, { "epoch": 0.24131135793622357, "grad_norm": 3.123509645462036, "learning_rate": 8.430761064295668e-06, "loss": 0.8899, "step": 4041 }, { "epoch": 0.2413710736892392, "grad_norm": 3.417128801345825, "learning_rate": 8.430097538318626e-06, "loss": 0.9294, "step": 4042 }, { "epoch": 0.24143078944225488, "grad_norm": 2.4626576900482178, "learning_rate": 8.429434012341584e-06, "loss": 0.92, "step": 4043 }, { "epoch": 0.24149050519527052, "grad_norm": 2.2784316539764404, "learning_rate": 8.428770486364542e-06, "loss": 0.8809, "step": 4044 }, { "epoch": 0.24155022094828615, "grad_norm": 1.8164769411087036, "learning_rate": 8.4281069603875e-06, "loss": 0.9094, "step": 4045 }, { "epoch": 0.2416099367013018, "grad_norm": 2.0148260593414307, "learning_rate": 8.427443434410458e-06, "loss": 0.9151, "step": 4046 }, { "epoch": 0.24166965245431746, "grad_norm": 3.256927490234375, "learning_rate": 8.426779908433415e-06, "loss": 0.901, "step": 4047 }, { "epoch": 0.2417293682073331, "grad_norm": 1.8658299446105957, "learning_rate": 8.426116382456375e-06, "loss": 0.8856, "step": 4048 }, { "epoch": 0.24178908396034873, "grad_norm": 2.624502658843994, "learning_rate": 8.425452856479331e-06, "loss": 0.9006, "step": 4049 }, { "epoch": 0.2418487997133644, "grad_norm": 3.212306022644043, "learning_rate": 8.424789330502289e-06, "loss": 0.9098, "step": 4050 }, { "epoch": 0.24190851546638004, "grad_norm": 2.2038145065307617, "learning_rate": 8.424125804525249e-06, "loss": 0.9107, "step": 4051 }, { "epoch": 0.24196823121939567, "grad_norm": 2.03698468208313, "learning_rate": 8.423462278548205e-06, "loss": 0.9234, "step": 4052 }, { "epoch": 0.2420279469724113, "grad_norm": 1.940966010093689, "learning_rate": 8.422798752571163e-06, "loss": 0.8965, "step": 4053 }, { "epoch": 0.24208766272542698, "grad_norm": 2.3695006370544434, "learning_rate": 8.422135226594122e-06, "loss": 0.9354, "step": 4054 }, { "epoch": 0.24214737847844262, "grad_norm": 2.4775381088256836, "learning_rate": 8.42147170061708e-06, "loss": 0.9146, "step": 4055 }, { "epoch": 0.24220709423145825, "grad_norm": 2.5788183212280273, "learning_rate": 8.420808174640038e-06, "loss": 0.9443, "step": 4056 }, { "epoch": 0.2422668099844739, "grad_norm": 2.2722885608673096, "learning_rate": 8.420144648662996e-06, "loss": 0.8819, "step": 4057 }, { "epoch": 0.24232652573748956, "grad_norm": 2.7394800186157227, "learning_rate": 8.419481122685954e-06, "loss": 0.8859, "step": 4058 }, { "epoch": 0.2423862414905052, "grad_norm": 2.1435158252716064, "learning_rate": 8.418817596708912e-06, "loss": 0.9252, "step": 4059 }, { "epoch": 0.24244595724352083, "grad_norm": 2.151951551437378, "learning_rate": 8.41815407073187e-06, "loss": 0.9048, "step": 4060 }, { "epoch": 0.2425056729965365, "grad_norm": 1.6634571552276611, "learning_rate": 8.417490544754828e-06, "loss": 0.9129, "step": 4061 }, { "epoch": 0.24256538874955214, "grad_norm": 2.330267906188965, "learning_rate": 8.416827018777786e-06, "loss": 0.9112, "step": 4062 }, { "epoch": 0.24262510450256777, "grad_norm": 2.2697715759277344, "learning_rate": 8.416163492800744e-06, "loss": 0.8986, "step": 4063 }, { "epoch": 0.2426848202555834, "grad_norm": 5.790398120880127, "learning_rate": 8.4154999668237e-06, "loss": 0.9182, "step": 4064 }, { "epoch": 0.24274453600859908, "grad_norm": 2.4052469730377197, "learning_rate": 8.41483644084666e-06, "loss": 0.8985, "step": 4065 }, { "epoch": 0.24280425176161471, "grad_norm": 2.108344793319702, "learning_rate": 8.414172914869619e-06, "loss": 0.9282, "step": 4066 }, { "epoch": 0.24286396751463035, "grad_norm": 2.1831581592559814, "learning_rate": 8.413509388892575e-06, "loss": 0.9213, "step": 4067 }, { "epoch": 0.24292368326764602, "grad_norm": 1.8337924480438232, "learning_rate": 8.412845862915535e-06, "loss": 0.9157, "step": 4068 }, { "epoch": 0.24298339902066166, "grad_norm": 4.581411361694336, "learning_rate": 8.412182336938491e-06, "loss": 0.928, "step": 4069 }, { "epoch": 0.2430431147736773, "grad_norm": 3.7263729572296143, "learning_rate": 8.41151881096145e-06, "loss": 0.9158, "step": 4070 }, { "epoch": 0.24310283052669293, "grad_norm": 3.2330164909362793, "learning_rate": 8.410855284984408e-06, "loss": 0.9119, "step": 4071 }, { "epoch": 0.2431625462797086, "grad_norm": 1.9386779069900513, "learning_rate": 8.410191759007366e-06, "loss": 0.8749, "step": 4072 }, { "epoch": 0.24322226203272423, "grad_norm": 2.9167933464050293, "learning_rate": 8.409528233030324e-06, "loss": 0.9142, "step": 4073 }, { "epoch": 0.24328197778573987, "grad_norm": 2.544698476791382, "learning_rate": 8.408864707053282e-06, "loss": 0.914, "step": 4074 }, { "epoch": 0.2433416935387555, "grad_norm": 2.3234221935272217, "learning_rate": 8.40820118107624e-06, "loss": 0.9327, "step": 4075 }, { "epoch": 0.24340140929177118, "grad_norm": 2.0455124378204346, "learning_rate": 8.407537655099198e-06, "loss": 0.89, "step": 4076 }, { "epoch": 0.2434611250447868, "grad_norm": 2.9425320625305176, "learning_rate": 8.406874129122156e-06, "loss": 0.9344, "step": 4077 }, { "epoch": 0.24352084079780245, "grad_norm": 2.253784656524658, "learning_rate": 8.406210603145114e-06, "loss": 0.8939, "step": 4078 }, { "epoch": 0.24358055655081812, "grad_norm": 4.112626552581787, "learning_rate": 8.40554707716807e-06, "loss": 0.9136, "step": 4079 }, { "epoch": 0.24364027230383375, "grad_norm": 2.234020709991455, "learning_rate": 8.40488355119103e-06, "loss": 0.8831, "step": 4080 }, { "epoch": 0.2436999880568494, "grad_norm": 2.5538108348846436, "learning_rate": 8.404220025213989e-06, "loss": 0.9245, "step": 4081 }, { "epoch": 0.24375970380986503, "grad_norm": 3.0576486587524414, "learning_rate": 8.403556499236945e-06, "loss": 0.9539, "step": 4082 }, { "epoch": 0.2438194195628807, "grad_norm": 2.116901397705078, "learning_rate": 8.402892973259905e-06, "loss": 0.9345, "step": 4083 }, { "epoch": 0.24387913531589633, "grad_norm": 3.0499117374420166, "learning_rate": 8.402229447282861e-06, "loss": 0.9178, "step": 4084 }, { "epoch": 0.24393885106891197, "grad_norm": 2.3034186363220215, "learning_rate": 8.40156592130582e-06, "loss": 0.9153, "step": 4085 }, { "epoch": 0.24399856682192764, "grad_norm": 2.912921905517578, "learning_rate": 8.400902395328779e-06, "loss": 0.8984, "step": 4086 }, { "epoch": 0.24405828257494327, "grad_norm": 2.599708080291748, "learning_rate": 8.400238869351735e-06, "loss": 0.9034, "step": 4087 }, { "epoch": 0.2441179983279589, "grad_norm": 3.209939479827881, "learning_rate": 8.399575343374694e-06, "loss": 0.9273, "step": 4088 }, { "epoch": 0.24417771408097455, "grad_norm": 2.2703609466552734, "learning_rate": 8.398911817397652e-06, "loss": 0.962, "step": 4089 }, { "epoch": 0.24423742983399022, "grad_norm": 3.034883975982666, "learning_rate": 8.39824829142061e-06, "loss": 0.8841, "step": 4090 }, { "epoch": 0.24429714558700585, "grad_norm": 1.6616625785827637, "learning_rate": 8.397584765443568e-06, "loss": 0.8977, "step": 4091 }, { "epoch": 0.2443568613400215, "grad_norm": 2.6124329566955566, "learning_rate": 8.396921239466526e-06, "loss": 0.9229, "step": 4092 }, { "epoch": 0.24441657709303713, "grad_norm": 3.4515600204467773, "learning_rate": 8.396257713489484e-06, "loss": 0.9205, "step": 4093 }, { "epoch": 0.2444762928460528, "grad_norm": 1.9732928276062012, "learning_rate": 8.395594187512442e-06, "loss": 0.8815, "step": 4094 }, { "epoch": 0.24453600859906843, "grad_norm": 3.4683358669281006, "learning_rate": 8.3949306615354e-06, "loss": 0.8875, "step": 4095 }, { "epoch": 0.24459572435208407, "grad_norm": 2.173215389251709, "learning_rate": 8.394267135558358e-06, "loss": 0.8844, "step": 4096 }, { "epoch": 0.24465544010509974, "grad_norm": 3.506671190261841, "learning_rate": 8.393603609581316e-06, "loss": 0.9179, "step": 4097 }, { "epoch": 0.24471515585811537, "grad_norm": 2.052873134613037, "learning_rate": 8.392940083604275e-06, "loss": 0.9185, "step": 4098 }, { "epoch": 0.244774871611131, "grad_norm": 2.2417876720428467, "learning_rate": 8.392276557627231e-06, "loss": 0.8987, "step": 4099 }, { "epoch": 0.24483458736414665, "grad_norm": 2.4513211250305176, "learning_rate": 8.391613031650189e-06, "loss": 0.9213, "step": 4100 }, { "epoch": 0.24489430311716232, "grad_norm": 2.3156967163085938, "learning_rate": 8.390949505673149e-06, "loss": 0.9046, "step": 4101 }, { "epoch": 0.24495401887017795, "grad_norm": 2.0713438987731934, "learning_rate": 8.390285979696105e-06, "loss": 0.8888, "step": 4102 }, { "epoch": 0.2450137346231936, "grad_norm": 1.843824863433838, "learning_rate": 8.389622453719063e-06, "loss": 0.8878, "step": 4103 }, { "epoch": 0.24507345037620926, "grad_norm": 2.1160032749176025, "learning_rate": 8.388958927742021e-06, "loss": 0.9172, "step": 4104 }, { "epoch": 0.2451331661292249, "grad_norm": 3.4349701404571533, "learning_rate": 8.38829540176498e-06, "loss": 0.8743, "step": 4105 }, { "epoch": 0.24519288188224053, "grad_norm": 2.5643372535705566, "learning_rate": 8.387631875787938e-06, "loss": 0.9262, "step": 4106 }, { "epoch": 0.24525259763525617, "grad_norm": 1.8326166868209839, "learning_rate": 8.386968349810896e-06, "loss": 0.9208, "step": 4107 }, { "epoch": 0.24531231338827184, "grad_norm": 2.0148165225982666, "learning_rate": 8.386304823833854e-06, "loss": 0.8927, "step": 4108 }, { "epoch": 0.24537202914128747, "grad_norm": 3.0901312828063965, "learning_rate": 8.385641297856812e-06, "loss": 0.8971, "step": 4109 }, { "epoch": 0.2454317448943031, "grad_norm": 2.1241447925567627, "learning_rate": 8.38497777187977e-06, "loss": 0.9085, "step": 4110 }, { "epoch": 0.24549146064731875, "grad_norm": 2.858139753341675, "learning_rate": 8.384314245902728e-06, "loss": 0.8851, "step": 4111 }, { "epoch": 0.24555117640033441, "grad_norm": 2.507251262664795, "learning_rate": 8.383650719925686e-06, "loss": 0.9439, "step": 4112 }, { "epoch": 0.24561089215335005, "grad_norm": 2.8127024173736572, "learning_rate": 8.382987193948644e-06, "loss": 0.9549, "step": 4113 }, { "epoch": 0.2456706079063657, "grad_norm": 3.422312021255493, "learning_rate": 8.3823236679716e-06, "loss": 0.9366, "step": 4114 }, { "epoch": 0.24573032365938136, "grad_norm": 2.447681188583374, "learning_rate": 8.38166014199456e-06, "loss": 0.9096, "step": 4115 }, { "epoch": 0.245790039412397, "grad_norm": 2.4183616638183594, "learning_rate": 8.380996616017519e-06, "loss": 0.9328, "step": 4116 }, { "epoch": 0.24584975516541263, "grad_norm": 2.919755697250366, "learning_rate": 8.380333090040475e-06, "loss": 0.9188, "step": 4117 }, { "epoch": 0.24590947091842827, "grad_norm": 2.356182813644409, "learning_rate": 8.379669564063435e-06, "loss": 0.8908, "step": 4118 }, { "epoch": 0.24596918667144393, "grad_norm": 1.95780348777771, "learning_rate": 8.379006038086391e-06, "loss": 0.8969, "step": 4119 }, { "epoch": 0.24602890242445957, "grad_norm": 2.618971586227417, "learning_rate": 8.37834251210935e-06, "loss": 0.9406, "step": 4120 }, { "epoch": 0.2460886181774752, "grad_norm": 2.7209482192993164, "learning_rate": 8.377678986132307e-06, "loss": 0.9397, "step": 4121 }, { "epoch": 0.24614833393049088, "grad_norm": 3.3275840282440186, "learning_rate": 8.377015460155266e-06, "loss": 0.9132, "step": 4122 }, { "epoch": 0.2462080496835065, "grad_norm": 2.233855724334717, "learning_rate": 8.376351934178224e-06, "loss": 0.9024, "step": 4123 }, { "epoch": 0.24626776543652215, "grad_norm": 4.155838966369629, "learning_rate": 8.375688408201182e-06, "loss": 0.9128, "step": 4124 }, { "epoch": 0.2463274811895378, "grad_norm": 2.6519277095794678, "learning_rate": 8.37502488222414e-06, "loss": 0.9001, "step": 4125 }, { "epoch": 0.24638719694255345, "grad_norm": 2.8798093795776367, "learning_rate": 8.374361356247098e-06, "loss": 0.9143, "step": 4126 }, { "epoch": 0.2464469126955691, "grad_norm": 3.447864055633545, "learning_rate": 8.373697830270056e-06, "loss": 0.9521, "step": 4127 }, { "epoch": 0.24650662844858473, "grad_norm": 3.845487117767334, "learning_rate": 8.373034304293014e-06, "loss": 0.9234, "step": 4128 }, { "epoch": 0.2465663442016004, "grad_norm": 2.691347360610962, "learning_rate": 8.37237077831597e-06, "loss": 0.9352, "step": 4129 }, { "epoch": 0.24662605995461603, "grad_norm": 6.265195369720459, "learning_rate": 8.37170725233893e-06, "loss": 0.9304, "step": 4130 }, { "epoch": 0.24668577570763167, "grad_norm": 2.4822921752929688, "learning_rate": 8.371043726361888e-06, "loss": 0.9068, "step": 4131 }, { "epoch": 0.2467454914606473, "grad_norm": 3.8830342292785645, "learning_rate": 8.370380200384845e-06, "loss": 0.8842, "step": 4132 }, { "epoch": 0.24680520721366297, "grad_norm": 1.7293318510055542, "learning_rate": 8.369716674407805e-06, "loss": 0.9091, "step": 4133 }, { "epoch": 0.2468649229666786, "grad_norm": 2.826779365539551, "learning_rate": 8.369053148430761e-06, "loss": 0.8634, "step": 4134 }, { "epoch": 0.24692463871969425, "grad_norm": 1.820074200630188, "learning_rate": 8.36838962245372e-06, "loss": 0.9355, "step": 4135 }, { "epoch": 0.2469843544727099, "grad_norm": 3.6732406616210938, "learning_rate": 8.367726096476679e-06, "loss": 0.8731, "step": 4136 }, { "epoch": 0.24704407022572555, "grad_norm": 4.307351112365723, "learning_rate": 8.367062570499635e-06, "loss": 0.9277, "step": 4137 }, { "epoch": 0.2471037859787412, "grad_norm": 2.5327200889587402, "learning_rate": 8.366399044522593e-06, "loss": 0.8863, "step": 4138 }, { "epoch": 0.24716350173175683, "grad_norm": 2.807983160018921, "learning_rate": 8.365735518545552e-06, "loss": 0.9385, "step": 4139 }, { "epoch": 0.2472232174847725, "grad_norm": 2.486804246902466, "learning_rate": 8.36507199256851e-06, "loss": 0.9071, "step": 4140 }, { "epoch": 0.24728293323778813, "grad_norm": 2.412334442138672, "learning_rate": 8.364408466591468e-06, "loss": 0.8978, "step": 4141 }, { "epoch": 0.24734264899080377, "grad_norm": 1.8999221324920654, "learning_rate": 8.363744940614426e-06, "loss": 0.903, "step": 4142 }, { "epoch": 0.2474023647438194, "grad_norm": 1.9343819618225098, "learning_rate": 8.363081414637384e-06, "loss": 0.9107, "step": 4143 }, { "epoch": 0.24746208049683507, "grad_norm": 2.797489881515503, "learning_rate": 8.362417888660342e-06, "loss": 0.9041, "step": 4144 }, { "epoch": 0.2475217962498507, "grad_norm": 2.911060333251953, "learning_rate": 8.3617543626833e-06, "loss": 0.9061, "step": 4145 }, { "epoch": 0.24758151200286635, "grad_norm": 2.432385206222534, "learning_rate": 8.361090836706258e-06, "loss": 0.9011, "step": 4146 }, { "epoch": 0.24764122775588202, "grad_norm": 2.6550114154815674, "learning_rate": 8.360427310729216e-06, "loss": 0.8856, "step": 4147 }, { "epoch": 0.24770094350889765, "grad_norm": 2.212244749069214, "learning_rate": 8.359763784752174e-06, "loss": 0.9102, "step": 4148 }, { "epoch": 0.2477606592619133, "grad_norm": 3.1374220848083496, "learning_rate": 8.359100258775131e-06, "loss": 0.913, "step": 4149 }, { "epoch": 0.24782037501492893, "grad_norm": 1.706539511680603, "learning_rate": 8.358436732798089e-06, "loss": 0.8531, "step": 4150 }, { "epoch": 0.2478800907679446, "grad_norm": 2.2704615592956543, "learning_rate": 8.357773206821049e-06, "loss": 0.9408, "step": 4151 }, { "epoch": 0.24793980652096023, "grad_norm": 2.8487353324890137, "learning_rate": 8.357109680844005e-06, "loss": 0.9526, "step": 4152 }, { "epoch": 0.24799952227397587, "grad_norm": 2.512526035308838, "learning_rate": 8.356446154866963e-06, "loss": 0.9177, "step": 4153 }, { "epoch": 0.2480592380269915, "grad_norm": 2.7118098735809326, "learning_rate": 8.355782628889921e-06, "loss": 0.8983, "step": 4154 }, { "epoch": 0.24811895378000717, "grad_norm": 2.6615896224975586, "learning_rate": 8.35511910291288e-06, "loss": 0.9184, "step": 4155 }, { "epoch": 0.2481786695330228, "grad_norm": 1.757232666015625, "learning_rate": 8.354455576935838e-06, "loss": 0.9085, "step": 4156 }, { "epoch": 0.24823838528603845, "grad_norm": 3.7592263221740723, "learning_rate": 8.353792050958796e-06, "loss": 0.9205, "step": 4157 }, { "epoch": 0.24829810103905411, "grad_norm": 3.714855909347534, "learning_rate": 8.353128524981754e-06, "loss": 0.8958, "step": 4158 }, { "epoch": 0.24835781679206975, "grad_norm": 2.1118249893188477, "learning_rate": 8.352464999004712e-06, "loss": 0.8888, "step": 4159 }, { "epoch": 0.2484175325450854, "grad_norm": 2.508796453475952, "learning_rate": 8.35180147302767e-06, "loss": 0.9039, "step": 4160 }, { "epoch": 0.24847724829810103, "grad_norm": 2.125926971435547, "learning_rate": 8.351137947050628e-06, "loss": 0.8956, "step": 4161 }, { "epoch": 0.2485369640511167, "grad_norm": 2.269284725189209, "learning_rate": 8.350474421073586e-06, "loss": 0.8949, "step": 4162 }, { "epoch": 0.24859667980413233, "grad_norm": 2.2225823402404785, "learning_rate": 8.349810895096544e-06, "loss": 0.8975, "step": 4163 }, { "epoch": 0.24865639555714797, "grad_norm": 1.9785250425338745, "learning_rate": 8.3491473691195e-06, "loss": 0.9042, "step": 4164 }, { "epoch": 0.24871611131016363, "grad_norm": 3.3267903327941895, "learning_rate": 8.34848384314246e-06, "loss": 0.898, "step": 4165 }, { "epoch": 0.24877582706317927, "grad_norm": 1.798281192779541, "learning_rate": 8.347820317165419e-06, "loss": 0.9022, "step": 4166 }, { "epoch": 0.2488355428161949, "grad_norm": 2.3831257820129395, "learning_rate": 8.347156791188375e-06, "loss": 0.8788, "step": 4167 }, { "epoch": 0.24889525856921055, "grad_norm": 2.337951421737671, "learning_rate": 8.346493265211335e-06, "loss": 0.9121, "step": 4168 }, { "epoch": 0.2489549743222262, "grad_norm": 2.74385666847229, "learning_rate": 8.345829739234291e-06, "loss": 0.8759, "step": 4169 }, { "epoch": 0.24901469007524185, "grad_norm": 2.5872697830200195, "learning_rate": 8.34516621325725e-06, "loss": 0.9197, "step": 4170 }, { "epoch": 0.2490744058282575, "grad_norm": 2.0145623683929443, "learning_rate": 8.344502687280207e-06, "loss": 0.8647, "step": 4171 }, { "epoch": 0.24913412158127313, "grad_norm": 2.0868117809295654, "learning_rate": 8.343839161303166e-06, "loss": 0.9141, "step": 4172 }, { "epoch": 0.2491938373342888, "grad_norm": 2.1355834007263184, "learning_rate": 8.343175635326124e-06, "loss": 0.911, "step": 4173 }, { "epoch": 0.24925355308730443, "grad_norm": 2.0171194076538086, "learning_rate": 8.342512109349082e-06, "loss": 0.8786, "step": 4174 }, { "epoch": 0.24931326884032007, "grad_norm": 1.6787136793136597, "learning_rate": 8.34184858337204e-06, "loss": 0.9164, "step": 4175 }, { "epoch": 0.24937298459333573, "grad_norm": 2.0289275646209717, "learning_rate": 8.341185057394998e-06, "loss": 0.9288, "step": 4176 }, { "epoch": 0.24943270034635137, "grad_norm": 2.118435859680176, "learning_rate": 8.340521531417956e-06, "loss": 0.929, "step": 4177 }, { "epoch": 0.249492416099367, "grad_norm": 1.9286272525787354, "learning_rate": 8.339858005440914e-06, "loss": 0.9528, "step": 4178 }, { "epoch": 0.24955213185238265, "grad_norm": 3.5098280906677246, "learning_rate": 8.33919447946387e-06, "loss": 0.8755, "step": 4179 }, { "epoch": 0.2496118476053983, "grad_norm": 2.4265215396881104, "learning_rate": 8.33853095348683e-06, "loss": 0.9066, "step": 4180 }, { "epoch": 0.24967156335841395, "grad_norm": 2.1971867084503174, "learning_rate": 8.337867427509788e-06, "loss": 0.9311, "step": 4181 }, { "epoch": 0.2497312791114296, "grad_norm": 1.5300652980804443, "learning_rate": 8.337203901532745e-06, "loss": 0.901, "step": 4182 }, { "epoch": 0.24979099486444525, "grad_norm": 2.333767890930176, "learning_rate": 8.336540375555705e-06, "loss": 0.9223, "step": 4183 }, { "epoch": 0.2498507106174609, "grad_norm": 4.97572660446167, "learning_rate": 8.335876849578661e-06, "loss": 0.8871, "step": 4184 }, { "epoch": 0.24991042637047653, "grad_norm": 1.7539564371109009, "learning_rate": 8.335213323601619e-06, "loss": 0.8932, "step": 4185 }, { "epoch": 0.24997014212349217, "grad_norm": 2.5454084873199463, "learning_rate": 8.334549797624579e-06, "loss": 0.9195, "step": 4186 }, { "epoch": 0.25002985787650783, "grad_norm": 2.107483148574829, "learning_rate": 8.333886271647535e-06, "loss": 0.8954, "step": 4187 }, { "epoch": 0.25008957362952344, "grad_norm": 3.965193033218384, "learning_rate": 8.333222745670493e-06, "loss": 0.8754, "step": 4188 }, { "epoch": 0.2501492893825391, "grad_norm": 2.0457301139831543, "learning_rate": 8.332559219693452e-06, "loss": 0.9259, "step": 4189 }, { "epoch": 0.2502090051355548, "grad_norm": 2.0888798236846924, "learning_rate": 8.33189569371641e-06, "loss": 0.9265, "step": 4190 }, { "epoch": 0.2502687208885704, "grad_norm": 2.3237836360931396, "learning_rate": 8.331232167739368e-06, "loss": 0.8733, "step": 4191 }, { "epoch": 0.25032843664158605, "grad_norm": 2.9067461490631104, "learning_rate": 8.330568641762326e-06, "loss": 0.8985, "step": 4192 }, { "epoch": 0.2503881523946017, "grad_norm": 2.6918416023254395, "learning_rate": 8.329905115785284e-06, "loss": 0.8787, "step": 4193 }, { "epoch": 0.2504478681476173, "grad_norm": 2.328742742538452, "learning_rate": 8.329241589808242e-06, "loss": 0.9135, "step": 4194 }, { "epoch": 0.250507583900633, "grad_norm": 2.338592290878296, "learning_rate": 8.3285780638312e-06, "loss": 0.937, "step": 4195 }, { "epoch": 0.25056729965364866, "grad_norm": 2.643233299255371, "learning_rate": 8.327914537854158e-06, "loss": 0.9277, "step": 4196 }, { "epoch": 0.25062701540666427, "grad_norm": 2.3457908630371094, "learning_rate": 8.327251011877116e-06, "loss": 0.9025, "step": 4197 }, { "epoch": 0.25068673115967993, "grad_norm": 2.263007879257202, "learning_rate": 8.326587485900074e-06, "loss": 0.8846, "step": 4198 }, { "epoch": 0.25074644691269554, "grad_norm": 2.267568349838257, "learning_rate": 8.32592395992303e-06, "loss": 0.911, "step": 4199 }, { "epoch": 0.2508061626657112, "grad_norm": 2.2978343963623047, "learning_rate": 8.325260433945989e-06, "loss": 0.9341, "step": 4200 }, { "epoch": 0.2508061626657112, "eval_text_loss": 0.9469536542892456, "eval_text_runtime": 15.2259, "eval_text_samples_per_second": 262.71, "eval_text_steps_per_second": 0.525, "step": 4200 }, { "epoch": 0.2508061626657112, "eval_image_loss": 0.6719712018966675, "eval_image_runtime": 4.9765, "eval_image_samples_per_second": 803.778, "eval_image_steps_per_second": 1.608, "step": 4200 }, { "epoch": 0.2508061626657112, "eval_video_loss": 1.141599178314209, "eval_video_runtime": 76.1842, "eval_video_samples_per_second": 52.504, "eval_video_steps_per_second": 0.105, "step": 4200 }, { "epoch": 0.2508658784187269, "grad_norm": 3.088934898376465, "learning_rate": 8.324596907968949e-06, "loss": 0.8693, "step": 4201 }, { "epoch": 0.2509255941717425, "grad_norm": 1.8505969047546387, "learning_rate": 8.323933381991905e-06, "loss": 0.8826, "step": 4202 }, { "epoch": 0.25098530992475815, "grad_norm": 1.9819856882095337, "learning_rate": 8.323269856014863e-06, "loss": 0.8716, "step": 4203 }, { "epoch": 0.2510450256777738, "grad_norm": 1.9756076335906982, "learning_rate": 8.322606330037821e-06, "loss": 0.8905, "step": 4204 }, { "epoch": 0.2511047414307894, "grad_norm": 2.9980342388153076, "learning_rate": 8.32194280406078e-06, "loss": 0.8863, "step": 4205 }, { "epoch": 0.2511644571838051, "grad_norm": 3.5065460205078125, "learning_rate": 8.321279278083738e-06, "loss": 0.9313, "step": 4206 }, { "epoch": 0.25122417293682076, "grad_norm": 7.552477836608887, "learning_rate": 8.320615752106696e-06, "loss": 0.9348, "step": 4207 }, { "epoch": 0.25128388868983637, "grad_norm": 1.7039592266082764, "learning_rate": 8.319952226129654e-06, "loss": 0.9422, "step": 4208 }, { "epoch": 0.25134360444285203, "grad_norm": 2.801330327987671, "learning_rate": 8.319288700152612e-06, "loss": 0.9243, "step": 4209 }, { "epoch": 0.2514033201958677, "grad_norm": 1.9389169216156006, "learning_rate": 8.31862517417557e-06, "loss": 0.9144, "step": 4210 }, { "epoch": 0.2514630359488833, "grad_norm": 2.6573777198791504, "learning_rate": 8.317961648198528e-06, "loss": 0.8914, "step": 4211 }, { "epoch": 0.25152275170189897, "grad_norm": 3.9416685104370117, "learning_rate": 8.317298122221486e-06, "loss": 0.9311, "step": 4212 }, { "epoch": 0.2515824674549146, "grad_norm": 1.9955998659133911, "learning_rate": 8.316634596244444e-06, "loss": 0.924, "step": 4213 }, { "epoch": 0.25164218320793025, "grad_norm": 2.0801823139190674, "learning_rate": 8.3159710702674e-06, "loss": 0.9098, "step": 4214 }, { "epoch": 0.2517018989609459, "grad_norm": 1.91863214969635, "learning_rate": 8.31530754429036e-06, "loss": 0.9148, "step": 4215 }, { "epoch": 0.2517616147139615, "grad_norm": 4.202106475830078, "learning_rate": 8.314644018313319e-06, "loss": 0.9263, "step": 4216 }, { "epoch": 0.2518213304669772, "grad_norm": 2.038018226623535, "learning_rate": 8.313980492336275e-06, "loss": 0.8985, "step": 4217 }, { "epoch": 0.25188104621999285, "grad_norm": 13.166483879089355, "learning_rate": 8.313316966359235e-06, "loss": 0.9301, "step": 4218 }, { "epoch": 0.25194076197300846, "grad_norm": 2.423426628112793, "learning_rate": 8.312653440382191e-06, "loss": 0.9272, "step": 4219 }, { "epoch": 0.25200047772602413, "grad_norm": 3.1714348793029785, "learning_rate": 8.31198991440515e-06, "loss": 0.9051, "step": 4220 }, { "epoch": 0.2520601934790398, "grad_norm": 2.158468246459961, "learning_rate": 8.311326388428107e-06, "loss": 0.9406, "step": 4221 }, { "epoch": 0.2521199092320554, "grad_norm": 1.8913395404815674, "learning_rate": 8.310662862451065e-06, "loss": 0.9216, "step": 4222 }, { "epoch": 0.25217962498507107, "grad_norm": 1.807375192642212, "learning_rate": 8.309999336474024e-06, "loss": 0.8739, "step": 4223 }, { "epoch": 0.2522393407380867, "grad_norm": 2.2784550189971924, "learning_rate": 8.309335810496982e-06, "loss": 0.897, "step": 4224 }, { "epoch": 0.25229905649110235, "grad_norm": 2.0678694248199463, "learning_rate": 8.30867228451994e-06, "loss": 0.9428, "step": 4225 }, { "epoch": 0.252358772244118, "grad_norm": 2.0377278327941895, "learning_rate": 8.308008758542898e-06, "loss": 0.8917, "step": 4226 }, { "epoch": 0.2524184879971336, "grad_norm": 2.490832567214966, "learning_rate": 8.307345232565856e-06, "loss": 0.9609, "step": 4227 }, { "epoch": 0.2524782037501493, "grad_norm": 1.9721095561981201, "learning_rate": 8.306681706588814e-06, "loss": 0.919, "step": 4228 }, { "epoch": 0.25253791950316495, "grad_norm": 1.9218385219573975, "learning_rate": 8.30601818061177e-06, "loss": 0.9474, "step": 4229 }, { "epoch": 0.25259763525618056, "grad_norm": 2.180945873260498, "learning_rate": 8.30535465463473e-06, "loss": 0.9254, "step": 4230 }, { "epoch": 0.25265735100919623, "grad_norm": 3.3041746616363525, "learning_rate": 8.304691128657688e-06, "loss": 0.9204, "step": 4231 }, { "epoch": 0.2527170667622119, "grad_norm": 1.7868551015853882, "learning_rate": 8.304027602680645e-06, "loss": 0.8819, "step": 4232 }, { "epoch": 0.2527767825152275, "grad_norm": 2.8600387573242188, "learning_rate": 8.303364076703605e-06, "loss": 0.8928, "step": 4233 }, { "epoch": 0.25283649826824317, "grad_norm": 2.0162246227264404, "learning_rate": 8.302700550726561e-06, "loss": 0.89, "step": 4234 }, { "epoch": 0.2528962140212588, "grad_norm": 2.030306816101074, "learning_rate": 8.302037024749519e-06, "loss": 0.9077, "step": 4235 }, { "epoch": 0.25295592977427445, "grad_norm": 3.371704578399658, "learning_rate": 8.301373498772479e-06, "loss": 0.9061, "step": 4236 }, { "epoch": 0.2530156455272901, "grad_norm": 3.8137950897216797, "learning_rate": 8.300709972795435e-06, "loss": 0.9139, "step": 4237 }, { "epoch": 0.2530753612803057, "grad_norm": 2.1971185207366943, "learning_rate": 8.300046446818393e-06, "loss": 0.9105, "step": 4238 }, { "epoch": 0.2531350770333214, "grad_norm": 3.3043620586395264, "learning_rate": 8.299382920841351e-06, "loss": 0.8745, "step": 4239 }, { "epoch": 0.25319479278633705, "grad_norm": 2.398386240005493, "learning_rate": 8.29871939486431e-06, "loss": 0.9282, "step": 4240 }, { "epoch": 0.25325450853935266, "grad_norm": 3.49753999710083, "learning_rate": 8.298055868887268e-06, "loss": 0.9166, "step": 4241 }, { "epoch": 0.25331422429236833, "grad_norm": 1.9233289957046509, "learning_rate": 8.297392342910226e-06, "loss": 0.9115, "step": 4242 }, { "epoch": 0.253373940045384, "grad_norm": 2.063075304031372, "learning_rate": 8.296728816933184e-06, "loss": 0.8738, "step": 4243 }, { "epoch": 0.2534336557983996, "grad_norm": 2.2126195430755615, "learning_rate": 8.296065290956142e-06, "loss": 0.9205, "step": 4244 }, { "epoch": 0.25349337155141527, "grad_norm": 2.0665252208709717, "learning_rate": 8.2954017649791e-06, "loss": 0.9228, "step": 4245 }, { "epoch": 0.25355308730443094, "grad_norm": 2.5720231533050537, "learning_rate": 8.294738239002058e-06, "loss": 0.9426, "step": 4246 }, { "epoch": 0.25361280305744655, "grad_norm": 2.529839277267456, "learning_rate": 8.294074713025016e-06, "loss": 0.9082, "step": 4247 }, { "epoch": 0.2536725188104622, "grad_norm": 2.0062785148620605, "learning_rate": 8.293411187047974e-06, "loss": 0.9316, "step": 4248 }, { "epoch": 0.2537322345634778, "grad_norm": 2.355511426925659, "learning_rate": 8.29274766107093e-06, "loss": 0.8862, "step": 4249 }, { "epoch": 0.2537919503164935, "grad_norm": 3.3550732135772705, "learning_rate": 8.292084135093889e-06, "loss": 0.9146, "step": 4250 }, { "epoch": 0.25385166606950915, "grad_norm": 2.239091396331787, "learning_rate": 8.291420609116849e-06, "loss": 0.8552, "step": 4251 }, { "epoch": 0.25391138182252476, "grad_norm": 2.570823907852173, "learning_rate": 8.290757083139805e-06, "loss": 0.8897, "step": 4252 }, { "epoch": 0.2539710975755404, "grad_norm": 1.992361307144165, "learning_rate": 8.290093557162763e-06, "loss": 0.957, "step": 4253 }, { "epoch": 0.2540308133285561, "grad_norm": 1.965872883796692, "learning_rate": 8.289430031185721e-06, "loss": 0.9014, "step": 4254 }, { "epoch": 0.2540905290815717, "grad_norm": 2.132843017578125, "learning_rate": 8.28876650520868e-06, "loss": 0.906, "step": 4255 }, { "epoch": 0.25415024483458737, "grad_norm": 2.6174232959747314, "learning_rate": 8.288102979231637e-06, "loss": 0.8963, "step": 4256 }, { "epoch": 0.25420996058760303, "grad_norm": 3.1270861625671387, "learning_rate": 8.287439453254596e-06, "loss": 0.9092, "step": 4257 }, { "epoch": 0.25426967634061864, "grad_norm": 1.8123310804367065, "learning_rate": 8.286775927277554e-06, "loss": 0.9239, "step": 4258 }, { "epoch": 0.2543293920936343, "grad_norm": 2.0175588130950928, "learning_rate": 8.286112401300512e-06, "loss": 0.9031, "step": 4259 }, { "epoch": 0.2543891078466499, "grad_norm": 2.620875358581543, "learning_rate": 8.28544887532347e-06, "loss": 0.8926, "step": 4260 }, { "epoch": 0.2544488235996656, "grad_norm": 3.874040365219116, "learning_rate": 8.284785349346428e-06, "loss": 0.9353, "step": 4261 }, { "epoch": 0.25450853935268125, "grad_norm": 3.0879275798797607, "learning_rate": 8.284121823369386e-06, "loss": 0.8922, "step": 4262 }, { "epoch": 0.25456825510569686, "grad_norm": 3.0298385620117188, "learning_rate": 8.283458297392344e-06, "loss": 0.884, "step": 4263 }, { "epoch": 0.2546279708587125, "grad_norm": 1.6393606662750244, "learning_rate": 8.2827947714153e-06, "loss": 0.9074, "step": 4264 }, { "epoch": 0.2546876866117282, "grad_norm": 4.044653415679932, "learning_rate": 8.28213124543826e-06, "loss": 0.9505, "step": 4265 }, { "epoch": 0.2547474023647438, "grad_norm": 2.4703588485717773, "learning_rate": 8.281467719461218e-06, "loss": 0.9092, "step": 4266 }, { "epoch": 0.25480711811775947, "grad_norm": 2.1293134689331055, "learning_rate": 8.280804193484175e-06, "loss": 0.9053, "step": 4267 }, { "epoch": 0.25486683387077513, "grad_norm": 2.1235153675079346, "learning_rate": 8.280140667507135e-06, "loss": 0.9252, "step": 4268 }, { "epoch": 0.25492654962379074, "grad_norm": 2.4604287147521973, "learning_rate": 8.279477141530091e-06, "loss": 0.8978, "step": 4269 }, { "epoch": 0.2549862653768064, "grad_norm": 1.9872463941574097, "learning_rate": 8.27881361555305e-06, "loss": 0.9338, "step": 4270 }, { "epoch": 0.2550459811298221, "grad_norm": 3.7544679641723633, "learning_rate": 8.278150089576007e-06, "loss": 0.8828, "step": 4271 }, { "epoch": 0.2551056968828377, "grad_norm": 2.096301794052124, "learning_rate": 8.277486563598965e-06, "loss": 0.9073, "step": 4272 }, { "epoch": 0.25516541263585335, "grad_norm": 1.945508360862732, "learning_rate": 8.276823037621923e-06, "loss": 0.9137, "step": 4273 }, { "epoch": 0.25522512838886896, "grad_norm": 2.253873109817505, "learning_rate": 8.276159511644882e-06, "loss": 0.9587, "step": 4274 }, { "epoch": 0.2552848441418846, "grad_norm": 3.7767174243927, "learning_rate": 8.27549598566784e-06, "loss": 0.9389, "step": 4275 }, { "epoch": 0.2553445598949003, "grad_norm": 2.250485420227051, "learning_rate": 8.274832459690798e-06, "loss": 0.9201, "step": 4276 }, { "epoch": 0.2554042756479159, "grad_norm": 2.5418529510498047, "learning_rate": 8.274168933713756e-06, "loss": 0.9255, "step": 4277 }, { "epoch": 0.25546399140093157, "grad_norm": 2.7808728218078613, "learning_rate": 8.273505407736714e-06, "loss": 0.9445, "step": 4278 }, { "epoch": 0.25552370715394723, "grad_norm": 2.786316156387329, "learning_rate": 8.27284188175967e-06, "loss": 0.8735, "step": 4279 }, { "epoch": 0.25558342290696284, "grad_norm": 2.422008752822876, "learning_rate": 8.27217835578263e-06, "loss": 0.889, "step": 4280 }, { "epoch": 0.2556431386599785, "grad_norm": 3.464012861251831, "learning_rate": 8.271514829805588e-06, "loss": 0.9154, "step": 4281 }, { "epoch": 0.2557028544129942, "grad_norm": 2.229248285293579, "learning_rate": 8.270851303828545e-06, "loss": 0.9111, "step": 4282 }, { "epoch": 0.2557625701660098, "grad_norm": 2.0501530170440674, "learning_rate": 8.270187777851504e-06, "loss": 0.9306, "step": 4283 }, { "epoch": 0.25582228591902545, "grad_norm": 2.854151964187622, "learning_rate": 8.269524251874461e-06, "loss": 0.9727, "step": 4284 }, { "epoch": 0.25588200167204106, "grad_norm": 2.5801467895507812, "learning_rate": 8.268860725897419e-06, "loss": 0.9418, "step": 4285 }, { "epoch": 0.2559417174250567, "grad_norm": 3.931359052658081, "learning_rate": 8.268197199920379e-06, "loss": 0.9226, "step": 4286 }, { "epoch": 0.2560014331780724, "grad_norm": 4.607269763946533, "learning_rate": 8.267533673943335e-06, "loss": 0.8936, "step": 4287 }, { "epoch": 0.256061148931088, "grad_norm": 2.082610607147217, "learning_rate": 8.266870147966293e-06, "loss": 0.8908, "step": 4288 }, { "epoch": 0.25612086468410367, "grad_norm": 2.906663656234741, "learning_rate": 8.266206621989251e-06, "loss": 0.9341, "step": 4289 }, { "epoch": 0.25618058043711933, "grad_norm": 2.1041080951690674, "learning_rate": 8.26554309601221e-06, "loss": 0.8895, "step": 4290 }, { "epoch": 0.25624029619013494, "grad_norm": 3.4107930660247803, "learning_rate": 8.264879570035168e-06, "loss": 0.9233, "step": 4291 }, { "epoch": 0.2563000119431506, "grad_norm": 3.3500285148620605, "learning_rate": 8.264216044058126e-06, "loss": 0.9426, "step": 4292 }, { "epoch": 0.2563597276961663, "grad_norm": 1.9822611808776855, "learning_rate": 8.263552518081084e-06, "loss": 0.9381, "step": 4293 }, { "epoch": 0.2564194434491819, "grad_norm": 2.5583136081695557, "learning_rate": 8.262888992104042e-06, "loss": 0.8906, "step": 4294 }, { "epoch": 0.25647915920219755, "grad_norm": 2.774312734603882, "learning_rate": 8.262225466127e-06, "loss": 0.8797, "step": 4295 }, { "epoch": 0.25653887495521316, "grad_norm": 1.7486592531204224, "learning_rate": 8.261561940149958e-06, "loss": 0.8955, "step": 4296 }, { "epoch": 0.2565985907082288, "grad_norm": 1.9078668355941772, "learning_rate": 8.260898414172916e-06, "loss": 0.8935, "step": 4297 }, { "epoch": 0.2566583064612445, "grad_norm": 1.8126438856124878, "learning_rate": 8.260234888195874e-06, "loss": 0.9148, "step": 4298 }, { "epoch": 0.2567180222142601, "grad_norm": 2.4101834297180176, "learning_rate": 8.25957136221883e-06, "loss": 0.8902, "step": 4299 }, { "epoch": 0.25677773796727577, "grad_norm": 1.7572892904281616, "learning_rate": 8.258907836241789e-06, "loss": 0.9368, "step": 4300 }, { "epoch": 0.25683745372029143, "grad_norm": 2.1492581367492676, "learning_rate": 8.258244310264749e-06, "loss": 0.9051, "step": 4301 }, { "epoch": 0.25689716947330704, "grad_norm": 2.3842008113861084, "learning_rate": 8.257580784287705e-06, "loss": 0.9042, "step": 4302 }, { "epoch": 0.2569568852263227, "grad_norm": 2.1625921726226807, "learning_rate": 8.256917258310663e-06, "loss": 0.8824, "step": 4303 }, { "epoch": 0.25701660097933837, "grad_norm": 3.139084815979004, "learning_rate": 8.256253732333621e-06, "loss": 0.8803, "step": 4304 }, { "epoch": 0.257076316732354, "grad_norm": 5.9621901512146, "learning_rate": 8.25559020635658e-06, "loss": 0.9132, "step": 4305 }, { "epoch": 0.25713603248536965, "grad_norm": 3.0274174213409424, "learning_rate": 8.254926680379537e-06, "loss": 0.9258, "step": 4306 }, { "epoch": 0.2571957482383853, "grad_norm": 2.3070950508117676, "learning_rate": 8.254263154402496e-06, "loss": 0.9569, "step": 4307 }, { "epoch": 0.2572554639914009, "grad_norm": 2.936530590057373, "learning_rate": 8.253599628425454e-06, "loss": 0.8868, "step": 4308 }, { "epoch": 0.2573151797444166, "grad_norm": 2.4337964057922363, "learning_rate": 8.252936102448412e-06, "loss": 0.8982, "step": 4309 }, { "epoch": 0.2573748954974322, "grad_norm": 2.2056360244750977, "learning_rate": 8.25227257647137e-06, "loss": 0.9202, "step": 4310 }, { "epoch": 0.25743461125044786, "grad_norm": 2.1999025344848633, "learning_rate": 8.251609050494328e-06, "loss": 0.8941, "step": 4311 }, { "epoch": 0.25749432700346353, "grad_norm": 2.2296230792999268, "learning_rate": 8.250945524517286e-06, "loss": 0.8612, "step": 4312 }, { "epoch": 0.25755404275647914, "grad_norm": 1.7420527935028076, "learning_rate": 8.250281998540244e-06, "loss": 0.839, "step": 4313 }, { "epoch": 0.2576137585094948, "grad_norm": 10.366883277893066, "learning_rate": 8.2496184725632e-06, "loss": 0.8858, "step": 4314 }, { "epoch": 0.25767347426251047, "grad_norm": 3.2905614376068115, "learning_rate": 8.24895494658616e-06, "loss": 0.9118, "step": 4315 }, { "epoch": 0.2577331900155261, "grad_norm": 2.1912734508514404, "learning_rate": 8.248291420609118e-06, "loss": 0.9013, "step": 4316 }, { "epoch": 0.25779290576854175, "grad_norm": 4.329145908355713, "learning_rate": 8.247627894632075e-06, "loss": 0.9407, "step": 4317 }, { "epoch": 0.2578526215215574, "grad_norm": 3.0728750228881836, "learning_rate": 8.246964368655035e-06, "loss": 0.9067, "step": 4318 }, { "epoch": 0.257912337274573, "grad_norm": 2.298708438873291, "learning_rate": 8.246300842677991e-06, "loss": 0.9079, "step": 4319 }, { "epoch": 0.2579720530275887, "grad_norm": 1.7561297416687012, "learning_rate": 8.245637316700949e-06, "loss": 0.9154, "step": 4320 }, { "epoch": 0.2580317687806043, "grad_norm": 1.9099465608596802, "learning_rate": 8.244973790723907e-06, "loss": 0.894, "step": 4321 }, { "epoch": 0.25809148453361996, "grad_norm": 1.6377573013305664, "learning_rate": 8.244310264746865e-06, "loss": 0.9241, "step": 4322 }, { "epoch": 0.25815120028663563, "grad_norm": 1.98758065700531, "learning_rate": 8.243646738769823e-06, "loss": 0.9199, "step": 4323 }, { "epoch": 0.25821091603965124, "grad_norm": 2.180760622024536, "learning_rate": 8.242983212792782e-06, "loss": 0.9178, "step": 4324 }, { "epoch": 0.2582706317926669, "grad_norm": 2.9043188095092773, "learning_rate": 8.24231968681574e-06, "loss": 0.9155, "step": 4325 }, { "epoch": 0.25833034754568257, "grad_norm": 3.7660653591156006, "learning_rate": 8.241656160838698e-06, "loss": 0.907, "step": 4326 }, { "epoch": 0.2583900632986982, "grad_norm": 2.4352262020111084, "learning_rate": 8.240992634861656e-06, "loss": 0.9406, "step": 4327 }, { "epoch": 0.25844977905171385, "grad_norm": 6.167219638824463, "learning_rate": 8.240329108884614e-06, "loss": 0.9094, "step": 4328 }, { "epoch": 0.2585094948047295, "grad_norm": 3.076732873916626, "learning_rate": 8.23966558290757e-06, "loss": 0.9046, "step": 4329 }, { "epoch": 0.2585692105577451, "grad_norm": 2.4206831455230713, "learning_rate": 8.23900205693053e-06, "loss": 0.88, "step": 4330 }, { "epoch": 0.2586289263107608, "grad_norm": 1.7562254667282104, "learning_rate": 8.238338530953488e-06, "loss": 0.8736, "step": 4331 }, { "epoch": 0.2586886420637764, "grad_norm": 4.001763343811035, "learning_rate": 8.237675004976445e-06, "loss": 0.941, "step": 4332 }, { "epoch": 0.25874835781679206, "grad_norm": 2.4229960441589355, "learning_rate": 8.237011478999404e-06, "loss": 0.9349, "step": 4333 }, { "epoch": 0.25880807356980773, "grad_norm": 2.3250699043273926, "learning_rate": 8.23634795302236e-06, "loss": 0.8814, "step": 4334 }, { "epoch": 0.25886778932282334, "grad_norm": 3.5184311866760254, "learning_rate": 8.235684427045319e-06, "loss": 0.9549, "step": 4335 }, { "epoch": 0.258927505075839, "grad_norm": 2.3744747638702393, "learning_rate": 8.235020901068279e-06, "loss": 0.8997, "step": 4336 }, { "epoch": 0.25898722082885467, "grad_norm": 3.3535797595977783, "learning_rate": 8.234357375091235e-06, "loss": 0.9273, "step": 4337 }, { "epoch": 0.2590469365818703, "grad_norm": 2.772326946258545, "learning_rate": 8.233693849114193e-06, "loss": 0.9159, "step": 4338 }, { "epoch": 0.25910665233488595, "grad_norm": 2.0562150478363037, "learning_rate": 8.233030323137151e-06, "loss": 0.8554, "step": 4339 }, { "epoch": 0.2591663680879016, "grad_norm": 2.089156150817871, "learning_rate": 8.23236679716011e-06, "loss": 0.8821, "step": 4340 }, { "epoch": 0.2592260838409172, "grad_norm": 3.7568953037261963, "learning_rate": 8.231703271183068e-06, "loss": 0.8885, "step": 4341 }, { "epoch": 0.2592857995939329, "grad_norm": 1.8366315364837646, "learning_rate": 8.231039745206026e-06, "loss": 0.926, "step": 4342 }, { "epoch": 0.25934551534694855, "grad_norm": 2.538651466369629, "learning_rate": 8.230376219228984e-06, "loss": 0.9286, "step": 4343 }, { "epoch": 0.25940523109996416, "grad_norm": 2.001734495162964, "learning_rate": 8.229712693251942e-06, "loss": 0.9012, "step": 4344 }, { "epoch": 0.2594649468529798, "grad_norm": 2.297776699066162, "learning_rate": 8.2290491672749e-06, "loss": 0.9375, "step": 4345 }, { "epoch": 0.25952466260599544, "grad_norm": 6.509593486785889, "learning_rate": 8.228385641297858e-06, "loss": 0.8979, "step": 4346 }, { "epoch": 0.2595843783590111, "grad_norm": 3.5833358764648438, "learning_rate": 8.227722115320816e-06, "loss": 0.9172, "step": 4347 }, { "epoch": 0.25964409411202677, "grad_norm": 2.54032039642334, "learning_rate": 8.227058589343774e-06, "loss": 0.898, "step": 4348 }, { "epoch": 0.2597038098650424, "grad_norm": 1.8172335624694824, "learning_rate": 8.22639506336673e-06, "loss": 0.8581, "step": 4349 }, { "epoch": 0.25976352561805804, "grad_norm": 1.8851381540298462, "learning_rate": 8.225731537389689e-06, "loss": 0.8883, "step": 4350 }, { "epoch": 0.2598232413710737, "grad_norm": 5.738800048828125, "learning_rate": 8.225068011412649e-06, "loss": 0.9308, "step": 4351 }, { "epoch": 0.2598829571240893, "grad_norm": 2.726621150970459, "learning_rate": 8.224404485435605e-06, "loss": 0.9033, "step": 4352 }, { "epoch": 0.259942672877105, "grad_norm": 2.8865363597869873, "learning_rate": 8.223740959458563e-06, "loss": 0.8997, "step": 4353 }, { "epoch": 0.26000238863012065, "grad_norm": 1.9298659563064575, "learning_rate": 8.223077433481521e-06, "loss": 0.9023, "step": 4354 }, { "epoch": 0.26006210438313626, "grad_norm": 1.9298897981643677, "learning_rate": 8.22241390750448e-06, "loss": 0.89, "step": 4355 }, { "epoch": 0.2601218201361519, "grad_norm": 1.980963110923767, "learning_rate": 8.221750381527437e-06, "loss": 0.889, "step": 4356 }, { "epoch": 0.26018153588916754, "grad_norm": 2.859187602996826, "learning_rate": 8.221086855550395e-06, "loss": 0.9144, "step": 4357 }, { "epoch": 0.2602412516421832, "grad_norm": 2.468208074569702, "learning_rate": 8.220423329573354e-06, "loss": 0.9306, "step": 4358 }, { "epoch": 0.26030096739519887, "grad_norm": 2.950806140899658, "learning_rate": 8.219759803596312e-06, "loss": 0.8861, "step": 4359 }, { "epoch": 0.2603606831482145, "grad_norm": 2.2195043563842773, "learning_rate": 8.21909627761927e-06, "loss": 0.8763, "step": 4360 }, { "epoch": 0.26042039890123014, "grad_norm": 3.4167144298553467, "learning_rate": 8.218432751642228e-06, "loss": 0.905, "step": 4361 }, { "epoch": 0.2604801146542458, "grad_norm": 3.6890757083892822, "learning_rate": 8.217769225665186e-06, "loss": 0.9146, "step": 4362 }, { "epoch": 0.2605398304072614, "grad_norm": 4.269090175628662, "learning_rate": 8.217105699688144e-06, "loss": 0.9054, "step": 4363 }, { "epoch": 0.2605995461602771, "grad_norm": 1.8751542568206787, "learning_rate": 8.2164421737111e-06, "loss": 0.9121, "step": 4364 }, { "epoch": 0.26065926191329275, "grad_norm": 3.6178367137908936, "learning_rate": 8.21577864773406e-06, "loss": 0.9167, "step": 4365 }, { "epoch": 0.26071897766630836, "grad_norm": 3.493138074874878, "learning_rate": 8.215115121757018e-06, "loss": 0.8939, "step": 4366 }, { "epoch": 0.260778693419324, "grad_norm": 2.4832839965820312, "learning_rate": 8.214451595779975e-06, "loss": 0.8969, "step": 4367 }, { "epoch": 0.26083840917233964, "grad_norm": 2.2804768085479736, "learning_rate": 8.213788069802935e-06, "loss": 0.8993, "step": 4368 }, { "epoch": 0.2608981249253553, "grad_norm": 3.0656657218933105, "learning_rate": 8.213124543825891e-06, "loss": 0.9184, "step": 4369 }, { "epoch": 0.26095784067837097, "grad_norm": 2.8661065101623535, "learning_rate": 8.212461017848849e-06, "loss": 0.9057, "step": 4370 }, { "epoch": 0.2610175564313866, "grad_norm": 2.495753765106201, "learning_rate": 8.211797491871807e-06, "loss": 0.918, "step": 4371 }, { "epoch": 0.26107727218440224, "grad_norm": 2.211878538131714, "learning_rate": 8.211133965894765e-06, "loss": 0.9437, "step": 4372 }, { "epoch": 0.2611369879374179, "grad_norm": 2.0049588680267334, "learning_rate": 8.210470439917723e-06, "loss": 0.9314, "step": 4373 }, { "epoch": 0.2611967036904335, "grad_norm": 1.7952589988708496, "learning_rate": 8.209806913940681e-06, "loss": 0.9288, "step": 4374 }, { "epoch": 0.2612564194434492, "grad_norm": 2.5758087635040283, "learning_rate": 8.20914338796364e-06, "loss": 0.9072, "step": 4375 }, { "epoch": 0.26131613519646485, "grad_norm": 4.010349273681641, "learning_rate": 8.208479861986598e-06, "loss": 0.897, "step": 4376 }, { "epoch": 0.26137585094948046, "grad_norm": 2.376558303833008, "learning_rate": 8.207816336009556e-06, "loss": 0.8859, "step": 4377 }, { "epoch": 0.2614355667024961, "grad_norm": 4.599728584289551, "learning_rate": 8.207152810032514e-06, "loss": 0.9359, "step": 4378 }, { "epoch": 0.2614952824555118, "grad_norm": 1.9911184310913086, "learning_rate": 8.20648928405547e-06, "loss": 0.9237, "step": 4379 }, { "epoch": 0.2615549982085274, "grad_norm": 3.4272143840789795, "learning_rate": 8.20582575807843e-06, "loss": 0.9221, "step": 4380 }, { "epoch": 0.26161471396154307, "grad_norm": 2.3472347259521484, "learning_rate": 8.205162232101388e-06, "loss": 0.8985, "step": 4381 }, { "epoch": 0.2616744297145587, "grad_norm": 2.135154962539673, "learning_rate": 8.204498706124345e-06, "loss": 0.9257, "step": 4382 }, { "epoch": 0.26173414546757434, "grad_norm": 4.241727828979492, "learning_rate": 8.203835180147304e-06, "loss": 0.9555, "step": 4383 }, { "epoch": 0.26179386122059, "grad_norm": 2.3181984424591064, "learning_rate": 8.20317165417026e-06, "loss": 0.9353, "step": 4384 }, { "epoch": 0.2618535769736056, "grad_norm": 2.5891644954681396, "learning_rate": 8.202508128193219e-06, "loss": 0.9218, "step": 4385 }, { "epoch": 0.2619132927266213, "grad_norm": 2.3055875301361084, "learning_rate": 8.201844602216179e-06, "loss": 0.9351, "step": 4386 }, { "epoch": 0.26197300847963695, "grad_norm": 2.100518226623535, "learning_rate": 8.201181076239135e-06, "loss": 0.9004, "step": 4387 }, { "epoch": 0.26203272423265256, "grad_norm": 2.3288381099700928, "learning_rate": 8.200517550262093e-06, "loss": 0.8868, "step": 4388 }, { "epoch": 0.2620924399856682, "grad_norm": 3.3097920417785645, "learning_rate": 8.199854024285051e-06, "loss": 0.9213, "step": 4389 }, { "epoch": 0.2621521557386839, "grad_norm": 2.7522099018096924, "learning_rate": 8.19919049830801e-06, "loss": 0.9062, "step": 4390 }, { "epoch": 0.2622118714916995, "grad_norm": 1.9129583835601807, "learning_rate": 8.198526972330967e-06, "loss": 0.9153, "step": 4391 }, { "epoch": 0.26227158724471517, "grad_norm": 1.5655356645584106, "learning_rate": 8.197863446353926e-06, "loss": 0.916, "step": 4392 }, { "epoch": 0.2623313029977308, "grad_norm": 1.9057575464248657, "learning_rate": 8.197199920376884e-06, "loss": 0.8827, "step": 4393 }, { "epoch": 0.26239101875074644, "grad_norm": 2.836535930633545, "learning_rate": 8.196536394399842e-06, "loss": 0.8712, "step": 4394 }, { "epoch": 0.2624507345037621, "grad_norm": 2.6624248027801514, "learning_rate": 8.1958728684228e-06, "loss": 0.9098, "step": 4395 }, { "epoch": 0.2625104502567777, "grad_norm": 2.4343793392181396, "learning_rate": 8.195209342445758e-06, "loss": 0.8691, "step": 4396 }, { "epoch": 0.2625701660097934, "grad_norm": 4.795013904571533, "learning_rate": 8.194545816468716e-06, "loss": 0.87, "step": 4397 }, { "epoch": 0.26262988176280905, "grad_norm": 2.5986387729644775, "learning_rate": 8.193882290491674e-06, "loss": 0.8805, "step": 4398 }, { "epoch": 0.26268959751582466, "grad_norm": 2.6790850162506104, "learning_rate": 8.19321876451463e-06, "loss": 0.899, "step": 4399 }, { "epoch": 0.2627493132688403, "grad_norm": 2.209798574447632, "learning_rate": 8.192555238537589e-06, "loss": 0.9039, "step": 4400 }, { "epoch": 0.2627493132688403, "eval_text_loss": 0.9451122283935547, "eval_text_runtime": 15.2036, "eval_text_samples_per_second": 263.095, "eval_text_steps_per_second": 0.526, "step": 4400 }, { "epoch": 0.2627493132688403, "eval_image_loss": 0.6715342998504639, "eval_image_runtime": 5.1349, "eval_image_samples_per_second": 778.989, "eval_image_steps_per_second": 1.558, "step": 4400 }, { "epoch": 0.2627493132688403, "eval_video_loss": 1.1333868503570557, "eval_video_runtime": 76.2248, "eval_video_samples_per_second": 52.476, "eval_video_steps_per_second": 0.105, "step": 4400 }, { "epoch": 0.262809029021856, "grad_norm": 1.7843208312988281, "learning_rate": 8.191891712560548e-06, "loss": 0.888, "step": 4401 }, { "epoch": 0.2628687447748716, "grad_norm": 2.4959702491760254, "learning_rate": 8.191228186583505e-06, "loss": 0.9483, "step": 4402 }, { "epoch": 0.26292846052788726, "grad_norm": 2.2633399963378906, "learning_rate": 8.190564660606463e-06, "loss": 0.8914, "step": 4403 }, { "epoch": 0.2629881762809029, "grad_norm": 1.8066257238388062, "learning_rate": 8.189901134629421e-06, "loss": 0.888, "step": 4404 }, { "epoch": 0.26304789203391854, "grad_norm": 1.8822804689407349, "learning_rate": 8.18923760865238e-06, "loss": 0.9144, "step": 4405 }, { "epoch": 0.2631076077869342, "grad_norm": 2.450839042663574, "learning_rate": 8.188574082675337e-06, "loss": 0.8665, "step": 4406 }, { "epoch": 0.2631673235399498, "grad_norm": 1.84932541847229, "learning_rate": 8.187910556698295e-06, "loss": 0.9089, "step": 4407 }, { "epoch": 0.2632270392929655, "grad_norm": 3.190096616744995, "learning_rate": 8.187247030721253e-06, "loss": 0.9636, "step": 4408 }, { "epoch": 0.26328675504598115, "grad_norm": 4.991544246673584, "learning_rate": 8.186583504744212e-06, "loss": 0.9155, "step": 4409 }, { "epoch": 0.26334647079899676, "grad_norm": 2.5854289531707764, "learning_rate": 8.18591997876717e-06, "loss": 0.911, "step": 4410 }, { "epoch": 0.2634061865520124, "grad_norm": 2.2349307537078857, "learning_rate": 8.185256452790128e-06, "loss": 0.9189, "step": 4411 }, { "epoch": 0.2634659023050281, "grad_norm": 2.163147211074829, "learning_rate": 8.184592926813086e-06, "loss": 0.8711, "step": 4412 }, { "epoch": 0.2635256180580437, "grad_norm": 2.029092788696289, "learning_rate": 8.183929400836044e-06, "loss": 0.9232, "step": 4413 }, { "epoch": 0.26358533381105936, "grad_norm": 20.27703285217285, "learning_rate": 8.183265874859e-06, "loss": 0.9172, "step": 4414 }, { "epoch": 0.26364504956407503, "grad_norm": 2.2587437629699707, "learning_rate": 8.18260234888196e-06, "loss": 0.8954, "step": 4415 }, { "epoch": 0.26370476531709064, "grad_norm": 1.9874335527420044, "learning_rate": 8.181938822904918e-06, "loss": 0.9198, "step": 4416 }, { "epoch": 0.2637644810701063, "grad_norm": 2.695324182510376, "learning_rate": 8.181275296927875e-06, "loss": 0.8921, "step": 4417 }, { "epoch": 0.2638241968231219, "grad_norm": 3.3464365005493164, "learning_rate": 8.180611770950835e-06, "loss": 0.8894, "step": 4418 }, { "epoch": 0.2638839125761376, "grad_norm": 2.1837964057922363, "learning_rate": 8.179948244973791e-06, "loss": 0.9023, "step": 4419 }, { "epoch": 0.26394362832915325, "grad_norm": 2.287747859954834, "learning_rate": 8.179284718996749e-06, "loss": 0.917, "step": 4420 }, { "epoch": 0.26400334408216886, "grad_norm": 2.358794927597046, "learning_rate": 8.178621193019707e-06, "loss": 0.8546, "step": 4421 }, { "epoch": 0.2640630598351845, "grad_norm": 2.263568878173828, "learning_rate": 8.177957667042665e-06, "loss": 0.8689, "step": 4422 }, { "epoch": 0.2641227755882002, "grad_norm": 2.668675184249878, "learning_rate": 8.177294141065623e-06, "loss": 0.8855, "step": 4423 }, { "epoch": 0.2641824913412158, "grad_norm": 2.0305986404418945, "learning_rate": 8.176630615088581e-06, "loss": 0.8969, "step": 4424 }, { "epoch": 0.26424220709423146, "grad_norm": 2.581648826599121, "learning_rate": 8.17596708911154e-06, "loss": 0.8872, "step": 4425 }, { "epoch": 0.26430192284724713, "grad_norm": 2.3862204551696777, "learning_rate": 8.175303563134498e-06, "loss": 0.8962, "step": 4426 }, { "epoch": 0.26436163860026274, "grad_norm": 2.7726335525512695, "learning_rate": 8.174640037157456e-06, "loss": 0.9051, "step": 4427 }, { "epoch": 0.2644213543532784, "grad_norm": 3.278524398803711, "learning_rate": 8.173976511180414e-06, "loss": 0.9208, "step": 4428 }, { "epoch": 0.264481070106294, "grad_norm": 2.0539023876190186, "learning_rate": 8.17331298520337e-06, "loss": 0.9006, "step": 4429 }, { "epoch": 0.2645407858593097, "grad_norm": 1.6543467044830322, "learning_rate": 8.17264945922633e-06, "loss": 0.885, "step": 4430 }, { "epoch": 0.26460050161232535, "grad_norm": 3.823598861694336, "learning_rate": 8.171985933249288e-06, "loss": 0.9039, "step": 4431 }, { "epoch": 0.26466021736534096, "grad_norm": 2.440135955810547, "learning_rate": 8.171322407272245e-06, "loss": 0.9019, "step": 4432 }, { "epoch": 0.2647199331183566, "grad_norm": 2.207033634185791, "learning_rate": 8.170658881295204e-06, "loss": 0.8791, "step": 4433 }, { "epoch": 0.2647796488713723, "grad_norm": 2.8333189487457275, "learning_rate": 8.16999535531816e-06, "loss": 0.8834, "step": 4434 }, { "epoch": 0.2648393646243879, "grad_norm": 2.0160672664642334, "learning_rate": 8.169331829341119e-06, "loss": 0.8984, "step": 4435 }, { "epoch": 0.26489908037740356, "grad_norm": 1.8311214447021484, "learning_rate": 8.168668303364079e-06, "loss": 0.9224, "step": 4436 }, { "epoch": 0.2649587961304192, "grad_norm": 2.5617401599884033, "learning_rate": 8.168004777387035e-06, "loss": 0.9003, "step": 4437 }, { "epoch": 0.26501851188343484, "grad_norm": 2.2476511001586914, "learning_rate": 8.167341251409993e-06, "loss": 0.9284, "step": 4438 }, { "epoch": 0.2650782276364505, "grad_norm": 2.022301197052002, "learning_rate": 8.166677725432951e-06, "loss": 0.8451, "step": 4439 }, { "epoch": 0.26513794338946617, "grad_norm": 1.9702403545379639, "learning_rate": 8.16601419945591e-06, "loss": 0.9197, "step": 4440 }, { "epoch": 0.2651976591424818, "grad_norm": 1.7926703691482544, "learning_rate": 8.165350673478867e-06, "loss": 0.8664, "step": 4441 }, { "epoch": 0.26525737489549744, "grad_norm": 4.181570529937744, "learning_rate": 8.164687147501826e-06, "loss": 0.9047, "step": 4442 }, { "epoch": 0.26531709064851305, "grad_norm": 2.4265244007110596, "learning_rate": 8.164023621524784e-06, "loss": 0.8897, "step": 4443 }, { "epoch": 0.2653768064015287, "grad_norm": 2.443889856338501, "learning_rate": 8.163360095547742e-06, "loss": 0.8912, "step": 4444 }, { "epoch": 0.2654365221545444, "grad_norm": 2.386963129043579, "learning_rate": 8.1626965695707e-06, "loss": 0.9025, "step": 4445 }, { "epoch": 0.26549623790756, "grad_norm": 3.4161596298217773, "learning_rate": 8.162033043593658e-06, "loss": 0.8997, "step": 4446 }, { "epoch": 0.26555595366057566, "grad_norm": 2.208101511001587, "learning_rate": 8.161369517616616e-06, "loss": 0.9034, "step": 4447 }, { "epoch": 0.2656156694135913, "grad_norm": 2.811354875564575, "learning_rate": 8.160705991639574e-06, "loss": 0.9496, "step": 4448 }, { "epoch": 0.26567538516660694, "grad_norm": 3.2000091075897217, "learning_rate": 8.16004246566253e-06, "loss": 0.9315, "step": 4449 }, { "epoch": 0.2657351009196226, "grad_norm": 2.639143228530884, "learning_rate": 8.159378939685489e-06, "loss": 0.9264, "step": 4450 }, { "epoch": 0.26579481667263827, "grad_norm": 2.45363712310791, "learning_rate": 8.158715413708448e-06, "loss": 0.9004, "step": 4451 }, { "epoch": 0.2658545324256539, "grad_norm": 2.989307165145874, "learning_rate": 8.158051887731405e-06, "loss": 0.8955, "step": 4452 }, { "epoch": 0.26591424817866954, "grad_norm": 2.3933727741241455, "learning_rate": 8.157388361754363e-06, "loss": 0.9182, "step": 4453 }, { "epoch": 0.26597396393168515, "grad_norm": 3.212299346923828, "learning_rate": 8.156724835777321e-06, "loss": 0.8804, "step": 4454 }, { "epoch": 0.2660336796847008, "grad_norm": 2.541752338409424, "learning_rate": 8.156061309800279e-06, "loss": 0.8622, "step": 4455 }, { "epoch": 0.2660933954377165, "grad_norm": 2.53013277053833, "learning_rate": 8.155397783823237e-06, "loss": 0.9127, "step": 4456 }, { "epoch": 0.2661531111907321, "grad_norm": 1.9348742961883545, "learning_rate": 8.154734257846195e-06, "loss": 0.8419, "step": 4457 }, { "epoch": 0.26621282694374776, "grad_norm": 2.3017938137054443, "learning_rate": 8.154070731869153e-06, "loss": 0.9247, "step": 4458 }, { "epoch": 0.2662725426967634, "grad_norm": 10.782515525817871, "learning_rate": 8.153407205892112e-06, "loss": 0.9212, "step": 4459 }, { "epoch": 0.26633225844977904, "grad_norm": 4.498027324676514, "learning_rate": 8.15274367991507e-06, "loss": 0.9267, "step": 4460 }, { "epoch": 0.2663919742027947, "grad_norm": 3.5287230014801025, "learning_rate": 8.152080153938028e-06, "loss": 0.8898, "step": 4461 }, { "epoch": 0.26645168995581037, "grad_norm": 3.563772201538086, "learning_rate": 8.151416627960986e-06, "loss": 0.9108, "step": 4462 }, { "epoch": 0.266511405708826, "grad_norm": 2.588261604309082, "learning_rate": 8.150753101983944e-06, "loss": 0.8929, "step": 4463 }, { "epoch": 0.26657112146184164, "grad_norm": 2.5801873207092285, "learning_rate": 8.1500895760069e-06, "loss": 0.8788, "step": 4464 }, { "epoch": 0.26663083721485725, "grad_norm": 1.6852188110351562, "learning_rate": 8.14942605002986e-06, "loss": 0.8661, "step": 4465 }, { "epoch": 0.2666905529678729, "grad_norm": 1.807667851448059, "learning_rate": 8.148762524052818e-06, "loss": 0.8778, "step": 4466 }, { "epoch": 0.2667502687208886, "grad_norm": 2.493995428085327, "learning_rate": 8.148098998075775e-06, "loss": 0.9087, "step": 4467 }, { "epoch": 0.2668099844739042, "grad_norm": 2.040290355682373, "learning_rate": 8.147435472098734e-06, "loss": 0.8628, "step": 4468 }, { "epoch": 0.26686970022691986, "grad_norm": 2.9483907222747803, "learning_rate": 8.146771946121691e-06, "loss": 0.963, "step": 4469 }, { "epoch": 0.2669294159799355, "grad_norm": 2.7419931888580322, "learning_rate": 8.146108420144649e-06, "loss": 0.8897, "step": 4470 }, { "epoch": 0.26698913173295113, "grad_norm": 7.200314044952393, "learning_rate": 8.145444894167607e-06, "loss": 0.9291, "step": 4471 }, { "epoch": 0.2670488474859668, "grad_norm": 2.046085834503174, "learning_rate": 8.144781368190565e-06, "loss": 0.9179, "step": 4472 }, { "epoch": 0.26710856323898247, "grad_norm": 5.644901275634766, "learning_rate": 8.144117842213523e-06, "loss": 0.8944, "step": 4473 }, { "epoch": 0.2671682789919981, "grad_norm": 2.9931538105010986, "learning_rate": 8.143454316236481e-06, "loss": 0.9357, "step": 4474 }, { "epoch": 0.26722799474501374, "grad_norm": 2.4633662700653076, "learning_rate": 8.14279079025944e-06, "loss": 0.9228, "step": 4475 }, { "epoch": 0.2672877104980294, "grad_norm": 4.394349575042725, "learning_rate": 8.142127264282398e-06, "loss": 0.8894, "step": 4476 }, { "epoch": 0.267347426251045, "grad_norm": 2.5150694847106934, "learning_rate": 8.141463738305356e-06, "loss": 0.934, "step": 4477 }, { "epoch": 0.2674071420040607, "grad_norm": 1.8489034175872803, "learning_rate": 8.140800212328314e-06, "loss": 0.904, "step": 4478 }, { "epoch": 0.2674668577570763, "grad_norm": 3.098386287689209, "learning_rate": 8.14013668635127e-06, "loss": 0.8893, "step": 4479 }, { "epoch": 0.26752657351009196, "grad_norm": 3.476959228515625, "learning_rate": 8.13947316037423e-06, "loss": 0.9105, "step": 4480 }, { "epoch": 0.2675862892631076, "grad_norm": 6.478596210479736, "learning_rate": 8.138809634397188e-06, "loss": 0.8885, "step": 4481 }, { "epoch": 0.26764600501612323, "grad_norm": 2.0623207092285156, "learning_rate": 8.138146108420144e-06, "loss": 0.901, "step": 4482 }, { "epoch": 0.2677057207691389, "grad_norm": 2.4515633583068848, "learning_rate": 8.137482582443104e-06, "loss": 0.9389, "step": 4483 }, { "epoch": 0.26776543652215457, "grad_norm": 2.1156795024871826, "learning_rate": 8.13681905646606e-06, "loss": 0.8787, "step": 4484 }, { "epoch": 0.2678251522751702, "grad_norm": 2.075253963470459, "learning_rate": 8.136155530489019e-06, "loss": 0.9265, "step": 4485 }, { "epoch": 0.26788486802818584, "grad_norm": 2.975590705871582, "learning_rate": 8.135492004511979e-06, "loss": 0.8921, "step": 4486 }, { "epoch": 0.2679445837812015, "grad_norm": 2.816743850708008, "learning_rate": 8.134828478534935e-06, "loss": 0.906, "step": 4487 }, { "epoch": 0.2680042995342171, "grad_norm": 2.5242457389831543, "learning_rate": 8.134164952557893e-06, "loss": 0.9377, "step": 4488 }, { "epoch": 0.2680640152872328, "grad_norm": 2.150359630584717, "learning_rate": 8.133501426580851e-06, "loss": 0.9107, "step": 4489 }, { "epoch": 0.2681237310402484, "grad_norm": 1.6875554323196411, "learning_rate": 8.13283790060381e-06, "loss": 0.8988, "step": 4490 }, { "epoch": 0.26818344679326406, "grad_norm": 2.9895060062408447, "learning_rate": 8.132174374626767e-06, "loss": 0.9251, "step": 4491 }, { "epoch": 0.2682431625462797, "grad_norm": 2.7103965282440186, "learning_rate": 8.131510848649725e-06, "loss": 0.92, "step": 4492 }, { "epoch": 0.26830287829929533, "grad_norm": 4.237582683563232, "learning_rate": 8.130847322672684e-06, "loss": 0.9162, "step": 4493 }, { "epoch": 0.268362594052311, "grad_norm": 2.0909199714660645, "learning_rate": 8.130183796695642e-06, "loss": 0.8906, "step": 4494 }, { "epoch": 0.26842230980532666, "grad_norm": 2.7353122234344482, "learning_rate": 8.1295202707186e-06, "loss": 0.8822, "step": 4495 }, { "epoch": 0.2684820255583423, "grad_norm": 2.8246898651123047, "learning_rate": 8.128856744741558e-06, "loss": 0.9449, "step": 4496 }, { "epoch": 0.26854174131135794, "grad_norm": 2.2301957607269287, "learning_rate": 8.128193218764516e-06, "loss": 0.8856, "step": 4497 }, { "epoch": 0.2686014570643736, "grad_norm": 2.7943825721740723, "learning_rate": 8.127529692787474e-06, "loss": 0.9185, "step": 4498 }, { "epoch": 0.2686611728173892, "grad_norm": 3.526201009750366, "learning_rate": 8.12686616681043e-06, "loss": 0.8559, "step": 4499 }, { "epoch": 0.2687208885704049, "grad_norm": 3.359562397003174, "learning_rate": 8.126202640833389e-06, "loss": 0.9166, "step": 4500 }, { "epoch": 0.2687806043234205, "grad_norm": 2.766885995864868, "learning_rate": 8.125539114856348e-06, "loss": 0.944, "step": 4501 }, { "epoch": 0.26884032007643616, "grad_norm": 3.3401525020599365, "learning_rate": 8.124875588879305e-06, "loss": 0.9309, "step": 4502 }, { "epoch": 0.2689000358294518, "grad_norm": 2.449660062789917, "learning_rate": 8.124212062902263e-06, "loss": 0.8933, "step": 4503 }, { "epoch": 0.26895975158246743, "grad_norm": 1.7106664180755615, "learning_rate": 8.123548536925221e-06, "loss": 0.9023, "step": 4504 }, { "epoch": 0.2690194673354831, "grad_norm": 3.1922192573547363, "learning_rate": 8.122885010948179e-06, "loss": 0.883, "step": 4505 }, { "epoch": 0.26907918308849876, "grad_norm": 2.063546895980835, "learning_rate": 8.122221484971137e-06, "loss": 0.8963, "step": 4506 }, { "epoch": 0.2691388988415144, "grad_norm": 5.292263031005859, "learning_rate": 8.121557958994095e-06, "loss": 0.9472, "step": 4507 }, { "epoch": 0.26919861459453004, "grad_norm": 2.936821460723877, "learning_rate": 8.120894433017053e-06, "loss": 0.9088, "step": 4508 }, { "epoch": 0.2692583303475457, "grad_norm": 3.2506160736083984, "learning_rate": 8.120230907040011e-06, "loss": 0.9075, "step": 4509 }, { "epoch": 0.2693180461005613, "grad_norm": 2.8060169219970703, "learning_rate": 8.11956738106297e-06, "loss": 0.9032, "step": 4510 }, { "epoch": 0.269377761853577, "grad_norm": 6.476625442504883, "learning_rate": 8.118903855085928e-06, "loss": 0.8885, "step": 4511 }, { "epoch": 0.26943747760659265, "grad_norm": 2.322957992553711, "learning_rate": 8.118240329108886e-06, "loss": 0.887, "step": 4512 }, { "epoch": 0.26949719335960826, "grad_norm": 3.258241653442383, "learning_rate": 8.117576803131844e-06, "loss": 0.9396, "step": 4513 }, { "epoch": 0.2695569091126239, "grad_norm": 3.1143910884857178, "learning_rate": 8.1169132771548e-06, "loss": 0.8875, "step": 4514 }, { "epoch": 0.26961662486563953, "grad_norm": 2.0489542484283447, "learning_rate": 8.11624975117776e-06, "loss": 0.8808, "step": 4515 }, { "epoch": 0.2696763406186552, "grad_norm": 2.387582778930664, "learning_rate": 8.115586225200718e-06, "loss": 0.8805, "step": 4516 }, { "epoch": 0.26973605637167086, "grad_norm": 2.8931381702423096, "learning_rate": 8.114922699223675e-06, "loss": 0.8703, "step": 4517 }, { "epoch": 0.2697957721246865, "grad_norm": 1.9661269187927246, "learning_rate": 8.114259173246634e-06, "loss": 0.915, "step": 4518 }, { "epoch": 0.26985548787770214, "grad_norm": 5.779547691345215, "learning_rate": 8.11359564726959e-06, "loss": 0.8886, "step": 4519 }, { "epoch": 0.2699152036307178, "grad_norm": 2.1924216747283936, "learning_rate": 8.112932121292549e-06, "loss": 0.8837, "step": 4520 }, { "epoch": 0.2699749193837334, "grad_norm": 2.299848794937134, "learning_rate": 8.112268595315507e-06, "loss": 0.9195, "step": 4521 }, { "epoch": 0.2700346351367491, "grad_norm": 4.721573352813721, "learning_rate": 8.111605069338465e-06, "loss": 0.9183, "step": 4522 }, { "epoch": 0.27009435088976474, "grad_norm": 2.280353307723999, "learning_rate": 8.110941543361423e-06, "loss": 0.8959, "step": 4523 }, { "epoch": 0.27015406664278035, "grad_norm": 2.304316282272339, "learning_rate": 8.110278017384381e-06, "loss": 0.906, "step": 4524 }, { "epoch": 0.270213782395796, "grad_norm": 2.5679707527160645, "learning_rate": 8.10961449140734e-06, "loss": 0.927, "step": 4525 }, { "epoch": 0.27027349814881163, "grad_norm": 1.8994637727737427, "learning_rate": 8.108950965430297e-06, "loss": 0.917, "step": 4526 }, { "epoch": 0.2703332139018273, "grad_norm": 1.8741605281829834, "learning_rate": 8.108287439453256e-06, "loss": 0.8565, "step": 4527 }, { "epoch": 0.27039292965484296, "grad_norm": 1.6982924938201904, "learning_rate": 8.107623913476214e-06, "loss": 0.9299, "step": 4528 }, { "epoch": 0.27045264540785857, "grad_norm": 2.4635508060455322, "learning_rate": 8.10696038749917e-06, "loss": 0.9059, "step": 4529 }, { "epoch": 0.27051236116087424, "grad_norm": 1.749000072479248, "learning_rate": 8.10629686152213e-06, "loss": 0.8771, "step": 4530 }, { "epoch": 0.2705720769138899, "grad_norm": 2.5741405487060547, "learning_rate": 8.105633335545088e-06, "loss": 0.8651, "step": 4531 }, { "epoch": 0.2706317926669055, "grad_norm": 2.696431875228882, "learning_rate": 8.104969809568044e-06, "loss": 0.8882, "step": 4532 }, { "epoch": 0.2706915084199212, "grad_norm": 1.611048936843872, "learning_rate": 8.104306283591004e-06, "loss": 0.8836, "step": 4533 }, { "epoch": 0.27075122417293684, "grad_norm": 1.8103529214859009, "learning_rate": 8.10364275761396e-06, "loss": 0.8877, "step": 4534 }, { "epoch": 0.27081093992595245, "grad_norm": 2.222372055053711, "learning_rate": 8.102979231636919e-06, "loss": 0.8822, "step": 4535 }, { "epoch": 0.2708706556789681, "grad_norm": 1.8862295150756836, "learning_rate": 8.102315705659879e-06, "loss": 0.8967, "step": 4536 }, { "epoch": 0.27093037143198373, "grad_norm": 2.344097852706909, "learning_rate": 8.101652179682835e-06, "loss": 0.909, "step": 4537 }, { "epoch": 0.2709900871849994, "grad_norm": 2.2529070377349854, "learning_rate": 8.100988653705793e-06, "loss": 0.9128, "step": 4538 }, { "epoch": 0.27104980293801506, "grad_norm": 1.6982474327087402, "learning_rate": 8.100325127728751e-06, "loss": 0.8442, "step": 4539 }, { "epoch": 0.27110951869103067, "grad_norm": 3.474489688873291, "learning_rate": 8.09966160175171e-06, "loss": 0.9189, "step": 4540 }, { "epoch": 0.27116923444404634, "grad_norm": 2.166074514389038, "learning_rate": 8.098998075774667e-06, "loss": 0.8959, "step": 4541 }, { "epoch": 0.271228950197062, "grad_norm": 2.6794044971466064, "learning_rate": 8.098334549797625e-06, "loss": 0.9359, "step": 4542 }, { "epoch": 0.2712886659500776, "grad_norm": 7.2439680099487305, "learning_rate": 8.097671023820584e-06, "loss": 0.9352, "step": 4543 }, { "epoch": 0.2713483817030933, "grad_norm": 2.0623977184295654, "learning_rate": 8.097007497843542e-06, "loss": 0.91, "step": 4544 }, { "epoch": 0.27140809745610894, "grad_norm": 1.6776299476623535, "learning_rate": 8.0963439718665e-06, "loss": 0.9018, "step": 4545 }, { "epoch": 0.27146781320912455, "grad_norm": 2.72235107421875, "learning_rate": 8.095680445889458e-06, "loss": 0.9109, "step": 4546 }, { "epoch": 0.2715275289621402, "grad_norm": 1.9897739887237549, "learning_rate": 8.095016919912416e-06, "loss": 0.9313, "step": 4547 }, { "epoch": 0.2715872447151559, "grad_norm": 6.22433614730835, "learning_rate": 8.094353393935374e-06, "loss": 0.919, "step": 4548 }, { "epoch": 0.2716469604681715, "grad_norm": 3.301682949066162, "learning_rate": 8.09368986795833e-06, "loss": 0.9051, "step": 4549 }, { "epoch": 0.27170667622118716, "grad_norm": 2.2295796871185303, "learning_rate": 8.093026341981289e-06, "loss": 0.8793, "step": 4550 }, { "epoch": 0.27176639197420277, "grad_norm": 2.6645731925964355, "learning_rate": 8.092362816004248e-06, "loss": 0.9331, "step": 4551 }, { "epoch": 0.27182610772721844, "grad_norm": 1.8789016008377075, "learning_rate": 8.091699290027205e-06, "loss": 0.8914, "step": 4552 }, { "epoch": 0.2718858234802341, "grad_norm": 2.0655710697174072, "learning_rate": 8.091035764050163e-06, "loss": 0.8989, "step": 4553 }, { "epoch": 0.2719455392332497, "grad_norm": 3.829923629760742, "learning_rate": 8.090372238073121e-06, "loss": 0.9168, "step": 4554 }, { "epoch": 0.2720052549862654, "grad_norm": 2.0692429542541504, "learning_rate": 8.089708712096079e-06, "loss": 0.8965, "step": 4555 }, { "epoch": 0.27206497073928104, "grad_norm": 3.341771125793457, "learning_rate": 8.089045186119037e-06, "loss": 0.9164, "step": 4556 }, { "epoch": 0.27212468649229665, "grad_norm": 2.0986835956573486, "learning_rate": 8.088381660141995e-06, "loss": 0.8843, "step": 4557 }, { "epoch": 0.2721844022453123, "grad_norm": 2.0442817211151123, "learning_rate": 8.087718134164953e-06, "loss": 0.9258, "step": 4558 }, { "epoch": 0.272244117998328, "grad_norm": 2.1112234592437744, "learning_rate": 8.087054608187911e-06, "loss": 0.9148, "step": 4559 }, { "epoch": 0.2723038337513436, "grad_norm": 2.058339834213257, "learning_rate": 8.08639108221087e-06, "loss": 0.8868, "step": 4560 }, { "epoch": 0.27236354950435926, "grad_norm": 2.2445008754730225, "learning_rate": 8.085727556233828e-06, "loss": 0.9682, "step": 4561 }, { "epoch": 0.27242326525737487, "grad_norm": 1.8298944234848022, "learning_rate": 8.085064030256786e-06, "loss": 0.9076, "step": 4562 }, { "epoch": 0.27248298101039053, "grad_norm": 5.218899726867676, "learning_rate": 8.084400504279744e-06, "loss": 0.9129, "step": 4563 }, { "epoch": 0.2725426967634062, "grad_norm": 2.4599640369415283, "learning_rate": 8.0837369783027e-06, "loss": 0.9077, "step": 4564 }, { "epoch": 0.2726024125164218, "grad_norm": 2.2480573654174805, "learning_rate": 8.08307345232566e-06, "loss": 0.8534, "step": 4565 }, { "epoch": 0.2726621282694375, "grad_norm": 1.9352741241455078, "learning_rate": 8.082409926348618e-06, "loss": 0.8936, "step": 4566 }, { "epoch": 0.27272184402245314, "grad_norm": 3.019223690032959, "learning_rate": 8.081746400371575e-06, "loss": 0.8788, "step": 4567 }, { "epoch": 0.27278155977546875, "grad_norm": 2.4442222118377686, "learning_rate": 8.081082874394534e-06, "loss": 0.9306, "step": 4568 }, { "epoch": 0.2728412755284844, "grad_norm": 7.601566314697266, "learning_rate": 8.08041934841749e-06, "loss": 0.9526, "step": 4569 }, { "epoch": 0.2729009912815001, "grad_norm": 2.1357240676879883, "learning_rate": 8.079755822440449e-06, "loss": 0.899, "step": 4570 }, { "epoch": 0.2729607070345157, "grad_norm": 2.1751389503479004, "learning_rate": 8.079092296463407e-06, "loss": 0.9047, "step": 4571 }, { "epoch": 0.27302042278753136, "grad_norm": 2.1897873878479004, "learning_rate": 8.078428770486365e-06, "loss": 0.915, "step": 4572 }, { "epoch": 0.273080138540547, "grad_norm": 1.9365856647491455, "learning_rate": 8.077765244509323e-06, "loss": 0.9162, "step": 4573 }, { "epoch": 0.27313985429356263, "grad_norm": 2.622359037399292, "learning_rate": 8.077101718532281e-06, "loss": 0.8636, "step": 4574 }, { "epoch": 0.2731995700465783, "grad_norm": 1.7495332956314087, "learning_rate": 8.07643819255524e-06, "loss": 0.909, "step": 4575 }, { "epoch": 0.2732592857995939, "grad_norm": 3.4774253368377686, "learning_rate": 8.075774666578197e-06, "loss": 0.8969, "step": 4576 }, { "epoch": 0.2733190015526096, "grad_norm": 2.702416181564331, "learning_rate": 8.075111140601156e-06, "loss": 0.8495, "step": 4577 }, { "epoch": 0.27337871730562524, "grad_norm": 1.83803129196167, "learning_rate": 8.074447614624114e-06, "loss": 0.8885, "step": 4578 }, { "epoch": 0.27343843305864085, "grad_norm": 2.309142589569092, "learning_rate": 8.07378408864707e-06, "loss": 0.8854, "step": 4579 }, { "epoch": 0.2734981488116565, "grad_norm": 2.25636625289917, "learning_rate": 8.07312056267003e-06, "loss": 0.9284, "step": 4580 }, { "epoch": 0.2735578645646722, "grad_norm": 2.483506202697754, "learning_rate": 8.072457036692986e-06, "loss": 0.9439, "step": 4581 }, { "epoch": 0.2736175803176878, "grad_norm": 3.457988977432251, "learning_rate": 8.071793510715944e-06, "loss": 0.9116, "step": 4582 }, { "epoch": 0.27367729607070346, "grad_norm": 2.017169713973999, "learning_rate": 8.071129984738904e-06, "loss": 0.8811, "step": 4583 }, { "epoch": 0.2737370118237191, "grad_norm": 2.003732919692993, "learning_rate": 8.07046645876186e-06, "loss": 0.911, "step": 4584 }, { "epoch": 0.27379672757673473, "grad_norm": 2.902580738067627, "learning_rate": 8.069802932784819e-06, "loss": 0.9097, "step": 4585 }, { "epoch": 0.2738564433297504, "grad_norm": 4.261101722717285, "learning_rate": 8.069139406807777e-06, "loss": 0.8697, "step": 4586 }, { "epoch": 0.273916159082766, "grad_norm": 2.406299352645874, "learning_rate": 8.068475880830735e-06, "loss": 0.9301, "step": 4587 }, { "epoch": 0.2739758748357817, "grad_norm": 2.4234204292297363, "learning_rate": 8.067812354853693e-06, "loss": 0.908, "step": 4588 }, { "epoch": 0.27403559058879734, "grad_norm": 1.986472249031067, "learning_rate": 8.067148828876651e-06, "loss": 0.9498, "step": 4589 }, { "epoch": 0.27409530634181295, "grad_norm": 2.5752737522125244, "learning_rate": 8.066485302899609e-06, "loss": 0.893, "step": 4590 }, { "epoch": 0.2741550220948286, "grad_norm": 2.7040929794311523, "learning_rate": 8.065821776922567e-06, "loss": 0.9041, "step": 4591 }, { "epoch": 0.2742147378478443, "grad_norm": 1.9648395776748657, "learning_rate": 8.065158250945525e-06, "loss": 0.9127, "step": 4592 }, { "epoch": 0.2742744536008599, "grad_norm": 2.2260684967041016, "learning_rate": 8.064494724968483e-06, "loss": 0.9271, "step": 4593 }, { "epoch": 0.27433416935387556, "grad_norm": 2.23577618598938, "learning_rate": 8.063831198991442e-06, "loss": 0.9442, "step": 4594 }, { "epoch": 0.2743938851068912, "grad_norm": 3.0521645545959473, "learning_rate": 8.0631676730144e-06, "loss": 0.9195, "step": 4595 }, { "epoch": 0.27445360085990683, "grad_norm": 2.0861165523529053, "learning_rate": 8.062504147037356e-06, "loss": 0.8992, "step": 4596 }, { "epoch": 0.2745133166129225, "grad_norm": 2.110738515853882, "learning_rate": 8.061840621060316e-06, "loss": 0.8789, "step": 4597 }, { "epoch": 0.2745730323659381, "grad_norm": 2.4961748123168945, "learning_rate": 8.061177095083274e-06, "loss": 0.9114, "step": 4598 }, { "epoch": 0.2746327481189538, "grad_norm": 2.91190242767334, "learning_rate": 8.06051356910623e-06, "loss": 0.887, "step": 4599 }, { "epoch": 0.27469246387196944, "grad_norm": 8.102673530578613, "learning_rate": 8.059850043129188e-06, "loss": 0.8945, "step": 4600 }, { "epoch": 0.27469246387196944, "eval_text_loss": 0.9449099898338318, "eval_text_runtime": 15.227, "eval_text_samples_per_second": 262.691, "eval_text_steps_per_second": 0.525, "step": 4600 }, { "epoch": 0.27469246387196944, "eval_image_loss": 0.6668318510055542, "eval_image_runtime": 4.9537, "eval_image_samples_per_second": 807.469, "eval_image_steps_per_second": 1.615, "step": 4600 }, { "epoch": 0.27469246387196944, "eval_video_loss": 1.130873203277588, "eval_video_runtime": 76.3977, "eval_video_samples_per_second": 52.358, "eval_video_steps_per_second": 0.105, "step": 4600 }, { "epoch": 0.27475217962498505, "grad_norm": 2.213564395904541, "learning_rate": 8.059186517152147e-06, "loss": 0.8762, "step": 4601 }, { "epoch": 0.2748118953780007, "grad_norm": 2.002230167388916, "learning_rate": 8.058522991175105e-06, "loss": 0.893, "step": 4602 }, { "epoch": 0.2748716111310164, "grad_norm": 1.978020191192627, "learning_rate": 8.057859465198063e-06, "loss": 0.8776, "step": 4603 }, { "epoch": 0.274931326884032, "grad_norm": 3.1160078048706055, "learning_rate": 8.057195939221021e-06, "loss": 0.881, "step": 4604 }, { "epoch": 0.27499104263704766, "grad_norm": 3.8130788803100586, "learning_rate": 8.056532413243979e-06, "loss": 0.8988, "step": 4605 }, { "epoch": 0.2750507583900633, "grad_norm": 2.6890532970428467, "learning_rate": 8.055868887266937e-06, "loss": 0.9065, "step": 4606 }, { "epoch": 0.27511047414307893, "grad_norm": 2.290210247039795, "learning_rate": 8.055205361289895e-06, "loss": 0.9046, "step": 4607 }, { "epoch": 0.2751701898960946, "grad_norm": 2.627351760864258, "learning_rate": 8.054541835312853e-06, "loss": 0.9466, "step": 4608 }, { "epoch": 0.27522990564911026, "grad_norm": 2.2602508068084717, "learning_rate": 8.053878309335811e-06, "loss": 0.9043, "step": 4609 }, { "epoch": 0.2752896214021259, "grad_norm": 3.584500551223755, "learning_rate": 8.05321478335877e-06, "loss": 0.9007, "step": 4610 }, { "epoch": 0.27534933715514154, "grad_norm": 2.250556230545044, "learning_rate": 8.052551257381726e-06, "loss": 0.888, "step": 4611 }, { "epoch": 0.27540905290815715, "grad_norm": 2.3458423614501953, "learning_rate": 8.051887731404686e-06, "loss": 0.9234, "step": 4612 }, { "epoch": 0.2754687686611728, "grad_norm": 2.1413064002990723, "learning_rate": 8.051224205427644e-06, "loss": 0.9084, "step": 4613 }, { "epoch": 0.2755284844141885, "grad_norm": 1.7777087688446045, "learning_rate": 8.0505606794506e-06, "loss": 0.9269, "step": 4614 }, { "epoch": 0.2755882001672041, "grad_norm": 4.782949447631836, "learning_rate": 8.04989715347356e-06, "loss": 0.9043, "step": 4615 }, { "epoch": 0.27564791592021975, "grad_norm": 2.574976921081543, "learning_rate": 8.049233627496516e-06, "loss": 0.9239, "step": 4616 }, { "epoch": 0.2757076316732354, "grad_norm": 2.0745809078216553, "learning_rate": 8.048570101519474e-06, "loss": 0.8906, "step": 4617 }, { "epoch": 0.27576734742625103, "grad_norm": 2.8034634590148926, "learning_rate": 8.047906575542434e-06, "loss": 0.9287, "step": 4618 }, { "epoch": 0.2758270631792667, "grad_norm": 3.476041793823242, "learning_rate": 8.04724304956539e-06, "loss": 0.9044, "step": 4619 }, { "epoch": 0.27588677893228236, "grad_norm": 4.316910266876221, "learning_rate": 8.046579523588349e-06, "loss": 0.8605, "step": 4620 }, { "epoch": 0.27594649468529797, "grad_norm": 1.81804358959198, "learning_rate": 8.045915997611307e-06, "loss": 0.8974, "step": 4621 }, { "epoch": 0.27600621043831364, "grad_norm": 2.7502853870391846, "learning_rate": 8.045252471634265e-06, "loss": 0.9246, "step": 4622 }, { "epoch": 0.27606592619132925, "grad_norm": 7.119842529296875, "learning_rate": 8.044588945657223e-06, "loss": 0.8509, "step": 4623 }, { "epoch": 0.2761256419443449, "grad_norm": 2.3748271465301514, "learning_rate": 8.043925419680181e-06, "loss": 0.9038, "step": 4624 }, { "epoch": 0.2761853576973606, "grad_norm": 2.347552537918091, "learning_rate": 8.04326189370314e-06, "loss": 0.8982, "step": 4625 }, { "epoch": 0.2762450734503762, "grad_norm": 1.9375697374343872, "learning_rate": 8.042598367726097e-06, "loss": 0.9154, "step": 4626 }, { "epoch": 0.27630478920339185, "grad_norm": 1.9432661533355713, "learning_rate": 8.041934841749055e-06, "loss": 0.9205, "step": 4627 }, { "epoch": 0.2763645049564075, "grad_norm": 2.4602744579315186, "learning_rate": 8.041271315772014e-06, "loss": 0.9641, "step": 4628 }, { "epoch": 0.27642422070942313, "grad_norm": 2.3457865715026855, "learning_rate": 8.04060778979497e-06, "loss": 0.9155, "step": 4629 }, { "epoch": 0.2764839364624388, "grad_norm": 2.743551254272461, "learning_rate": 8.03994426381793e-06, "loss": 0.9057, "step": 4630 }, { "epoch": 0.27654365221545446, "grad_norm": 2.1543362140655518, "learning_rate": 8.039280737840886e-06, "loss": 0.882, "step": 4631 }, { "epoch": 0.27660336796847007, "grad_norm": 2.349463701248169, "learning_rate": 8.038617211863844e-06, "loss": 0.8761, "step": 4632 }, { "epoch": 0.27666308372148574, "grad_norm": 2.5533883571624756, "learning_rate": 8.037953685886804e-06, "loss": 0.9149, "step": 4633 }, { "epoch": 0.27672279947450135, "grad_norm": 3.882657289505005, "learning_rate": 8.03729015990976e-06, "loss": 0.8902, "step": 4634 }, { "epoch": 0.276782515227517, "grad_norm": 2.182331085205078, "learning_rate": 8.036626633932719e-06, "loss": 0.9073, "step": 4635 }, { "epoch": 0.2768422309805327, "grad_norm": 3.017334222793579, "learning_rate": 8.035963107955677e-06, "loss": 0.8827, "step": 4636 }, { "epoch": 0.2769019467335483, "grad_norm": 3.134166955947876, "learning_rate": 8.035299581978635e-06, "loss": 0.8691, "step": 4637 }, { "epoch": 0.27696166248656395, "grad_norm": 1.7596665620803833, "learning_rate": 8.034636056001593e-06, "loss": 0.9045, "step": 4638 }, { "epoch": 0.2770213782395796, "grad_norm": 4.276048183441162, "learning_rate": 8.033972530024551e-06, "loss": 0.9041, "step": 4639 }, { "epoch": 0.27708109399259523, "grad_norm": 3.5842597484588623, "learning_rate": 8.033309004047509e-06, "loss": 0.8318, "step": 4640 }, { "epoch": 0.2771408097456109, "grad_norm": 4.904874324798584, "learning_rate": 8.032645478070467e-06, "loss": 0.9174, "step": 4641 }, { "epoch": 0.27720052549862656, "grad_norm": 1.630623698234558, "learning_rate": 8.031981952093425e-06, "loss": 0.9262, "step": 4642 }, { "epoch": 0.27726024125164217, "grad_norm": 2.002586603164673, "learning_rate": 8.031318426116383e-06, "loss": 0.8629, "step": 4643 }, { "epoch": 0.27731995700465784, "grad_norm": 1.9216437339782715, "learning_rate": 8.030654900139341e-06, "loss": 0.8962, "step": 4644 }, { "epoch": 0.2773796727576735, "grad_norm": 1.8384023904800415, "learning_rate": 8.0299913741623e-06, "loss": 0.86, "step": 4645 }, { "epoch": 0.2774393885106891, "grad_norm": 1.7501521110534668, "learning_rate": 8.029327848185256e-06, "loss": 0.8817, "step": 4646 }, { "epoch": 0.2774991042637048, "grad_norm": 1.7936738729476929, "learning_rate": 8.028664322208216e-06, "loss": 0.9201, "step": 4647 }, { "epoch": 0.2775588200167204, "grad_norm": 2.366459369659424, "learning_rate": 8.028000796231174e-06, "loss": 0.9266, "step": 4648 }, { "epoch": 0.27761853576973605, "grad_norm": 3.070901393890381, "learning_rate": 8.02733727025413e-06, "loss": 0.8833, "step": 4649 }, { "epoch": 0.2776782515227517, "grad_norm": 2.2310924530029297, "learning_rate": 8.026673744277088e-06, "loss": 0.8962, "step": 4650 }, { "epoch": 0.27773796727576733, "grad_norm": 2.3625354766845703, "learning_rate": 8.026010218300047e-06, "loss": 0.9281, "step": 4651 }, { "epoch": 0.277797683028783, "grad_norm": 2.5002636909484863, "learning_rate": 8.025346692323005e-06, "loss": 0.9463, "step": 4652 }, { "epoch": 0.27785739878179866, "grad_norm": 4.183746337890625, "learning_rate": 8.024683166345963e-06, "loss": 0.9286, "step": 4653 }, { "epoch": 0.27791711453481427, "grad_norm": 2.9327099323272705, "learning_rate": 8.02401964036892e-06, "loss": 0.8993, "step": 4654 }, { "epoch": 0.27797683028782993, "grad_norm": 2.959047555923462, "learning_rate": 8.023356114391879e-06, "loss": 0.8805, "step": 4655 }, { "epoch": 0.2780365460408456, "grad_norm": 2.4625861644744873, "learning_rate": 8.022692588414837e-06, "loss": 0.8735, "step": 4656 }, { "epoch": 0.2780962617938612, "grad_norm": 4.211373805999756, "learning_rate": 8.022029062437795e-06, "loss": 0.906, "step": 4657 }, { "epoch": 0.2781559775468769, "grad_norm": 3.1850509643554688, "learning_rate": 8.021365536460753e-06, "loss": 0.9602, "step": 4658 }, { "epoch": 0.2782156932998925, "grad_norm": 1.878467082977295, "learning_rate": 8.020702010483711e-06, "loss": 0.8906, "step": 4659 }, { "epoch": 0.27827540905290815, "grad_norm": 4.571531772613525, "learning_rate": 8.02003848450667e-06, "loss": 0.9031, "step": 4660 }, { "epoch": 0.2783351248059238, "grad_norm": 3.9924819469451904, "learning_rate": 8.019374958529626e-06, "loss": 0.9025, "step": 4661 }, { "epoch": 0.2783948405589394, "grad_norm": 2.5256850719451904, "learning_rate": 8.018711432552586e-06, "loss": 0.9014, "step": 4662 }, { "epoch": 0.2784545563119551, "grad_norm": 1.9137979745864868, "learning_rate": 8.018047906575544e-06, "loss": 0.865, "step": 4663 }, { "epoch": 0.27851427206497076, "grad_norm": 3.068552017211914, "learning_rate": 8.0173843805985e-06, "loss": 0.9072, "step": 4664 }, { "epoch": 0.27857398781798637, "grad_norm": 1.833406686782837, "learning_rate": 8.01672085462146e-06, "loss": 0.9025, "step": 4665 }, { "epoch": 0.27863370357100203, "grad_norm": 2.245521068572998, "learning_rate": 8.016057328644416e-06, "loss": 0.9223, "step": 4666 }, { "epoch": 0.2786934193240177, "grad_norm": 3.092820882797241, "learning_rate": 8.015393802667374e-06, "loss": 0.9234, "step": 4667 }, { "epoch": 0.2787531350770333, "grad_norm": 1.7716996669769287, "learning_rate": 8.014730276690334e-06, "loss": 0.9367, "step": 4668 }, { "epoch": 0.278812850830049, "grad_norm": 2.5592689514160156, "learning_rate": 8.01406675071329e-06, "loss": 0.8914, "step": 4669 }, { "epoch": 0.2788725665830646, "grad_norm": 1.6052813529968262, "learning_rate": 8.013403224736249e-06, "loss": 0.8717, "step": 4670 }, { "epoch": 0.27893228233608025, "grad_norm": 3.5958433151245117, "learning_rate": 8.012739698759207e-06, "loss": 0.9004, "step": 4671 }, { "epoch": 0.2789919980890959, "grad_norm": 4.004790306091309, "learning_rate": 8.012076172782165e-06, "loss": 0.9254, "step": 4672 }, { "epoch": 0.2790517138421115, "grad_norm": 3.2628681659698486, "learning_rate": 8.011412646805123e-06, "loss": 0.9193, "step": 4673 }, { "epoch": 0.2791114295951272, "grad_norm": 9.955321311950684, "learning_rate": 8.010749120828081e-06, "loss": 0.9171, "step": 4674 }, { "epoch": 0.27917114534814286, "grad_norm": 2.1594388484954834, "learning_rate": 8.01008559485104e-06, "loss": 0.907, "step": 4675 }, { "epoch": 0.27923086110115847, "grad_norm": 3.435685873031616, "learning_rate": 8.009422068873997e-06, "loss": 0.8728, "step": 4676 }, { "epoch": 0.27929057685417413, "grad_norm": 2.5450093746185303, "learning_rate": 8.008758542896955e-06, "loss": 0.9052, "step": 4677 }, { "epoch": 0.2793502926071898, "grad_norm": 1.7214841842651367, "learning_rate": 8.008095016919914e-06, "loss": 0.8972, "step": 4678 }, { "epoch": 0.2794100083602054, "grad_norm": 10.198583602905273, "learning_rate": 8.00743149094287e-06, "loss": 0.943, "step": 4679 }, { "epoch": 0.2794697241132211, "grad_norm": 2.2606115341186523, "learning_rate": 8.00676796496583e-06, "loss": 0.8907, "step": 4680 }, { "epoch": 0.27952943986623674, "grad_norm": 2.2164530754089355, "learning_rate": 8.006104438988786e-06, "loss": 0.9261, "step": 4681 }, { "epoch": 0.27958915561925235, "grad_norm": 1.5706082582473755, "learning_rate": 8.005440913011744e-06, "loss": 0.9297, "step": 4682 }, { "epoch": 0.279648871372268, "grad_norm": 2.770392417907715, "learning_rate": 8.004777387034704e-06, "loss": 0.9118, "step": 4683 }, { "epoch": 0.2797085871252836, "grad_norm": 3.094568967819214, "learning_rate": 8.00411386105766e-06, "loss": 0.897, "step": 4684 }, { "epoch": 0.2797683028782993, "grad_norm": 2.6723434925079346, "learning_rate": 8.003450335080619e-06, "loss": 0.9092, "step": 4685 }, { "epoch": 0.27982801863131496, "grad_norm": 8.046465873718262, "learning_rate": 8.002786809103577e-06, "loss": 0.9288, "step": 4686 }, { "epoch": 0.27988773438433057, "grad_norm": 2.1631531715393066, "learning_rate": 8.002123283126535e-06, "loss": 0.9132, "step": 4687 }, { "epoch": 0.27994745013734623, "grad_norm": 1.5267466306686401, "learning_rate": 8.001459757149493e-06, "loss": 0.9236, "step": 4688 }, { "epoch": 0.2800071658903619, "grad_norm": 2.660548686981201, "learning_rate": 8.000796231172451e-06, "loss": 0.9053, "step": 4689 }, { "epoch": 0.2800668816433775, "grad_norm": 2.0419962406158447, "learning_rate": 8.000132705195409e-06, "loss": 0.8722, "step": 4690 }, { "epoch": 0.2801265973963932, "grad_norm": 3.6281023025512695, "learning_rate": 7.999469179218367e-06, "loss": 0.8878, "step": 4691 }, { "epoch": 0.28018631314940884, "grad_norm": 2.90024995803833, "learning_rate": 7.998805653241325e-06, "loss": 0.8809, "step": 4692 }, { "epoch": 0.28024602890242445, "grad_norm": 2.4334726333618164, "learning_rate": 7.998142127264283e-06, "loss": 0.9371, "step": 4693 }, { "epoch": 0.2803057446554401, "grad_norm": 2.4501144886016846, "learning_rate": 7.997478601287241e-06, "loss": 0.9263, "step": 4694 }, { "epoch": 0.2803654604084557, "grad_norm": 2.2941620349884033, "learning_rate": 7.9968150753102e-06, "loss": 0.8763, "step": 4695 }, { "epoch": 0.2804251761614714, "grad_norm": 2.333019971847534, "learning_rate": 7.996151549333156e-06, "loss": 0.9257, "step": 4696 }, { "epoch": 0.28048489191448706, "grad_norm": 1.7379074096679688, "learning_rate": 7.995488023356116e-06, "loss": 0.8989, "step": 4697 }, { "epoch": 0.28054460766750267, "grad_norm": 2.56539249420166, "learning_rate": 7.994824497379074e-06, "loss": 0.8736, "step": 4698 }, { "epoch": 0.28060432342051833, "grad_norm": 3.5103423595428467, "learning_rate": 7.99416097140203e-06, "loss": 0.8919, "step": 4699 }, { "epoch": 0.280664039173534, "grad_norm": 1.6837763786315918, "learning_rate": 7.993497445424988e-06, "loss": 0.9063, "step": 4700 }, { "epoch": 0.2807237549265496, "grad_norm": 2.248246192932129, "learning_rate": 7.992833919447946e-06, "loss": 0.9255, "step": 4701 }, { "epoch": 0.2807834706795653, "grad_norm": 2.0529732704162598, "learning_rate": 7.992170393470905e-06, "loss": 0.8897, "step": 4702 }, { "epoch": 0.28084318643258094, "grad_norm": 4.4375834465026855, "learning_rate": 7.991506867493863e-06, "loss": 0.8982, "step": 4703 }, { "epoch": 0.28090290218559655, "grad_norm": 2.299088954925537, "learning_rate": 7.99084334151682e-06, "loss": 0.9593, "step": 4704 }, { "epoch": 0.2809626179386122, "grad_norm": 2.320429563522339, "learning_rate": 7.990179815539779e-06, "loss": 0.9174, "step": 4705 }, { "epoch": 0.2810223336916279, "grad_norm": 1.8009830713272095, "learning_rate": 7.989516289562737e-06, "loss": 0.8957, "step": 4706 }, { "epoch": 0.2810820494446435, "grad_norm": 2.504148244857788, "learning_rate": 7.988852763585695e-06, "loss": 0.8961, "step": 4707 }, { "epoch": 0.28114176519765915, "grad_norm": 1.9856771230697632, "learning_rate": 7.988189237608653e-06, "loss": 0.8988, "step": 4708 }, { "epoch": 0.28120148095067476, "grad_norm": 3.05006742477417, "learning_rate": 7.987525711631611e-06, "loss": 0.9115, "step": 4709 }, { "epoch": 0.28126119670369043, "grad_norm": 2.985994338989258, "learning_rate": 7.98686218565457e-06, "loss": 0.9512, "step": 4710 }, { "epoch": 0.2813209124567061, "grad_norm": 2.1930360794067383, "learning_rate": 7.986198659677526e-06, "loss": 0.8851, "step": 4711 }, { "epoch": 0.2813806282097217, "grad_norm": 38.34357452392578, "learning_rate": 7.985535133700486e-06, "loss": 0.8908, "step": 4712 }, { "epoch": 0.28144034396273737, "grad_norm": 2.602739095687866, "learning_rate": 7.984871607723444e-06, "loss": 0.9038, "step": 4713 }, { "epoch": 0.28150005971575304, "grad_norm": 2.5215046405792236, "learning_rate": 7.9842080817464e-06, "loss": 0.8948, "step": 4714 }, { "epoch": 0.28155977546876865, "grad_norm": 2.288421869277954, "learning_rate": 7.98354455576936e-06, "loss": 0.9239, "step": 4715 }, { "epoch": 0.2816194912217843, "grad_norm": 2.8628122806549072, "learning_rate": 7.982881029792316e-06, "loss": 0.9231, "step": 4716 }, { "epoch": 0.2816792069748, "grad_norm": 2.1378583908081055, "learning_rate": 7.982217503815274e-06, "loss": 0.8963, "step": 4717 }, { "epoch": 0.2817389227278156, "grad_norm": 4.502227306365967, "learning_rate": 7.981553977838234e-06, "loss": 0.9047, "step": 4718 }, { "epoch": 0.28179863848083125, "grad_norm": 1.954636573791504, "learning_rate": 7.98089045186119e-06, "loss": 0.9033, "step": 4719 }, { "epoch": 0.28185835423384686, "grad_norm": 4.425040245056152, "learning_rate": 7.980226925884149e-06, "loss": 0.8823, "step": 4720 }, { "epoch": 0.28191806998686253, "grad_norm": 2.5225844383239746, "learning_rate": 7.979563399907107e-06, "loss": 0.9166, "step": 4721 }, { "epoch": 0.2819777857398782, "grad_norm": 3.154693126678467, "learning_rate": 7.978899873930065e-06, "loss": 0.907, "step": 4722 }, { "epoch": 0.2820375014928938, "grad_norm": 2.1251752376556396, "learning_rate": 7.978236347953023e-06, "loss": 0.8811, "step": 4723 }, { "epoch": 0.28209721724590947, "grad_norm": 2.055133819580078, "learning_rate": 7.977572821975981e-06, "loss": 0.9073, "step": 4724 }, { "epoch": 0.28215693299892514, "grad_norm": 2.2777018547058105, "learning_rate": 7.97690929599894e-06, "loss": 0.8823, "step": 4725 }, { "epoch": 0.28221664875194075, "grad_norm": 2.521749973297119, "learning_rate": 7.976245770021897e-06, "loss": 0.8969, "step": 4726 }, { "epoch": 0.2822763645049564, "grad_norm": 1.826400876045227, "learning_rate": 7.975582244044855e-06, "loss": 0.8906, "step": 4727 }, { "epoch": 0.2823360802579721, "grad_norm": 2.0347602367401123, "learning_rate": 7.974918718067813e-06, "loss": 0.8571, "step": 4728 }, { "epoch": 0.2823957960109877, "grad_norm": 2.2351555824279785, "learning_rate": 7.97425519209077e-06, "loss": 0.8918, "step": 4729 }, { "epoch": 0.28245551176400335, "grad_norm": 1.538987398147583, "learning_rate": 7.97359166611373e-06, "loss": 0.9244, "step": 4730 }, { "epoch": 0.28251522751701896, "grad_norm": 2.1017472743988037, "learning_rate": 7.972928140136686e-06, "loss": 0.8681, "step": 4731 }, { "epoch": 0.28257494327003463, "grad_norm": 2.1143064498901367, "learning_rate": 7.972264614159644e-06, "loss": 0.9278, "step": 4732 }, { "epoch": 0.2826346590230503, "grad_norm": 2.051682949066162, "learning_rate": 7.971601088182604e-06, "loss": 0.9194, "step": 4733 }, { "epoch": 0.2826943747760659, "grad_norm": 2.064258575439453, "learning_rate": 7.97093756220556e-06, "loss": 0.8709, "step": 4734 }, { "epoch": 0.28275409052908157, "grad_norm": 20.01387596130371, "learning_rate": 7.970274036228518e-06, "loss": 0.9001, "step": 4735 }, { "epoch": 0.28281380628209724, "grad_norm": 2.0264623165130615, "learning_rate": 7.969610510251477e-06, "loss": 0.8878, "step": 4736 }, { "epoch": 0.28287352203511285, "grad_norm": 3.1904704570770264, "learning_rate": 7.968946984274435e-06, "loss": 0.9374, "step": 4737 }, { "epoch": 0.2829332377881285, "grad_norm": 2.3059887886047363, "learning_rate": 7.968283458297393e-06, "loss": 0.9168, "step": 4738 }, { "epoch": 0.2829929535411442, "grad_norm": 1.9798704385757446, "learning_rate": 7.967619932320351e-06, "loss": 0.9052, "step": 4739 }, { "epoch": 0.2830526692941598, "grad_norm": 2.4654624462127686, "learning_rate": 7.966956406343309e-06, "loss": 0.9032, "step": 4740 }, { "epoch": 0.28311238504717545, "grad_norm": 3.092930555343628, "learning_rate": 7.966292880366267e-06, "loss": 0.8807, "step": 4741 }, { "epoch": 0.2831721008001911, "grad_norm": 2.3359758853912354, "learning_rate": 7.965629354389225e-06, "loss": 0.9077, "step": 4742 }, { "epoch": 0.28323181655320673, "grad_norm": 1.67989981174469, "learning_rate": 7.964965828412183e-06, "loss": 0.9063, "step": 4743 }, { "epoch": 0.2832915323062224, "grad_norm": 2.2355268001556396, "learning_rate": 7.964302302435141e-06, "loss": 0.8999, "step": 4744 }, { "epoch": 0.283351248059238, "grad_norm": 3.161970376968384, "learning_rate": 7.9636387764581e-06, "loss": 0.9234, "step": 4745 }, { "epoch": 0.28341096381225367, "grad_norm": 2.534895896911621, "learning_rate": 7.962975250481056e-06, "loss": 0.8736, "step": 4746 }, { "epoch": 0.28347067956526933, "grad_norm": 7.070088863372803, "learning_rate": 7.962311724504016e-06, "loss": 0.911, "step": 4747 }, { "epoch": 0.28353039531828494, "grad_norm": 1.5442010164260864, "learning_rate": 7.961648198526974e-06, "loss": 0.9088, "step": 4748 }, { "epoch": 0.2835901110713006, "grad_norm": 2.6575381755828857, "learning_rate": 7.96098467254993e-06, "loss": 0.9501, "step": 4749 }, { "epoch": 0.2836498268243163, "grad_norm": 2.162238359451294, "learning_rate": 7.960321146572888e-06, "loss": 0.8969, "step": 4750 }, { "epoch": 0.2837095425773319, "grad_norm": 2.5202078819274902, "learning_rate": 7.959657620595846e-06, "loss": 0.9237, "step": 4751 }, { "epoch": 0.28376925833034755, "grad_norm": 2.4084677696228027, "learning_rate": 7.958994094618804e-06, "loss": 0.9195, "step": 4752 }, { "epoch": 0.2838289740833632, "grad_norm": 1.8085099458694458, "learning_rate": 7.958330568641763e-06, "loss": 0.8911, "step": 4753 }, { "epoch": 0.2838886898363788, "grad_norm": 2.3217248916625977, "learning_rate": 7.95766704266472e-06, "loss": 0.9039, "step": 4754 }, { "epoch": 0.2839484055893945, "grad_norm": 5.443002223968506, "learning_rate": 7.957003516687679e-06, "loss": 0.8393, "step": 4755 }, { "epoch": 0.2840081213424101, "grad_norm": 2.945435047149658, "learning_rate": 7.956339990710637e-06, "loss": 0.9367, "step": 4756 }, { "epoch": 0.28406783709542577, "grad_norm": 2.308323383331299, "learning_rate": 7.955676464733595e-06, "loss": 0.8983, "step": 4757 }, { "epoch": 0.28412755284844143, "grad_norm": 2.60573410987854, "learning_rate": 7.955012938756553e-06, "loss": 0.902, "step": 4758 }, { "epoch": 0.28418726860145704, "grad_norm": 1.9957208633422852, "learning_rate": 7.954349412779511e-06, "loss": 0.9026, "step": 4759 }, { "epoch": 0.2842469843544727, "grad_norm": 1.9513919353485107, "learning_rate": 7.95368588680247e-06, "loss": 0.8989, "step": 4760 }, { "epoch": 0.2843067001074884, "grad_norm": 2.4922356605529785, "learning_rate": 7.953022360825426e-06, "loss": 0.9056, "step": 4761 }, { "epoch": 0.284366415860504, "grad_norm": 1.5719012022018433, "learning_rate": 7.952358834848385e-06, "loss": 0.9254, "step": 4762 }, { "epoch": 0.28442613161351965, "grad_norm": 2.7023589611053467, "learning_rate": 7.951695308871344e-06, "loss": 0.8724, "step": 4763 }, { "epoch": 0.2844858473665353, "grad_norm": 3.486737012863159, "learning_rate": 7.9510317828943e-06, "loss": 0.8899, "step": 4764 }, { "epoch": 0.2845455631195509, "grad_norm": 2.6288392543792725, "learning_rate": 7.95036825691726e-06, "loss": 0.9094, "step": 4765 }, { "epoch": 0.2846052788725666, "grad_norm": 2.1803760528564453, "learning_rate": 7.949704730940216e-06, "loss": 0.9196, "step": 4766 }, { "epoch": 0.2846649946255822, "grad_norm": 2.415255546569824, "learning_rate": 7.949041204963174e-06, "loss": 0.9107, "step": 4767 }, { "epoch": 0.28472471037859787, "grad_norm": 2.3751723766326904, "learning_rate": 7.948377678986134e-06, "loss": 0.9356, "step": 4768 }, { "epoch": 0.28478442613161353, "grad_norm": 2.139850378036499, "learning_rate": 7.94771415300909e-06, "loss": 0.8959, "step": 4769 }, { "epoch": 0.28484414188462914, "grad_norm": 2.3716366291046143, "learning_rate": 7.947050627032049e-06, "loss": 0.9174, "step": 4770 }, { "epoch": 0.2849038576376448, "grad_norm": 2.403172492980957, "learning_rate": 7.946387101055007e-06, "loss": 0.911, "step": 4771 }, { "epoch": 0.2849635733906605, "grad_norm": 1.8297817707061768, "learning_rate": 7.945723575077965e-06, "loss": 0.8847, "step": 4772 }, { "epoch": 0.2850232891436761, "grad_norm": 1.9485251903533936, "learning_rate": 7.945060049100923e-06, "loss": 0.9173, "step": 4773 }, { "epoch": 0.28508300489669175, "grad_norm": 2.5500593185424805, "learning_rate": 7.944396523123881e-06, "loss": 0.9101, "step": 4774 }, { "epoch": 0.2851427206497074, "grad_norm": 5.135622501373291, "learning_rate": 7.943732997146839e-06, "loss": 0.8749, "step": 4775 }, { "epoch": 0.285202436402723, "grad_norm": 2.3742918968200684, "learning_rate": 7.943069471169797e-06, "loss": 0.8781, "step": 4776 }, { "epoch": 0.2852621521557387, "grad_norm": 2.037637233734131, "learning_rate": 7.942405945192755e-06, "loss": 0.9443, "step": 4777 }, { "epoch": 0.28532186790875436, "grad_norm": 2.4069392681121826, "learning_rate": 7.941742419215713e-06, "loss": 0.8822, "step": 4778 }, { "epoch": 0.28538158366176997, "grad_norm": 2.91615891456604, "learning_rate": 7.94107889323867e-06, "loss": 0.9021, "step": 4779 }, { "epoch": 0.28544129941478563, "grad_norm": 2.076545000076294, "learning_rate": 7.94041536726163e-06, "loss": 0.9283, "step": 4780 }, { "epoch": 0.28550101516780124, "grad_norm": 2.1695985794067383, "learning_rate": 7.939751841284586e-06, "loss": 0.9363, "step": 4781 }, { "epoch": 0.2855607309208169, "grad_norm": 1.7305158376693726, "learning_rate": 7.939088315307544e-06, "loss": 0.8766, "step": 4782 }, { "epoch": 0.2856204466738326, "grad_norm": 2.1685075759887695, "learning_rate": 7.938424789330504e-06, "loss": 0.8657, "step": 4783 }, { "epoch": 0.2856801624268482, "grad_norm": 2.251707077026367, "learning_rate": 7.93776126335346e-06, "loss": 0.9139, "step": 4784 }, { "epoch": 0.28573987817986385, "grad_norm": 2.433317184448242, "learning_rate": 7.937097737376418e-06, "loss": 0.9013, "step": 4785 }, { "epoch": 0.2857995939328795, "grad_norm": 2.982015371322632, "learning_rate": 7.936434211399377e-06, "loss": 0.9559, "step": 4786 }, { "epoch": 0.2858593096858951, "grad_norm": 1.7019051313400269, "learning_rate": 7.935770685422335e-06, "loss": 0.8855, "step": 4787 }, { "epoch": 0.2859190254389108, "grad_norm": 1.7878257036209106, "learning_rate": 7.935107159445293e-06, "loss": 0.9099, "step": 4788 }, { "epoch": 0.28597874119192646, "grad_norm": 2.055478811264038, "learning_rate": 7.93444363346825e-06, "loss": 0.9233, "step": 4789 }, { "epoch": 0.28603845694494207, "grad_norm": 4.054516792297363, "learning_rate": 7.933780107491209e-06, "loss": 0.906, "step": 4790 }, { "epoch": 0.28609817269795773, "grad_norm": 2.430865526199341, "learning_rate": 7.933116581514167e-06, "loss": 0.9208, "step": 4791 }, { "epoch": 0.28615788845097334, "grad_norm": 2.179246187210083, "learning_rate": 7.932453055537125e-06, "loss": 0.899, "step": 4792 }, { "epoch": 0.286217604203989, "grad_norm": 2.237936496734619, "learning_rate": 7.931789529560083e-06, "loss": 0.8898, "step": 4793 }, { "epoch": 0.2862773199570047, "grad_norm": 1.9297637939453125, "learning_rate": 7.931126003583041e-06, "loss": 0.9322, "step": 4794 }, { "epoch": 0.2863370357100203, "grad_norm": 2.4101204872131348, "learning_rate": 7.930462477606e-06, "loss": 0.9192, "step": 4795 }, { "epoch": 0.28639675146303595, "grad_norm": 2.583367109298706, "learning_rate": 7.929798951628956e-06, "loss": 0.8853, "step": 4796 }, { "epoch": 0.2864564672160516, "grad_norm": 1.956400752067566, "learning_rate": 7.929135425651916e-06, "loss": 0.8746, "step": 4797 }, { "epoch": 0.2865161829690672, "grad_norm": 2.1951193809509277, "learning_rate": 7.928471899674874e-06, "loss": 0.9316, "step": 4798 }, { "epoch": 0.2865758987220829, "grad_norm": 2.3130452632904053, "learning_rate": 7.92780837369783e-06, "loss": 0.9296, "step": 4799 }, { "epoch": 0.28663561447509855, "grad_norm": 1.8082019090652466, "learning_rate": 7.927144847720788e-06, "loss": 0.8616, "step": 4800 }, { "epoch": 0.28663561447509855, "eval_text_loss": 0.9443808197975159, "eval_text_runtime": 15.1782, "eval_text_samples_per_second": 263.535, "eval_text_steps_per_second": 0.527, "step": 4800 }, { "epoch": 0.28663561447509855, "eval_image_loss": 0.6673126220703125, "eval_image_runtime": 5.0418, "eval_image_samples_per_second": 793.368, "eval_image_steps_per_second": 1.587, "step": 4800 }, { "epoch": 0.28663561447509855, "eval_video_loss": 1.1269193887710571, "eval_video_runtime": 76.3648, "eval_video_samples_per_second": 52.38, "eval_video_steps_per_second": 0.105, "step": 4800 }, { "epoch": 0.28669533022811416, "grad_norm": 2.543260335922241, "learning_rate": 7.926481321743746e-06, "loss": 0.9315, "step": 4801 }, { "epoch": 0.28675504598112983, "grad_norm": 2.275956153869629, "learning_rate": 7.925817795766704e-06, "loss": 0.9162, "step": 4802 }, { "epoch": 0.28681476173414544, "grad_norm": 2.698322296142578, "learning_rate": 7.925154269789663e-06, "loss": 0.9127, "step": 4803 }, { "epoch": 0.2868744774871611, "grad_norm": 2.231505870819092, "learning_rate": 7.92449074381262e-06, "loss": 0.9036, "step": 4804 }, { "epoch": 0.28693419324017677, "grad_norm": 3.223618984222412, "learning_rate": 7.923827217835579e-06, "loss": 0.9555, "step": 4805 }, { "epoch": 0.2869939089931924, "grad_norm": 2.0114986896514893, "learning_rate": 7.923163691858537e-06, "loss": 0.9304, "step": 4806 }, { "epoch": 0.28705362474620805, "grad_norm": 2.353318691253662, "learning_rate": 7.922500165881495e-06, "loss": 0.9048, "step": 4807 }, { "epoch": 0.2871133404992237, "grad_norm": 2.108177900314331, "learning_rate": 7.921836639904453e-06, "loss": 0.8918, "step": 4808 }, { "epoch": 0.2871730562522393, "grad_norm": 2.218381643295288, "learning_rate": 7.921173113927411e-06, "loss": 0.8505, "step": 4809 }, { "epoch": 0.287232772005255, "grad_norm": 3.1934702396392822, "learning_rate": 7.92050958795037e-06, "loss": 0.8995, "step": 4810 }, { "epoch": 0.28729248775827065, "grad_norm": 2.113243818283081, "learning_rate": 7.919846061973326e-06, "loss": 0.9033, "step": 4811 }, { "epoch": 0.28735220351128626, "grad_norm": 2.454944133758545, "learning_rate": 7.919182535996285e-06, "loss": 0.8863, "step": 4812 }, { "epoch": 0.28741191926430193, "grad_norm": 2.0925652980804443, "learning_rate": 7.918519010019244e-06, "loss": 0.8635, "step": 4813 }, { "epoch": 0.2874716350173176, "grad_norm": 1.997138261795044, "learning_rate": 7.9178554840422e-06, "loss": 0.8755, "step": 4814 }, { "epoch": 0.2875313507703332, "grad_norm": 1.9667935371398926, "learning_rate": 7.91719195806516e-06, "loss": 0.9158, "step": 4815 }, { "epoch": 0.28759106652334887, "grad_norm": 1.9441273212432861, "learning_rate": 7.916528432088116e-06, "loss": 0.897, "step": 4816 }, { "epoch": 0.2876507822763645, "grad_norm": 2.1469717025756836, "learning_rate": 7.915864906111074e-06, "loss": 0.8707, "step": 4817 }, { "epoch": 0.28771049802938015, "grad_norm": 2.2525153160095215, "learning_rate": 7.915201380134034e-06, "loss": 0.8939, "step": 4818 }, { "epoch": 0.2877702137823958, "grad_norm": 2.07039737701416, "learning_rate": 7.91453785415699e-06, "loss": 0.9155, "step": 4819 }, { "epoch": 0.2878299295354114, "grad_norm": 22.12861442565918, "learning_rate": 7.913874328179949e-06, "loss": 0.9156, "step": 4820 }, { "epoch": 0.2878896452884271, "grad_norm": 2.098421096801758, "learning_rate": 7.913210802202907e-06, "loss": 0.9142, "step": 4821 }, { "epoch": 0.28794936104144275, "grad_norm": 1.9782872200012207, "learning_rate": 7.912547276225865e-06, "loss": 0.9222, "step": 4822 }, { "epoch": 0.28800907679445836, "grad_norm": 2.0566844940185547, "learning_rate": 7.911883750248823e-06, "loss": 0.921, "step": 4823 }, { "epoch": 0.28806879254747403, "grad_norm": 7.217777252197266, "learning_rate": 7.911220224271781e-06, "loss": 0.9039, "step": 4824 }, { "epoch": 0.2881285083004897, "grad_norm": 2.1330184936523438, "learning_rate": 7.910556698294739e-06, "loss": 0.8969, "step": 4825 }, { "epoch": 0.2881882240535053, "grad_norm": 2.241968870162964, "learning_rate": 7.909893172317697e-06, "loss": 0.898, "step": 4826 }, { "epoch": 0.28824793980652097, "grad_norm": 1.7369943857192993, "learning_rate": 7.909229646340655e-06, "loss": 0.8969, "step": 4827 }, { "epoch": 0.2883076555595366, "grad_norm": 2.191969394683838, "learning_rate": 7.908566120363613e-06, "loss": 0.8931, "step": 4828 }, { "epoch": 0.28836737131255225, "grad_norm": 2.2566640377044678, "learning_rate": 7.90790259438657e-06, "loss": 0.8604, "step": 4829 }, { "epoch": 0.2884270870655679, "grad_norm": 8.218108177185059, "learning_rate": 7.90723906840953e-06, "loss": 0.9305, "step": 4830 }, { "epoch": 0.2884868028185835, "grad_norm": 2.0082640647888184, "learning_rate": 7.906575542432486e-06, "loss": 0.8944, "step": 4831 }, { "epoch": 0.2885465185715992, "grad_norm": 2.301042318344116, "learning_rate": 7.905912016455444e-06, "loss": 0.9044, "step": 4832 }, { "epoch": 0.28860623432461485, "grad_norm": 2.099837064743042, "learning_rate": 7.905248490478404e-06, "loss": 0.8666, "step": 4833 }, { "epoch": 0.28866595007763046, "grad_norm": 1.7875663042068481, "learning_rate": 7.90458496450136e-06, "loss": 0.8858, "step": 4834 }, { "epoch": 0.2887256658306461, "grad_norm": 5.370433330535889, "learning_rate": 7.903921438524318e-06, "loss": 0.9094, "step": 4835 }, { "epoch": 0.2887853815836618, "grad_norm": 2.2043561935424805, "learning_rate": 7.903257912547276e-06, "loss": 0.8658, "step": 4836 }, { "epoch": 0.2888450973366774, "grad_norm": 2.579658269882202, "learning_rate": 7.902594386570235e-06, "loss": 0.9235, "step": 4837 }, { "epoch": 0.28890481308969307, "grad_norm": 2.45719051361084, "learning_rate": 7.901930860593193e-06, "loss": 0.9336, "step": 4838 }, { "epoch": 0.2889645288427087, "grad_norm": 2.3800432682037354, "learning_rate": 7.90126733461615e-06, "loss": 0.8992, "step": 4839 }, { "epoch": 0.28902424459572434, "grad_norm": 2.2261085510253906, "learning_rate": 7.900603808639109e-06, "loss": 0.8483, "step": 4840 }, { "epoch": 0.28908396034874, "grad_norm": 1.4886155128479004, "learning_rate": 7.899940282662067e-06, "loss": 0.9158, "step": 4841 }, { "epoch": 0.2891436761017556, "grad_norm": 4.049009799957275, "learning_rate": 7.899276756685025e-06, "loss": 0.8801, "step": 4842 }, { "epoch": 0.2892033918547713, "grad_norm": 2.311075448989868, "learning_rate": 7.898613230707983e-06, "loss": 0.884, "step": 4843 }, { "epoch": 0.28926310760778695, "grad_norm": 1.972247838973999, "learning_rate": 7.897949704730941e-06, "loss": 0.8879, "step": 4844 }, { "epoch": 0.28932282336080256, "grad_norm": 2.280099630355835, "learning_rate": 7.8972861787539e-06, "loss": 0.9148, "step": 4845 }, { "epoch": 0.2893825391138182, "grad_norm": 15.957112312316895, "learning_rate": 7.896622652776856e-06, "loss": 0.9177, "step": 4846 }, { "epoch": 0.2894422548668339, "grad_norm": 2.7892374992370605, "learning_rate": 7.895959126799816e-06, "loss": 0.8861, "step": 4847 }, { "epoch": 0.2895019706198495, "grad_norm": 1.6758400201797485, "learning_rate": 7.895295600822774e-06, "loss": 0.9313, "step": 4848 }, { "epoch": 0.28956168637286517, "grad_norm": 3.403909921646118, "learning_rate": 7.89463207484573e-06, "loss": 0.9043, "step": 4849 }, { "epoch": 0.28962140212588083, "grad_norm": 2.39100980758667, "learning_rate": 7.89396854886869e-06, "loss": 0.8828, "step": 4850 }, { "epoch": 0.28968111787889644, "grad_norm": 2.6171064376831055, "learning_rate": 7.893305022891646e-06, "loss": 0.9067, "step": 4851 }, { "epoch": 0.2897408336319121, "grad_norm": 3.2352845668792725, "learning_rate": 7.892641496914604e-06, "loss": 0.8955, "step": 4852 }, { "epoch": 0.2898005493849277, "grad_norm": 1.6596152782440186, "learning_rate": 7.891977970937562e-06, "loss": 0.905, "step": 4853 }, { "epoch": 0.2898602651379434, "grad_norm": 2.477827548980713, "learning_rate": 7.89131444496052e-06, "loss": 0.9173, "step": 4854 }, { "epoch": 0.28991998089095905, "grad_norm": 2.148404121398926, "learning_rate": 7.890650918983479e-06, "loss": 0.9163, "step": 4855 }, { "epoch": 0.28997969664397466, "grad_norm": 1.8438961505889893, "learning_rate": 7.889987393006437e-06, "loss": 0.8991, "step": 4856 }, { "epoch": 0.2900394123969903, "grad_norm": 1.6624284982681274, "learning_rate": 7.889323867029395e-06, "loss": 0.9059, "step": 4857 }, { "epoch": 0.290099128150006, "grad_norm": 1.6463104486465454, "learning_rate": 7.888660341052353e-06, "loss": 0.9226, "step": 4858 }, { "epoch": 0.2901588439030216, "grad_norm": 2.24405837059021, "learning_rate": 7.887996815075311e-06, "loss": 0.9252, "step": 4859 }, { "epoch": 0.29021855965603727, "grad_norm": 2.734222650527954, "learning_rate": 7.88733328909827e-06, "loss": 0.9128, "step": 4860 }, { "epoch": 0.29027827540905293, "grad_norm": 2.2298264503479004, "learning_rate": 7.886669763121226e-06, "loss": 0.895, "step": 4861 }, { "epoch": 0.29033799116206854, "grad_norm": 2.2847230434417725, "learning_rate": 7.886006237144185e-06, "loss": 0.8991, "step": 4862 }, { "epoch": 0.2903977069150842, "grad_norm": 3.038161277770996, "learning_rate": 7.885342711167143e-06, "loss": 0.9314, "step": 4863 }, { "epoch": 0.2904574226680998, "grad_norm": 2.2862002849578857, "learning_rate": 7.8846791851901e-06, "loss": 0.9056, "step": 4864 }, { "epoch": 0.2905171384211155, "grad_norm": 4.14421272277832, "learning_rate": 7.88401565921306e-06, "loss": 0.8937, "step": 4865 }, { "epoch": 0.29057685417413115, "grad_norm": 1.9750913381576538, "learning_rate": 7.883352133236016e-06, "loss": 0.9268, "step": 4866 }, { "epoch": 0.29063656992714676, "grad_norm": 2.8681674003601074, "learning_rate": 7.882688607258974e-06, "loss": 0.9048, "step": 4867 }, { "epoch": 0.2906962856801624, "grad_norm": 2.165391683578491, "learning_rate": 7.882025081281934e-06, "loss": 0.8422, "step": 4868 }, { "epoch": 0.2907560014331781, "grad_norm": 1.997154712677002, "learning_rate": 7.88136155530489e-06, "loss": 0.8996, "step": 4869 }, { "epoch": 0.2908157171861937, "grad_norm": 1.9860224723815918, "learning_rate": 7.880698029327848e-06, "loss": 0.8635, "step": 4870 }, { "epoch": 0.29087543293920937, "grad_norm": 1.8448361158370972, "learning_rate": 7.880034503350807e-06, "loss": 0.8965, "step": 4871 }, { "epoch": 0.29093514869222503, "grad_norm": 2.182020425796509, "learning_rate": 7.879370977373765e-06, "loss": 0.9139, "step": 4872 }, { "epoch": 0.29099486444524064, "grad_norm": 3.6406965255737305, "learning_rate": 7.878707451396723e-06, "loss": 0.8737, "step": 4873 }, { "epoch": 0.2910545801982563, "grad_norm": 2.5072340965270996, "learning_rate": 7.878043925419681e-06, "loss": 0.8823, "step": 4874 }, { "epoch": 0.291114295951272, "grad_norm": 1.8793432712554932, "learning_rate": 7.877380399442639e-06, "loss": 0.8716, "step": 4875 }, { "epoch": 0.2911740117042876, "grad_norm": 1.8721287250518799, "learning_rate": 7.876716873465597e-06, "loss": 0.9054, "step": 4876 }, { "epoch": 0.29123372745730325, "grad_norm": 2.3737733364105225, "learning_rate": 7.876053347488555e-06, "loss": 0.903, "step": 4877 }, { "epoch": 0.29129344321031886, "grad_norm": 2.238536834716797, "learning_rate": 7.875389821511513e-06, "loss": 0.908, "step": 4878 }, { "epoch": 0.2913531589633345, "grad_norm": 1.9632500410079956, "learning_rate": 7.874726295534471e-06, "loss": 0.8647, "step": 4879 }, { "epoch": 0.2914128747163502, "grad_norm": 3.248914957046509, "learning_rate": 7.87406276955743e-06, "loss": 0.9512, "step": 4880 }, { "epoch": 0.2914725904693658, "grad_norm": 1.8949129581451416, "learning_rate": 7.873399243580386e-06, "loss": 0.8826, "step": 4881 }, { "epoch": 0.29153230622238147, "grad_norm": 2.1597135066986084, "learning_rate": 7.872735717603344e-06, "loss": 0.9041, "step": 4882 }, { "epoch": 0.29159202197539713, "grad_norm": 1.7582213878631592, "learning_rate": 7.872072191626304e-06, "loss": 0.8891, "step": 4883 }, { "epoch": 0.29165173772841274, "grad_norm": 1.9829707145690918, "learning_rate": 7.87140866564926e-06, "loss": 0.9213, "step": 4884 }, { "epoch": 0.2917114534814284, "grad_norm": 2.472548246383667, "learning_rate": 7.870745139672218e-06, "loss": 0.8781, "step": 4885 }, { "epoch": 0.29177116923444407, "grad_norm": 2.3710668087005615, "learning_rate": 7.870081613695176e-06, "loss": 0.8856, "step": 4886 }, { "epoch": 0.2918308849874597, "grad_norm": 2.700864791870117, "learning_rate": 7.869418087718134e-06, "loss": 0.8778, "step": 4887 }, { "epoch": 0.29189060074047535, "grad_norm": 3.006068468093872, "learning_rate": 7.868754561741093e-06, "loss": 0.872, "step": 4888 }, { "epoch": 0.29195031649349096, "grad_norm": 1.90482759475708, "learning_rate": 7.86809103576405e-06, "loss": 0.9195, "step": 4889 }, { "epoch": 0.2920100322465066, "grad_norm": 2.637300491333008, "learning_rate": 7.867427509787009e-06, "loss": 0.8982, "step": 4890 }, { "epoch": 0.2920697479995223, "grad_norm": 1.897375464439392, "learning_rate": 7.866763983809967e-06, "loss": 0.9081, "step": 4891 }, { "epoch": 0.2921294637525379, "grad_norm": 2.7421681880950928, "learning_rate": 7.866100457832925e-06, "loss": 0.8876, "step": 4892 }, { "epoch": 0.29218917950555356, "grad_norm": 3.6526970863342285, "learning_rate": 7.865436931855883e-06, "loss": 0.8813, "step": 4893 }, { "epoch": 0.29224889525856923, "grad_norm": 2.1707046031951904, "learning_rate": 7.864773405878841e-06, "loss": 0.9347, "step": 4894 }, { "epoch": 0.29230861101158484, "grad_norm": 2.778320074081421, "learning_rate": 7.8641098799018e-06, "loss": 0.9029, "step": 4895 }, { "epoch": 0.2923683267646005, "grad_norm": 2.1567800045013428, "learning_rate": 7.863446353924756e-06, "loss": 0.9091, "step": 4896 }, { "epoch": 0.29242804251761617, "grad_norm": 1.7548437118530273, "learning_rate": 7.862782827947716e-06, "loss": 0.8905, "step": 4897 }, { "epoch": 0.2924877582706318, "grad_norm": 1.766861915588379, "learning_rate": 7.862119301970674e-06, "loss": 0.9081, "step": 4898 }, { "epoch": 0.29254747402364745, "grad_norm": 7.988877773284912, "learning_rate": 7.86145577599363e-06, "loss": 0.8998, "step": 4899 }, { "epoch": 0.29260718977666306, "grad_norm": 2.4918267726898193, "learning_rate": 7.86079225001659e-06, "loss": 0.9399, "step": 4900 }, { "epoch": 0.2926669055296787, "grad_norm": 2.147812604904175, "learning_rate": 7.860128724039546e-06, "loss": 0.8885, "step": 4901 }, { "epoch": 0.2927266212826944, "grad_norm": 2.136845350265503, "learning_rate": 7.859465198062504e-06, "loss": 0.9089, "step": 4902 }, { "epoch": 0.29278633703571, "grad_norm": 1.592042088508606, "learning_rate": 7.858801672085462e-06, "loss": 0.9118, "step": 4903 }, { "epoch": 0.29284605278872566, "grad_norm": 1.6053168773651123, "learning_rate": 7.85813814610842e-06, "loss": 0.8868, "step": 4904 }, { "epoch": 0.29290576854174133, "grad_norm": 5.5556960105896, "learning_rate": 7.857474620131379e-06, "loss": 0.9117, "step": 4905 }, { "epoch": 0.29296548429475694, "grad_norm": 2.9570443630218506, "learning_rate": 7.856811094154337e-06, "loss": 0.9469, "step": 4906 }, { "epoch": 0.2930252000477726, "grad_norm": 2.258244037628174, "learning_rate": 7.856147568177295e-06, "loss": 0.913, "step": 4907 }, { "epoch": 0.29308491580078827, "grad_norm": 3.447564125061035, "learning_rate": 7.855484042200253e-06, "loss": 0.8995, "step": 4908 }, { "epoch": 0.2931446315538039, "grad_norm": 3.124640703201294, "learning_rate": 7.854820516223211e-06, "loss": 0.8614, "step": 4909 }, { "epoch": 0.29320434730681955, "grad_norm": 4.9081268310546875, "learning_rate": 7.854156990246169e-06, "loss": 0.8933, "step": 4910 }, { "epoch": 0.2932640630598352, "grad_norm": 2.0612664222717285, "learning_rate": 7.853493464269126e-06, "loss": 0.8936, "step": 4911 }, { "epoch": 0.2933237788128508, "grad_norm": 2.5569100379943848, "learning_rate": 7.852829938292085e-06, "loss": 0.9232, "step": 4912 }, { "epoch": 0.2933834945658665, "grad_norm": 2.5304369926452637, "learning_rate": 7.852166412315043e-06, "loss": 0.8972, "step": 4913 }, { "epoch": 0.2934432103188821, "grad_norm": 2.4108238220214844, "learning_rate": 7.851502886338e-06, "loss": 0.9181, "step": 4914 }, { "epoch": 0.29350292607189776, "grad_norm": 2.3703598976135254, "learning_rate": 7.85083936036096e-06, "loss": 0.8938, "step": 4915 }, { "epoch": 0.29356264182491343, "grad_norm": 1.9370861053466797, "learning_rate": 7.850175834383916e-06, "loss": 0.922, "step": 4916 }, { "epoch": 0.29362235757792904, "grad_norm": 2.8497297763824463, "learning_rate": 7.849512308406874e-06, "loss": 0.9319, "step": 4917 }, { "epoch": 0.2936820733309447, "grad_norm": 3.9416074752807617, "learning_rate": 7.848848782429834e-06, "loss": 0.9194, "step": 4918 }, { "epoch": 0.29374178908396037, "grad_norm": 2.393259286880493, "learning_rate": 7.84818525645279e-06, "loss": 0.8948, "step": 4919 }, { "epoch": 0.293801504836976, "grad_norm": 3.389252185821533, "learning_rate": 7.847521730475748e-06, "loss": 0.9295, "step": 4920 }, { "epoch": 0.29386122058999165, "grad_norm": 2.091440439224243, "learning_rate": 7.846858204498707e-06, "loss": 0.9227, "step": 4921 }, { "epoch": 0.2939209363430073, "grad_norm": 1.826653003692627, "learning_rate": 7.846194678521665e-06, "loss": 0.8916, "step": 4922 }, { "epoch": 0.2939806520960229, "grad_norm": 3.186497211456299, "learning_rate": 7.845531152544623e-06, "loss": 0.8999, "step": 4923 }, { "epoch": 0.2940403678490386, "grad_norm": 2.101696252822876, "learning_rate": 7.84486762656758e-06, "loss": 0.8813, "step": 4924 }, { "epoch": 0.2941000836020542, "grad_norm": 2.893425703048706, "learning_rate": 7.844204100590539e-06, "loss": 0.9193, "step": 4925 }, { "epoch": 0.29415979935506986, "grad_norm": 1.7736822366714478, "learning_rate": 7.843540574613497e-06, "loss": 0.8835, "step": 4926 }, { "epoch": 0.2942195151080855, "grad_norm": 3.0270228385925293, "learning_rate": 7.842877048636455e-06, "loss": 0.8937, "step": 4927 }, { "epoch": 0.29427923086110114, "grad_norm": 2.1566386222839355, "learning_rate": 7.842213522659413e-06, "loss": 0.9047, "step": 4928 }, { "epoch": 0.2943389466141168, "grad_norm": 4.140047550201416, "learning_rate": 7.841549996682371e-06, "loss": 0.9052, "step": 4929 }, { "epoch": 0.29439866236713247, "grad_norm": 2.0921578407287598, "learning_rate": 7.84088647070533e-06, "loss": 0.9139, "step": 4930 }, { "epoch": 0.2944583781201481, "grad_norm": 2.7034878730773926, "learning_rate": 7.840222944728286e-06, "loss": 0.9056, "step": 4931 }, { "epoch": 0.29451809387316374, "grad_norm": 2.0116052627563477, "learning_rate": 7.839559418751244e-06, "loss": 0.8863, "step": 4932 }, { "epoch": 0.2945778096261794, "grad_norm": 1.8880175352096558, "learning_rate": 7.838895892774204e-06, "loss": 0.8811, "step": 4933 }, { "epoch": 0.294637525379195, "grad_norm": 1.9140043258666992, "learning_rate": 7.83823236679716e-06, "loss": 0.8953, "step": 4934 }, { "epoch": 0.2946972411322107, "grad_norm": 2.195504903793335, "learning_rate": 7.837568840820118e-06, "loss": 0.9194, "step": 4935 }, { "epoch": 0.2947569568852263, "grad_norm": 1.7853337526321411, "learning_rate": 7.836905314843076e-06, "loss": 0.8741, "step": 4936 }, { "epoch": 0.29481667263824196, "grad_norm": 2.5459635257720947, "learning_rate": 7.836241788866034e-06, "loss": 0.8982, "step": 4937 }, { "epoch": 0.2948763883912576, "grad_norm": 1.7936670780181885, "learning_rate": 7.835578262888993e-06, "loss": 0.8761, "step": 4938 }, { "epoch": 0.29493610414427324, "grad_norm": 1.9785397052764893, "learning_rate": 7.83491473691195e-06, "loss": 0.893, "step": 4939 }, { "epoch": 0.2949958198972889, "grad_norm": 2.193485975265503, "learning_rate": 7.834251210934909e-06, "loss": 0.8739, "step": 4940 }, { "epoch": 0.29505553565030457, "grad_norm": 2.3940329551696777, "learning_rate": 7.833587684957867e-06, "loss": 0.9011, "step": 4941 }, { "epoch": 0.2951152514033202, "grad_norm": 2.861819267272949, "learning_rate": 7.832924158980825e-06, "loss": 0.9055, "step": 4942 }, { "epoch": 0.29517496715633584, "grad_norm": 2.0647897720336914, "learning_rate": 7.832260633003783e-06, "loss": 0.9033, "step": 4943 }, { "epoch": 0.2952346829093515, "grad_norm": 2.3416805267333984, "learning_rate": 7.831597107026741e-06, "loss": 0.9137, "step": 4944 }, { "epoch": 0.2952943986623671, "grad_norm": 1.8210028409957886, "learning_rate": 7.8309335810497e-06, "loss": 0.8852, "step": 4945 }, { "epoch": 0.2953541144153828, "grad_norm": 2.11757230758667, "learning_rate": 7.830270055072656e-06, "loss": 0.9211, "step": 4946 }, { "epoch": 0.29541383016839845, "grad_norm": 2.8088717460632324, "learning_rate": 7.829606529095615e-06, "loss": 0.8872, "step": 4947 }, { "epoch": 0.29547354592141406, "grad_norm": 6.768397808074951, "learning_rate": 7.828943003118574e-06, "loss": 0.8906, "step": 4948 }, { "epoch": 0.2955332616744297, "grad_norm": 1.9889167547225952, "learning_rate": 7.82827947714153e-06, "loss": 0.8577, "step": 4949 }, { "epoch": 0.29559297742744534, "grad_norm": 2.0634679794311523, "learning_rate": 7.82761595116449e-06, "loss": 0.9177, "step": 4950 }, { "epoch": 0.295652693180461, "grad_norm": 2.5756988525390625, "learning_rate": 7.826952425187446e-06, "loss": 0.8994, "step": 4951 }, { "epoch": 0.29571240893347667, "grad_norm": 3.2122833728790283, "learning_rate": 7.826288899210404e-06, "loss": 0.882, "step": 4952 }, { "epoch": 0.2957721246864923, "grad_norm": 2.015979290008545, "learning_rate": 7.825625373233362e-06, "loss": 0.855, "step": 4953 }, { "epoch": 0.29583184043950794, "grad_norm": 2.3897926807403564, "learning_rate": 7.82496184725632e-06, "loss": 0.887, "step": 4954 }, { "epoch": 0.2958915561925236, "grad_norm": 6.282131671905518, "learning_rate": 7.824298321279279e-06, "loss": 0.9239, "step": 4955 }, { "epoch": 0.2959512719455392, "grad_norm": 1.6113040447235107, "learning_rate": 7.823634795302237e-06, "loss": 0.9139, "step": 4956 }, { "epoch": 0.2960109876985549, "grad_norm": 1.8033685684204102, "learning_rate": 7.822971269325195e-06, "loss": 0.913, "step": 4957 }, { "epoch": 0.29607070345157055, "grad_norm": 2.2638988494873047, "learning_rate": 7.822307743348153e-06, "loss": 0.8984, "step": 4958 }, { "epoch": 0.29613041920458616, "grad_norm": 2.5454201698303223, "learning_rate": 7.821644217371111e-06, "loss": 0.8913, "step": 4959 }, { "epoch": 0.2961901349576018, "grad_norm": 3.5422990322113037, "learning_rate": 7.820980691394069e-06, "loss": 0.887, "step": 4960 }, { "epoch": 0.29624985071061744, "grad_norm": 2.4636027812957764, "learning_rate": 7.820317165417025e-06, "loss": 0.913, "step": 4961 }, { "epoch": 0.2963095664636331, "grad_norm": 4.476391792297363, "learning_rate": 7.819653639439985e-06, "loss": 0.9275, "step": 4962 }, { "epoch": 0.29636928221664877, "grad_norm": 2.080321788787842, "learning_rate": 7.818990113462943e-06, "loss": 0.904, "step": 4963 }, { "epoch": 0.2964289979696644, "grad_norm": 2.5544826984405518, "learning_rate": 7.8183265874859e-06, "loss": 0.9207, "step": 4964 }, { "epoch": 0.29648871372268004, "grad_norm": 2.002317428588867, "learning_rate": 7.81766306150886e-06, "loss": 0.9087, "step": 4965 }, { "epoch": 0.2965484294756957, "grad_norm": 2.7883172035217285, "learning_rate": 7.816999535531816e-06, "loss": 0.9054, "step": 4966 }, { "epoch": 0.2966081452287113, "grad_norm": 1.949971079826355, "learning_rate": 7.816336009554774e-06, "loss": 0.8966, "step": 4967 }, { "epoch": 0.296667860981727, "grad_norm": 2.6555583477020264, "learning_rate": 7.815672483577734e-06, "loss": 0.9286, "step": 4968 }, { "epoch": 0.29672757673474265, "grad_norm": 2.026383399963379, "learning_rate": 7.81500895760069e-06, "loss": 0.8863, "step": 4969 }, { "epoch": 0.29678729248775826, "grad_norm": 2.7359297275543213, "learning_rate": 7.814345431623648e-06, "loss": 0.9202, "step": 4970 }, { "epoch": 0.2968470082407739, "grad_norm": 1.8345215320587158, "learning_rate": 7.813681905646606e-06, "loss": 0.8995, "step": 4971 }, { "epoch": 0.29690672399378953, "grad_norm": 2.1859419345855713, "learning_rate": 7.813018379669565e-06, "loss": 0.9001, "step": 4972 }, { "epoch": 0.2969664397468052, "grad_norm": 2.382568836212158, "learning_rate": 7.812354853692523e-06, "loss": 0.8877, "step": 4973 }, { "epoch": 0.29702615549982087, "grad_norm": 2.925508737564087, "learning_rate": 7.81169132771548e-06, "loss": 0.9033, "step": 4974 }, { "epoch": 0.2970858712528365, "grad_norm": 4.289122104644775, "learning_rate": 7.811027801738439e-06, "loss": 0.9217, "step": 4975 }, { "epoch": 0.29714558700585214, "grad_norm": 2.3457400798797607, "learning_rate": 7.810364275761397e-06, "loss": 0.9522, "step": 4976 }, { "epoch": 0.2972053027588678, "grad_norm": 3.3495380878448486, "learning_rate": 7.809700749784355e-06, "loss": 0.8928, "step": 4977 }, { "epoch": 0.2972650185118834, "grad_norm": 2.6607961654663086, "learning_rate": 7.809037223807313e-06, "loss": 0.872, "step": 4978 }, { "epoch": 0.2973247342648991, "grad_norm": 2.639756441116333, "learning_rate": 7.808373697830271e-06, "loss": 0.9358, "step": 4979 }, { "epoch": 0.29738445001791475, "grad_norm": 2.0198440551757812, "learning_rate": 7.80771017185323e-06, "loss": 0.8694, "step": 4980 }, { "epoch": 0.29744416577093036, "grad_norm": 2.4270122051239014, "learning_rate": 7.807046645876186e-06, "loss": 0.9269, "step": 4981 }, { "epoch": 0.297503881523946, "grad_norm": 1.6915605068206787, "learning_rate": 7.806383119899144e-06, "loss": 0.8591, "step": 4982 }, { "epoch": 0.2975635972769617, "grad_norm": 3.3284106254577637, "learning_rate": 7.805719593922104e-06, "loss": 0.9188, "step": 4983 }, { "epoch": 0.2976233130299773, "grad_norm": 2.3747949600219727, "learning_rate": 7.80505606794506e-06, "loss": 0.868, "step": 4984 }, { "epoch": 0.29768302878299296, "grad_norm": 2.4137089252471924, "learning_rate": 7.804392541968018e-06, "loss": 0.8744, "step": 4985 }, { "epoch": 0.2977427445360086, "grad_norm": 3.1071085929870605, "learning_rate": 7.803729015990976e-06, "loss": 0.8711, "step": 4986 }, { "epoch": 0.29780246028902424, "grad_norm": 1.9603726863861084, "learning_rate": 7.803065490013934e-06, "loss": 0.9239, "step": 4987 }, { "epoch": 0.2978621760420399, "grad_norm": 2.7719602584838867, "learning_rate": 7.802401964036892e-06, "loss": 0.8555, "step": 4988 }, { "epoch": 0.2979218917950555, "grad_norm": 2.275573968887329, "learning_rate": 7.80173843805985e-06, "loss": 0.8932, "step": 4989 }, { "epoch": 0.2979816075480712, "grad_norm": 2.4123165607452393, "learning_rate": 7.801074912082809e-06, "loss": 0.9194, "step": 4990 }, { "epoch": 0.29804132330108685, "grad_norm": 1.7462711334228516, "learning_rate": 7.800411386105767e-06, "loss": 0.8711, "step": 4991 }, { "epoch": 0.29810103905410246, "grad_norm": 2.3830862045288086, "learning_rate": 7.799747860128725e-06, "loss": 0.8925, "step": 4992 }, { "epoch": 0.2981607548071181, "grad_norm": 1.6565773487091064, "learning_rate": 7.799084334151683e-06, "loss": 0.8635, "step": 4993 }, { "epoch": 0.2982204705601338, "grad_norm": 2.44551944732666, "learning_rate": 7.798420808174641e-06, "loss": 0.8759, "step": 4994 }, { "epoch": 0.2982801863131494, "grad_norm": 2.1126372814178467, "learning_rate": 7.7977572821976e-06, "loss": 0.8862, "step": 4995 }, { "epoch": 0.29833990206616506, "grad_norm": 2.3992366790771484, "learning_rate": 7.797093756220556e-06, "loss": 0.92, "step": 4996 }, { "epoch": 0.2983996178191807, "grad_norm": 2.3804831504821777, "learning_rate": 7.796430230243515e-06, "loss": 0.9278, "step": 4997 }, { "epoch": 0.29845933357219634, "grad_norm": 2.7431695461273193, "learning_rate": 7.795766704266473e-06, "loss": 0.8503, "step": 4998 }, { "epoch": 0.298519049325212, "grad_norm": 3.9286465644836426, "learning_rate": 7.79510317828943e-06, "loss": 0.9553, "step": 4999 }, { "epoch": 0.2985787650782276, "grad_norm": 1.9914264678955078, "learning_rate": 7.79443965231239e-06, "loss": 0.8872, "step": 5000 }, { "epoch": 0.2985787650782276, "eval_text_loss": 0.940213143825531, "eval_text_runtime": 15.1797, "eval_text_samples_per_second": 263.51, "eval_text_steps_per_second": 0.527, "step": 5000 }, { "epoch": 0.2985787650782276, "eval_image_loss": 0.6633392572402954, "eval_image_runtime": 5.0267, "eval_image_samples_per_second": 795.753, "eval_image_steps_per_second": 1.592, "step": 5000 }, { "epoch": 0.2985787650782276, "eval_video_loss": 1.1211092472076416, "eval_video_runtime": 76.3729, "eval_video_samples_per_second": 52.375, "eval_video_steps_per_second": 0.105, "step": 5000 }, { "epoch": 0.2986384808312433, "grad_norm": 2.0886666774749756, "learning_rate": 7.793776126335346e-06, "loss": 0.8977, "step": 5001 }, { "epoch": 0.29869819658425895, "grad_norm": 1.7685617208480835, "learning_rate": 7.793112600358304e-06, "loss": 0.8678, "step": 5002 }, { "epoch": 0.29875791233727456, "grad_norm": 2.864792585372925, "learning_rate": 7.792449074381262e-06, "loss": 0.8596, "step": 5003 }, { "epoch": 0.2988176280902902, "grad_norm": 2.57133150100708, "learning_rate": 7.79178554840422e-06, "loss": 0.9002, "step": 5004 }, { "epoch": 0.2988773438433059, "grad_norm": 2.652395009994507, "learning_rate": 7.791122022427178e-06, "loss": 0.8785, "step": 5005 }, { "epoch": 0.2989370595963215, "grad_norm": 2.4114737510681152, "learning_rate": 7.790458496450137e-06, "loss": 0.8988, "step": 5006 }, { "epoch": 0.29899677534933716, "grad_norm": 2.5528619289398193, "learning_rate": 7.789794970473095e-06, "loss": 0.9206, "step": 5007 }, { "epoch": 0.29905649110235283, "grad_norm": 2.6211533546447754, "learning_rate": 7.789131444496053e-06, "loss": 0.9037, "step": 5008 }, { "epoch": 0.29911620685536844, "grad_norm": 1.974682092666626, "learning_rate": 7.788467918519011e-06, "loss": 0.8886, "step": 5009 }, { "epoch": 0.2991759226083841, "grad_norm": 1.7213605642318726, "learning_rate": 7.787804392541969e-06, "loss": 0.8833, "step": 5010 }, { "epoch": 0.2992356383613997, "grad_norm": 2.2066004276275635, "learning_rate": 7.787140866564925e-06, "loss": 0.9034, "step": 5011 }, { "epoch": 0.2992953541144154, "grad_norm": 2.173377513885498, "learning_rate": 7.786477340587885e-06, "loss": 0.8958, "step": 5012 }, { "epoch": 0.29935506986743105, "grad_norm": 2.9191203117370605, "learning_rate": 7.785813814610843e-06, "loss": 0.8938, "step": 5013 }, { "epoch": 0.29941478562044666, "grad_norm": 2.1701595783233643, "learning_rate": 7.7851502886338e-06, "loss": 0.9213, "step": 5014 }, { "epoch": 0.2994745013734623, "grad_norm": 1.8565127849578857, "learning_rate": 7.78448676265676e-06, "loss": 0.9629, "step": 5015 }, { "epoch": 0.299534217126478, "grad_norm": 4.570680141448975, "learning_rate": 7.783823236679716e-06, "loss": 0.8785, "step": 5016 }, { "epoch": 0.2995939328794936, "grad_norm": 2.7522284984588623, "learning_rate": 7.783159710702674e-06, "loss": 0.8799, "step": 5017 }, { "epoch": 0.29965364863250926, "grad_norm": 2.5280823707580566, "learning_rate": 7.782496184725634e-06, "loss": 0.8967, "step": 5018 }, { "epoch": 0.2997133643855249, "grad_norm": 2.0901682376861572, "learning_rate": 7.78183265874859e-06, "loss": 0.8751, "step": 5019 }, { "epoch": 0.29977308013854054, "grad_norm": 2.9986135959625244, "learning_rate": 7.781169132771548e-06, "loss": 0.8442, "step": 5020 }, { "epoch": 0.2998327958915562, "grad_norm": 1.8278898000717163, "learning_rate": 7.780505606794506e-06, "loss": 0.8925, "step": 5021 }, { "epoch": 0.2998925116445718, "grad_norm": 2.2644622325897217, "learning_rate": 7.779842080817465e-06, "loss": 0.8748, "step": 5022 }, { "epoch": 0.2999522273975875, "grad_norm": 2.115347146987915, "learning_rate": 7.779178554840423e-06, "loss": 0.8989, "step": 5023 }, { "epoch": 0.30001194315060314, "grad_norm": 2.3272945880889893, "learning_rate": 7.77851502886338e-06, "loss": 0.9335, "step": 5024 }, { "epoch": 0.30007165890361875, "grad_norm": 2.073812246322632, "learning_rate": 7.777851502886339e-06, "loss": 0.9182, "step": 5025 }, { "epoch": 0.3001313746566344, "grad_norm": 2.7305870056152344, "learning_rate": 7.777187976909297e-06, "loss": 0.9094, "step": 5026 }, { "epoch": 0.3001910904096501, "grad_norm": 1.9016064405441284, "learning_rate": 7.776524450932255e-06, "loss": 0.8831, "step": 5027 }, { "epoch": 0.3002508061626657, "grad_norm": 2.6199870109558105, "learning_rate": 7.775860924955213e-06, "loss": 0.8733, "step": 5028 }, { "epoch": 0.30031052191568136, "grad_norm": 2.1079142093658447, "learning_rate": 7.775197398978171e-06, "loss": 0.9018, "step": 5029 }, { "epoch": 0.300370237668697, "grad_norm": 2.0971519947052, "learning_rate": 7.77453387300113e-06, "loss": 0.881, "step": 5030 }, { "epoch": 0.30042995342171264, "grad_norm": 2.6432244777679443, "learning_rate": 7.773870347024086e-06, "loss": 0.922, "step": 5031 }, { "epoch": 0.3004896691747283, "grad_norm": 2.8648269176483154, "learning_rate": 7.773206821047044e-06, "loss": 0.8927, "step": 5032 }, { "epoch": 0.3005493849277439, "grad_norm": 3.3499441146850586, "learning_rate": 7.772543295070004e-06, "loss": 0.9102, "step": 5033 }, { "epoch": 0.3006091006807596, "grad_norm": 3.119180917739868, "learning_rate": 7.77187976909296e-06, "loss": 0.8782, "step": 5034 }, { "epoch": 0.30066881643377524, "grad_norm": 2.5020854473114014, "learning_rate": 7.771216243115918e-06, "loss": 0.9067, "step": 5035 }, { "epoch": 0.30072853218679085, "grad_norm": 2.3624675273895264, "learning_rate": 7.770552717138876e-06, "loss": 0.9349, "step": 5036 }, { "epoch": 0.3007882479398065, "grad_norm": 1.7151212692260742, "learning_rate": 7.769889191161834e-06, "loss": 0.8739, "step": 5037 }, { "epoch": 0.3008479636928222, "grad_norm": 2.6529746055603027, "learning_rate": 7.769225665184792e-06, "loss": 0.9263, "step": 5038 }, { "epoch": 0.3009076794458378, "grad_norm": 3.54245924949646, "learning_rate": 7.76856213920775e-06, "loss": 0.8872, "step": 5039 }, { "epoch": 0.30096739519885346, "grad_norm": 2.74271559715271, "learning_rate": 7.767898613230709e-06, "loss": 0.923, "step": 5040 }, { "epoch": 0.3010271109518691, "grad_norm": 2.9881889820098877, "learning_rate": 7.767235087253667e-06, "loss": 0.9279, "step": 5041 }, { "epoch": 0.30108682670488474, "grad_norm": 2.6926090717315674, "learning_rate": 7.766571561276625e-06, "loss": 0.9013, "step": 5042 }, { "epoch": 0.3011465424579004, "grad_norm": 3.1311111450195312, "learning_rate": 7.765908035299583e-06, "loss": 0.8977, "step": 5043 }, { "epoch": 0.30120625821091607, "grad_norm": 4.739022731781006, "learning_rate": 7.765244509322541e-06, "loss": 0.8983, "step": 5044 }, { "epoch": 0.3012659739639317, "grad_norm": 3.7010440826416016, "learning_rate": 7.764580983345499e-06, "loss": 0.9486, "step": 5045 }, { "epoch": 0.30132568971694734, "grad_norm": 1.969300627708435, "learning_rate": 7.763917457368456e-06, "loss": 0.9077, "step": 5046 }, { "epoch": 0.30138540546996295, "grad_norm": 2.418278217315674, "learning_rate": 7.763253931391415e-06, "loss": 0.89, "step": 5047 }, { "epoch": 0.3014451212229786, "grad_norm": 2.144740343093872, "learning_rate": 7.762590405414373e-06, "loss": 0.8788, "step": 5048 }, { "epoch": 0.3015048369759943, "grad_norm": 2.4600307941436768, "learning_rate": 7.76192687943733e-06, "loss": 0.898, "step": 5049 }, { "epoch": 0.3015645527290099, "grad_norm": 3.6372451782226562, "learning_rate": 7.76126335346029e-06, "loss": 0.9237, "step": 5050 }, { "epoch": 0.30162426848202556, "grad_norm": 1.9814506769180298, "learning_rate": 7.760599827483246e-06, "loss": 0.8922, "step": 5051 }, { "epoch": 0.3016839842350412, "grad_norm": 3.2273504734039307, "learning_rate": 7.759936301506204e-06, "loss": 0.9038, "step": 5052 }, { "epoch": 0.30174369998805683, "grad_norm": 2.154160737991333, "learning_rate": 7.759272775529162e-06, "loss": 0.8846, "step": 5053 }, { "epoch": 0.3018034157410725, "grad_norm": 2.9151065349578857, "learning_rate": 7.75860924955212e-06, "loss": 0.9142, "step": 5054 }, { "epoch": 0.30186313149408817, "grad_norm": 1.7857985496520996, "learning_rate": 7.757945723575078e-06, "loss": 0.8765, "step": 5055 }, { "epoch": 0.3019228472471038, "grad_norm": 2.135786294937134, "learning_rate": 7.757282197598037e-06, "loss": 0.9322, "step": 5056 }, { "epoch": 0.30198256300011944, "grad_norm": 2.7423737049102783, "learning_rate": 7.756618671620995e-06, "loss": 0.8844, "step": 5057 }, { "epoch": 0.30204227875313505, "grad_norm": 2.031442403793335, "learning_rate": 7.755955145643953e-06, "loss": 0.8772, "step": 5058 }, { "epoch": 0.3021019945061507, "grad_norm": 2.4870269298553467, "learning_rate": 7.75529161966691e-06, "loss": 0.9164, "step": 5059 }, { "epoch": 0.3021617102591664, "grad_norm": 2.246750831604004, "learning_rate": 7.754628093689869e-06, "loss": 0.8971, "step": 5060 }, { "epoch": 0.302221426012182, "grad_norm": 2.0814435482025146, "learning_rate": 7.753964567712825e-06, "loss": 0.8974, "step": 5061 }, { "epoch": 0.30228114176519766, "grad_norm": 2.4633004665374756, "learning_rate": 7.753301041735785e-06, "loss": 0.8874, "step": 5062 }, { "epoch": 0.3023408575182133, "grad_norm": 2.5355610847473145, "learning_rate": 7.752637515758743e-06, "loss": 0.8755, "step": 5063 }, { "epoch": 0.30240057327122893, "grad_norm": 2.6911847591400146, "learning_rate": 7.7519739897817e-06, "loss": 0.9185, "step": 5064 }, { "epoch": 0.3024602890242446, "grad_norm": 2.2475147247314453, "learning_rate": 7.75131046380466e-06, "loss": 0.8784, "step": 5065 }, { "epoch": 0.30252000477726027, "grad_norm": 2.464759588241577, "learning_rate": 7.750646937827616e-06, "loss": 0.8907, "step": 5066 }, { "epoch": 0.3025797205302759, "grad_norm": 3.5130081176757812, "learning_rate": 7.749983411850574e-06, "loss": 0.9043, "step": 5067 }, { "epoch": 0.30263943628329154, "grad_norm": 1.9561717510223389, "learning_rate": 7.749319885873534e-06, "loss": 0.8675, "step": 5068 }, { "epoch": 0.30269915203630715, "grad_norm": 1.8695892095565796, "learning_rate": 7.74865635989649e-06, "loss": 0.9082, "step": 5069 }, { "epoch": 0.3027588677893228, "grad_norm": 1.641324758529663, "learning_rate": 7.747992833919448e-06, "loss": 0.8505, "step": 5070 }, { "epoch": 0.3028185835423385, "grad_norm": 2.4452388286590576, "learning_rate": 7.747329307942406e-06, "loss": 0.8715, "step": 5071 }, { "epoch": 0.3028782992953541, "grad_norm": 2.360670328140259, "learning_rate": 7.746665781965364e-06, "loss": 0.8847, "step": 5072 }, { "epoch": 0.30293801504836976, "grad_norm": 1.82449209690094, "learning_rate": 7.746002255988323e-06, "loss": 0.8773, "step": 5073 }, { "epoch": 0.3029977308013854, "grad_norm": 2.0976293087005615, "learning_rate": 7.74533873001128e-06, "loss": 0.904, "step": 5074 }, { "epoch": 0.30305744655440103, "grad_norm": 2.3404486179351807, "learning_rate": 7.744675204034239e-06, "loss": 0.8897, "step": 5075 }, { "epoch": 0.3031171623074167, "grad_norm": 2.756726026535034, "learning_rate": 7.744011678057197e-06, "loss": 0.8447, "step": 5076 }, { "epoch": 0.30317687806043236, "grad_norm": 1.9818965196609497, "learning_rate": 7.743348152080155e-06, "loss": 0.8816, "step": 5077 }, { "epoch": 0.303236593813448, "grad_norm": 3.368574619293213, "learning_rate": 7.742684626103113e-06, "loss": 0.9201, "step": 5078 }, { "epoch": 0.30329630956646364, "grad_norm": 1.9420431852340698, "learning_rate": 7.742021100126071e-06, "loss": 0.8817, "step": 5079 }, { "epoch": 0.3033560253194793, "grad_norm": 2.4620678424835205, "learning_rate": 7.74135757414903e-06, "loss": 0.8645, "step": 5080 }, { "epoch": 0.3034157410724949, "grad_norm": 2.105522394180298, "learning_rate": 7.740694048171986e-06, "loss": 0.9176, "step": 5081 }, { "epoch": 0.3034754568255106, "grad_norm": 4.498410701751709, "learning_rate": 7.740030522194944e-06, "loss": 0.9331, "step": 5082 }, { "epoch": 0.3035351725785262, "grad_norm": 2.1752612590789795, "learning_rate": 7.739366996217904e-06, "loss": 0.9049, "step": 5083 }, { "epoch": 0.30359488833154186, "grad_norm": 2.163790702819824, "learning_rate": 7.73870347024086e-06, "loss": 0.9145, "step": 5084 }, { "epoch": 0.3036546040845575, "grad_norm": 2.4154937267303467, "learning_rate": 7.738039944263818e-06, "loss": 0.9222, "step": 5085 }, { "epoch": 0.30371431983757313, "grad_norm": 1.620465874671936, "learning_rate": 7.737376418286776e-06, "loss": 0.9218, "step": 5086 }, { "epoch": 0.3037740355905888, "grad_norm": 2.8194870948791504, "learning_rate": 7.736712892309734e-06, "loss": 0.9349, "step": 5087 }, { "epoch": 0.30383375134360446, "grad_norm": 2.124908685684204, "learning_rate": 7.736049366332692e-06, "loss": 0.8875, "step": 5088 }, { "epoch": 0.3038934670966201, "grad_norm": 3.098849058151245, "learning_rate": 7.73538584035565e-06, "loss": 0.8966, "step": 5089 }, { "epoch": 0.30395318284963574, "grad_norm": 8.365011215209961, "learning_rate": 7.734722314378609e-06, "loss": 0.9132, "step": 5090 }, { "epoch": 0.3040128986026514, "grad_norm": 2.9870312213897705, "learning_rate": 7.734058788401567e-06, "loss": 0.8956, "step": 5091 }, { "epoch": 0.304072614355667, "grad_norm": 3.5950145721435547, "learning_rate": 7.733395262424525e-06, "loss": 0.9048, "step": 5092 }, { "epoch": 0.3041323301086827, "grad_norm": 2.251002550125122, "learning_rate": 7.732731736447483e-06, "loss": 0.9081, "step": 5093 }, { "epoch": 0.3041920458616983, "grad_norm": 3.6957874298095703, "learning_rate": 7.732068210470441e-06, "loss": 0.8877, "step": 5094 }, { "epoch": 0.30425176161471396, "grad_norm": 1.7676533460617065, "learning_rate": 7.731404684493399e-06, "loss": 0.9458, "step": 5095 }, { "epoch": 0.3043114773677296, "grad_norm": 1.7008901834487915, "learning_rate": 7.730741158516355e-06, "loss": 0.875, "step": 5096 }, { "epoch": 0.30437119312074523, "grad_norm": 2.025606870651245, "learning_rate": 7.730077632539315e-06, "loss": 0.8958, "step": 5097 }, { "epoch": 0.3044309088737609, "grad_norm": 1.9385930299758911, "learning_rate": 7.729414106562273e-06, "loss": 0.8701, "step": 5098 }, { "epoch": 0.30449062462677656, "grad_norm": 2.123666763305664, "learning_rate": 7.72875058058523e-06, "loss": 0.8756, "step": 5099 }, { "epoch": 0.3045503403797922, "grad_norm": 2.4375319480895996, "learning_rate": 7.72808705460819e-06, "loss": 0.8874, "step": 5100 }, { "epoch": 0.30461005613280784, "grad_norm": 1.8525958061218262, "learning_rate": 7.727423528631146e-06, "loss": 0.8667, "step": 5101 }, { "epoch": 0.3046697718858235, "grad_norm": 3.1371357440948486, "learning_rate": 7.726760002654104e-06, "loss": 0.9053, "step": 5102 }, { "epoch": 0.3047294876388391, "grad_norm": 1.8998469114303589, "learning_rate": 7.726096476677062e-06, "loss": 0.9164, "step": 5103 }, { "epoch": 0.3047892033918548, "grad_norm": 2.14851975440979, "learning_rate": 7.72543295070002e-06, "loss": 0.8818, "step": 5104 }, { "epoch": 0.3048489191448704, "grad_norm": 2.537090301513672, "learning_rate": 7.724769424722978e-06, "loss": 0.905, "step": 5105 }, { "epoch": 0.30490863489788606, "grad_norm": 2.3558382987976074, "learning_rate": 7.724105898745936e-06, "loss": 0.9275, "step": 5106 }, { "epoch": 0.3049683506509017, "grad_norm": 2.0548460483551025, "learning_rate": 7.723442372768895e-06, "loss": 0.9259, "step": 5107 }, { "epoch": 0.30502806640391733, "grad_norm": 2.037722110748291, "learning_rate": 7.722778846791853e-06, "loss": 0.88, "step": 5108 }, { "epoch": 0.305087782156933, "grad_norm": 3.1214520931243896, "learning_rate": 7.72211532081481e-06, "loss": 0.8683, "step": 5109 }, { "epoch": 0.30514749790994866, "grad_norm": 1.9375181198120117, "learning_rate": 7.721451794837769e-06, "loss": 0.9214, "step": 5110 }, { "epoch": 0.30520721366296427, "grad_norm": 3.1387083530426025, "learning_rate": 7.720788268860725e-06, "loss": 0.8885, "step": 5111 }, { "epoch": 0.30526692941597994, "grad_norm": 2.649484872817993, "learning_rate": 7.720124742883685e-06, "loss": 0.8905, "step": 5112 }, { "epoch": 0.3053266451689956, "grad_norm": 2.4630744457244873, "learning_rate": 7.719461216906643e-06, "loss": 0.8942, "step": 5113 }, { "epoch": 0.3053863609220112, "grad_norm": 2.5656826496124268, "learning_rate": 7.7187976909296e-06, "loss": 0.9162, "step": 5114 }, { "epoch": 0.3054460766750269, "grad_norm": 2.199171781539917, "learning_rate": 7.71813416495256e-06, "loss": 0.8763, "step": 5115 }, { "epoch": 0.30550579242804254, "grad_norm": 3.8689448833465576, "learning_rate": 7.717470638975516e-06, "loss": 0.9316, "step": 5116 }, { "epoch": 0.30556550818105815, "grad_norm": 2.2183496952056885, "learning_rate": 7.716807112998474e-06, "loss": 0.8804, "step": 5117 }, { "epoch": 0.3056252239340738, "grad_norm": 2.825124979019165, "learning_rate": 7.716143587021434e-06, "loss": 0.8865, "step": 5118 }, { "epoch": 0.30568493968708943, "grad_norm": 2.525235414505005, "learning_rate": 7.71548006104439e-06, "loss": 0.9062, "step": 5119 }, { "epoch": 0.3057446554401051, "grad_norm": 2.1822173595428467, "learning_rate": 7.714816535067348e-06, "loss": 0.8589, "step": 5120 }, { "epoch": 0.30580437119312076, "grad_norm": 2.261005163192749, "learning_rate": 7.714153009090306e-06, "loss": 0.8663, "step": 5121 }, { "epoch": 0.30586408694613637, "grad_norm": 2.784243106842041, "learning_rate": 7.713489483113264e-06, "loss": 0.8664, "step": 5122 }, { "epoch": 0.30592380269915204, "grad_norm": 3.619769334793091, "learning_rate": 7.712825957136222e-06, "loss": 0.939, "step": 5123 }, { "epoch": 0.3059835184521677, "grad_norm": 2.0297672748565674, "learning_rate": 7.71216243115918e-06, "loss": 0.9075, "step": 5124 }, { "epoch": 0.3060432342051833, "grad_norm": 2.698906660079956, "learning_rate": 7.711498905182139e-06, "loss": 0.9168, "step": 5125 }, { "epoch": 0.306102949958199, "grad_norm": 2.105449914932251, "learning_rate": 7.710835379205097e-06, "loss": 0.9279, "step": 5126 }, { "epoch": 0.30616266571121464, "grad_norm": 2.5262906551361084, "learning_rate": 7.710171853228055e-06, "loss": 0.9129, "step": 5127 }, { "epoch": 0.30622238146423025, "grad_norm": 2.215200662612915, "learning_rate": 7.709508327251013e-06, "loss": 0.882, "step": 5128 }, { "epoch": 0.3062820972172459, "grad_norm": 1.727790355682373, "learning_rate": 7.708844801273971e-06, "loss": 0.8462, "step": 5129 }, { "epoch": 0.30634181297026153, "grad_norm": 2.748256206512451, "learning_rate": 7.70818127529693e-06, "loss": 0.8596, "step": 5130 }, { "epoch": 0.3064015287232772, "grad_norm": 3.09622859954834, "learning_rate": 7.707517749319886e-06, "loss": 0.9176, "step": 5131 }, { "epoch": 0.30646124447629286, "grad_norm": 2.054715871810913, "learning_rate": 7.706854223342844e-06, "loss": 0.9264, "step": 5132 }, { "epoch": 0.30652096022930847, "grad_norm": 2.362739324569702, "learning_rate": 7.706190697365804e-06, "loss": 0.9378, "step": 5133 }, { "epoch": 0.30658067598232414, "grad_norm": 3.808915376663208, "learning_rate": 7.70552717138876e-06, "loss": 0.9004, "step": 5134 }, { "epoch": 0.3066403917353398, "grad_norm": 3.854034900665283, "learning_rate": 7.704863645411718e-06, "loss": 0.9143, "step": 5135 }, { "epoch": 0.3067001074883554, "grad_norm": 2.758119583129883, "learning_rate": 7.704200119434676e-06, "loss": 0.9021, "step": 5136 }, { "epoch": 0.3067598232413711, "grad_norm": 2.170257568359375, "learning_rate": 7.703536593457634e-06, "loss": 0.8724, "step": 5137 }, { "epoch": 0.30681953899438674, "grad_norm": 1.9074149131774902, "learning_rate": 7.702873067480592e-06, "loss": 0.8885, "step": 5138 }, { "epoch": 0.30687925474740235, "grad_norm": 1.8527207374572754, "learning_rate": 7.70220954150355e-06, "loss": 0.902, "step": 5139 }, { "epoch": 0.306938970500418, "grad_norm": 2.3492956161499023, "learning_rate": 7.701546015526509e-06, "loss": 0.8778, "step": 5140 }, { "epoch": 0.30699868625343363, "grad_norm": 2.271991491317749, "learning_rate": 7.700882489549467e-06, "loss": 0.9175, "step": 5141 }, { "epoch": 0.3070584020064493, "grad_norm": 2.025282859802246, "learning_rate": 7.700218963572425e-06, "loss": 0.8566, "step": 5142 }, { "epoch": 0.30711811775946496, "grad_norm": 2.5162177085876465, "learning_rate": 7.699555437595383e-06, "loss": 0.8881, "step": 5143 }, { "epoch": 0.30717783351248057, "grad_norm": 2.0072033405303955, "learning_rate": 7.698891911618341e-06, "loss": 0.9084, "step": 5144 }, { "epoch": 0.30723754926549623, "grad_norm": 2.4039900302886963, "learning_rate": 7.698228385641299e-06, "loss": 0.8812, "step": 5145 }, { "epoch": 0.3072972650185119, "grad_norm": 1.5847278833389282, "learning_rate": 7.697564859664255e-06, "loss": 0.9014, "step": 5146 }, { "epoch": 0.3073569807715275, "grad_norm": 1.9970499277114868, "learning_rate": 7.696901333687215e-06, "loss": 0.9071, "step": 5147 }, { "epoch": 0.3074166965245432, "grad_norm": 1.7844280004501343, "learning_rate": 7.696237807710173e-06, "loss": 0.8777, "step": 5148 }, { "epoch": 0.30747641227755884, "grad_norm": 2.3361661434173584, "learning_rate": 7.69557428173313e-06, "loss": 0.8544, "step": 5149 }, { "epoch": 0.30753612803057445, "grad_norm": 2.048710823059082, "learning_rate": 7.69491075575609e-06, "loss": 0.9231, "step": 5150 }, { "epoch": 0.3075958437835901, "grad_norm": 2.989349603652954, "learning_rate": 7.694247229779046e-06, "loss": 0.9196, "step": 5151 }, { "epoch": 0.3076555595366058, "grad_norm": 2.229943037033081, "learning_rate": 7.693583703802004e-06, "loss": 0.9313, "step": 5152 }, { "epoch": 0.3077152752896214, "grad_norm": 2.0661723613739014, "learning_rate": 7.692920177824962e-06, "loss": 0.9254, "step": 5153 }, { "epoch": 0.30777499104263706, "grad_norm": 2.665302038192749, "learning_rate": 7.69225665184792e-06, "loss": 0.8828, "step": 5154 }, { "epoch": 0.30783470679565267, "grad_norm": 2.6885547637939453, "learning_rate": 7.691593125870878e-06, "loss": 0.8817, "step": 5155 }, { "epoch": 0.30789442254866833, "grad_norm": 2.4894394874572754, "learning_rate": 7.690929599893836e-06, "loss": 0.899, "step": 5156 }, { "epoch": 0.307954138301684, "grad_norm": 2.1143722534179688, "learning_rate": 7.690266073916795e-06, "loss": 0.9148, "step": 5157 }, { "epoch": 0.3080138540546996, "grad_norm": 4.091215133666992, "learning_rate": 7.689602547939753e-06, "loss": 0.891, "step": 5158 }, { "epoch": 0.3080735698077153, "grad_norm": 2.1816909313201904, "learning_rate": 7.68893902196271e-06, "loss": 0.9217, "step": 5159 }, { "epoch": 0.30813328556073094, "grad_norm": 2.1142892837524414, "learning_rate": 7.688275495985669e-06, "loss": 0.892, "step": 5160 }, { "epoch": 0.30819300131374655, "grad_norm": 3.0664422512054443, "learning_rate": 7.687611970008625e-06, "loss": 0.9029, "step": 5161 }, { "epoch": 0.3082527170667622, "grad_norm": 2.222583055496216, "learning_rate": 7.686948444031585e-06, "loss": 0.8722, "step": 5162 }, { "epoch": 0.3083124328197779, "grad_norm": 2.8944449424743652, "learning_rate": 7.686284918054543e-06, "loss": 0.8835, "step": 5163 }, { "epoch": 0.3083721485727935, "grad_norm": 2.6655917167663574, "learning_rate": 7.6856213920775e-06, "loss": 0.9044, "step": 5164 }, { "epoch": 0.30843186432580916, "grad_norm": 1.9883259534835815, "learning_rate": 7.68495786610046e-06, "loss": 0.8677, "step": 5165 }, { "epoch": 0.30849158007882477, "grad_norm": 2.1098222732543945, "learning_rate": 7.684294340123416e-06, "loss": 0.881, "step": 5166 }, { "epoch": 0.30855129583184043, "grad_norm": 2.0072519779205322, "learning_rate": 7.683630814146374e-06, "loss": 0.9126, "step": 5167 }, { "epoch": 0.3086110115848561, "grad_norm": 2.071422576904297, "learning_rate": 7.682967288169334e-06, "loss": 0.8748, "step": 5168 }, { "epoch": 0.3086707273378717, "grad_norm": 2.3783464431762695, "learning_rate": 7.68230376219229e-06, "loss": 0.8321, "step": 5169 }, { "epoch": 0.3087304430908874, "grad_norm": 2.0943660736083984, "learning_rate": 7.681640236215248e-06, "loss": 0.914, "step": 5170 }, { "epoch": 0.30879015884390304, "grad_norm": 2.2766103744506836, "learning_rate": 7.680976710238206e-06, "loss": 0.9067, "step": 5171 }, { "epoch": 0.30884987459691865, "grad_norm": 2.1539368629455566, "learning_rate": 7.680313184261164e-06, "loss": 0.9284, "step": 5172 }, { "epoch": 0.3089095903499343, "grad_norm": 2.4509739875793457, "learning_rate": 7.679649658284122e-06, "loss": 0.8783, "step": 5173 }, { "epoch": 0.30896930610295, "grad_norm": 4.23559045791626, "learning_rate": 7.67898613230708e-06, "loss": 0.9354, "step": 5174 }, { "epoch": 0.3090290218559656, "grad_norm": 3.3081679344177246, "learning_rate": 7.678322606330039e-06, "loss": 0.8811, "step": 5175 }, { "epoch": 0.30908873760898126, "grad_norm": 3.469578504562378, "learning_rate": 7.677659080352997e-06, "loss": 0.8799, "step": 5176 }, { "epoch": 0.3091484533619969, "grad_norm": 2.024444818496704, "learning_rate": 7.676995554375955e-06, "loss": 0.9128, "step": 5177 }, { "epoch": 0.30920816911501253, "grad_norm": 1.8757115602493286, "learning_rate": 7.676332028398913e-06, "loss": 0.9392, "step": 5178 }, { "epoch": 0.3092678848680282, "grad_norm": 6.098393440246582, "learning_rate": 7.675668502421871e-06, "loss": 0.8959, "step": 5179 }, { "epoch": 0.3093276006210438, "grad_norm": 8.035234451293945, "learning_rate": 7.675004976444829e-06, "loss": 0.8987, "step": 5180 }, { "epoch": 0.3093873163740595, "grad_norm": 2.15804123878479, "learning_rate": 7.674341450467786e-06, "loss": 0.9046, "step": 5181 }, { "epoch": 0.30944703212707514, "grad_norm": 1.8853429555892944, "learning_rate": 7.673677924490744e-06, "loss": 0.8806, "step": 5182 }, { "epoch": 0.30950674788009075, "grad_norm": 2.148378610610962, "learning_rate": 7.673014398513703e-06, "loss": 0.9109, "step": 5183 }, { "epoch": 0.3095664636331064, "grad_norm": 3.515799045562744, "learning_rate": 7.67235087253666e-06, "loss": 0.9168, "step": 5184 }, { "epoch": 0.3096261793861221, "grad_norm": 2.076904296875, "learning_rate": 7.671687346559618e-06, "loss": 0.864, "step": 5185 }, { "epoch": 0.3096858951391377, "grad_norm": 2.243894338607788, "learning_rate": 7.671023820582576e-06, "loss": 0.8976, "step": 5186 }, { "epoch": 0.30974561089215336, "grad_norm": 4.545199394226074, "learning_rate": 7.670360294605534e-06, "loss": 0.884, "step": 5187 }, { "epoch": 0.309805326645169, "grad_norm": 2.165053606033325, "learning_rate": 7.669696768628492e-06, "loss": 0.8888, "step": 5188 }, { "epoch": 0.30986504239818463, "grad_norm": 2.5583231449127197, "learning_rate": 7.66903324265145e-06, "loss": 0.8391, "step": 5189 }, { "epoch": 0.3099247581512003, "grad_norm": 1.8324118852615356, "learning_rate": 7.668369716674408e-06, "loss": 0.8785, "step": 5190 }, { "epoch": 0.3099844739042159, "grad_norm": 2.346036911010742, "learning_rate": 7.667706190697367e-06, "loss": 0.8855, "step": 5191 }, { "epoch": 0.3100441896572316, "grad_norm": 2.1117050647735596, "learning_rate": 7.667042664720325e-06, "loss": 0.8999, "step": 5192 }, { "epoch": 0.31010390541024724, "grad_norm": 2.189923048019409, "learning_rate": 7.666379138743283e-06, "loss": 0.9054, "step": 5193 }, { "epoch": 0.31016362116326285, "grad_norm": 3.240736961364746, "learning_rate": 7.665715612766241e-06, "loss": 0.9042, "step": 5194 }, { "epoch": 0.3102233369162785, "grad_norm": 4.051148891448975, "learning_rate": 7.665052086789199e-06, "loss": 0.893, "step": 5195 }, { "epoch": 0.3102830526692942, "grad_norm": 3.126070022583008, "learning_rate": 7.664388560812155e-06, "loss": 0.9196, "step": 5196 }, { "epoch": 0.3103427684223098, "grad_norm": 4.539301872253418, "learning_rate": 7.663725034835115e-06, "loss": 0.8981, "step": 5197 }, { "epoch": 0.31040248417532545, "grad_norm": 1.8814772367477417, "learning_rate": 7.663061508858073e-06, "loss": 0.8926, "step": 5198 }, { "epoch": 0.3104621999283411, "grad_norm": 2.9387834072113037, "learning_rate": 7.66239798288103e-06, "loss": 0.8875, "step": 5199 }, { "epoch": 0.31052191568135673, "grad_norm": 3.1585071086883545, "learning_rate": 7.66173445690399e-06, "loss": 0.8356, "step": 5200 }, { "epoch": 0.31052191568135673, "eval_text_loss": 0.9379466772079468, "eval_text_runtime": 15.2376, "eval_text_samples_per_second": 262.509, "eval_text_steps_per_second": 0.525, "step": 5200 }, { "epoch": 0.31052191568135673, "eval_image_loss": 0.6610161066055298, "eval_image_runtime": 4.9948, "eval_image_samples_per_second": 800.828, "eval_image_steps_per_second": 1.602, "step": 5200 }, { "epoch": 0.31052191568135673, "eval_video_loss": 1.120626449584961, "eval_video_runtime": 76.2504, "eval_video_samples_per_second": 52.459, "eval_video_steps_per_second": 0.105, "step": 5200 }, { "epoch": 0.3105816314343724, "grad_norm": 2.4913926124572754, "learning_rate": 7.661070930926946e-06, "loss": 0.9044, "step": 5201 }, { "epoch": 0.310641347187388, "grad_norm": 1.98965585231781, "learning_rate": 7.660407404949904e-06, "loss": 0.9414, "step": 5202 }, { "epoch": 0.31070106294040367, "grad_norm": 2.110244035720825, "learning_rate": 7.659743878972862e-06, "loss": 0.877, "step": 5203 }, { "epoch": 0.31076077869341934, "grad_norm": 2.297973155975342, "learning_rate": 7.65908035299582e-06, "loss": 0.8976, "step": 5204 }, { "epoch": 0.31082049444643495, "grad_norm": 1.9841746091842651, "learning_rate": 7.658416827018778e-06, "loss": 0.9, "step": 5205 }, { "epoch": 0.3108802101994506, "grad_norm": 1.642600178718567, "learning_rate": 7.657753301041736e-06, "loss": 0.84, "step": 5206 }, { "epoch": 0.3109399259524663, "grad_norm": 2.6409385204315186, "learning_rate": 7.657089775064694e-06, "loss": 0.8261, "step": 5207 }, { "epoch": 0.3109996417054819, "grad_norm": 3.948065757751465, "learning_rate": 7.656426249087653e-06, "loss": 0.9068, "step": 5208 }, { "epoch": 0.31105935745849755, "grad_norm": 2.1655795574188232, "learning_rate": 7.65576272311061e-06, "loss": 0.9005, "step": 5209 }, { "epoch": 0.3111190732115132, "grad_norm": 2.3780035972595215, "learning_rate": 7.655099197133569e-06, "loss": 0.8879, "step": 5210 }, { "epoch": 0.31117878896452883, "grad_norm": 1.9875189065933228, "learning_rate": 7.654435671156525e-06, "loss": 0.8897, "step": 5211 }, { "epoch": 0.3112385047175445, "grad_norm": 1.683472752571106, "learning_rate": 7.653772145179485e-06, "loss": 0.9372, "step": 5212 }, { "epoch": 0.31129822047056016, "grad_norm": 1.9111213684082031, "learning_rate": 7.653108619202443e-06, "loss": 0.8626, "step": 5213 }, { "epoch": 0.31135793622357577, "grad_norm": 2.236070394515991, "learning_rate": 7.6524450932254e-06, "loss": 0.8876, "step": 5214 }, { "epoch": 0.31141765197659144, "grad_norm": 2.154597759246826, "learning_rate": 7.65178156724836e-06, "loss": 0.9129, "step": 5215 }, { "epoch": 0.31147736772960705, "grad_norm": 2.602966547012329, "learning_rate": 7.651118041271316e-06, "loss": 0.8951, "step": 5216 }, { "epoch": 0.3115370834826227, "grad_norm": 2.7970640659332275, "learning_rate": 7.650454515294274e-06, "loss": 0.8782, "step": 5217 }, { "epoch": 0.3115967992356384, "grad_norm": 2.9633002281188965, "learning_rate": 7.649790989317234e-06, "loss": 0.885, "step": 5218 }, { "epoch": 0.311656514988654, "grad_norm": 4.6209940910339355, "learning_rate": 7.64912746334019e-06, "loss": 0.8962, "step": 5219 }, { "epoch": 0.31171623074166965, "grad_norm": 7.152946949005127, "learning_rate": 7.648463937363148e-06, "loss": 0.9137, "step": 5220 }, { "epoch": 0.3117759464946853, "grad_norm": 2.140164375305176, "learning_rate": 7.647800411386106e-06, "loss": 0.9016, "step": 5221 }, { "epoch": 0.31183566224770093, "grad_norm": 3.637774705886841, "learning_rate": 7.647136885409064e-06, "loss": 0.882, "step": 5222 }, { "epoch": 0.3118953780007166, "grad_norm": 1.9370265007019043, "learning_rate": 7.646473359432022e-06, "loss": 0.8835, "step": 5223 }, { "epoch": 0.31195509375373226, "grad_norm": 2.288975238800049, "learning_rate": 7.64580983345498e-06, "loss": 0.9306, "step": 5224 }, { "epoch": 0.31201480950674787, "grad_norm": 2.6955020427703857, "learning_rate": 7.645146307477939e-06, "loss": 0.9028, "step": 5225 }, { "epoch": 0.31207452525976354, "grad_norm": 3.854302406311035, "learning_rate": 7.644482781500897e-06, "loss": 0.8475, "step": 5226 }, { "epoch": 0.31213424101277915, "grad_norm": 2.132274627685547, "learning_rate": 7.643819255523855e-06, "loss": 0.89, "step": 5227 }, { "epoch": 0.3121939567657948, "grad_norm": 3.681072950363159, "learning_rate": 7.643155729546813e-06, "loss": 0.8948, "step": 5228 }, { "epoch": 0.3122536725188105, "grad_norm": 3.249323606491089, "learning_rate": 7.642492203569771e-06, "loss": 0.9541, "step": 5229 }, { "epoch": 0.3123133882718261, "grad_norm": 1.8296173810958862, "learning_rate": 7.641828677592729e-06, "loss": 0.897, "step": 5230 }, { "epoch": 0.31237310402484175, "grad_norm": 3.2617719173431396, "learning_rate": 7.641165151615685e-06, "loss": 0.9166, "step": 5231 }, { "epoch": 0.3124328197778574, "grad_norm": 2.2428131103515625, "learning_rate": 7.640501625638644e-06, "loss": 0.9163, "step": 5232 }, { "epoch": 0.31249253553087303, "grad_norm": 8.631561279296875, "learning_rate": 7.639838099661603e-06, "loss": 0.9356, "step": 5233 }, { "epoch": 0.3125522512838887, "grad_norm": 2.828336477279663, "learning_rate": 7.63917457368456e-06, "loss": 0.9241, "step": 5234 }, { "epoch": 0.31261196703690436, "grad_norm": 2.3513424396514893, "learning_rate": 7.638511047707518e-06, "loss": 0.8367, "step": 5235 }, { "epoch": 0.31267168278991997, "grad_norm": 2.2209198474884033, "learning_rate": 7.637847521730476e-06, "loss": 0.8623, "step": 5236 }, { "epoch": 0.31273139854293563, "grad_norm": 2.5934829711914062, "learning_rate": 7.637183995753434e-06, "loss": 0.8987, "step": 5237 }, { "epoch": 0.31279111429595124, "grad_norm": 6.474710941314697, "learning_rate": 7.636520469776392e-06, "loss": 0.9299, "step": 5238 }, { "epoch": 0.3128508300489669, "grad_norm": 1.9798541069030762, "learning_rate": 7.63585694379935e-06, "loss": 0.8867, "step": 5239 }, { "epoch": 0.3129105458019826, "grad_norm": 1.7170166969299316, "learning_rate": 7.635193417822308e-06, "loss": 0.854, "step": 5240 }, { "epoch": 0.3129702615549982, "grad_norm": 3.4404823780059814, "learning_rate": 7.634529891845266e-06, "loss": 0.9178, "step": 5241 }, { "epoch": 0.31302997730801385, "grad_norm": 2.7493574619293213, "learning_rate": 7.633866365868225e-06, "loss": 0.8854, "step": 5242 }, { "epoch": 0.3130896930610295, "grad_norm": 1.9821187257766724, "learning_rate": 7.633202839891183e-06, "loss": 0.8782, "step": 5243 }, { "epoch": 0.3131494088140451, "grad_norm": 3.277785062789917, "learning_rate": 7.63253931391414e-06, "loss": 0.8962, "step": 5244 }, { "epoch": 0.3132091245670608, "grad_norm": 2.5698294639587402, "learning_rate": 7.631875787937099e-06, "loss": 0.8536, "step": 5245 }, { "epoch": 0.31326884032007646, "grad_norm": 2.321012258529663, "learning_rate": 7.631212261960055e-06, "loss": 0.9118, "step": 5246 }, { "epoch": 0.31332855607309207, "grad_norm": 2.938652276992798, "learning_rate": 7.630548735983015e-06, "loss": 0.9171, "step": 5247 }, { "epoch": 0.31338827182610773, "grad_norm": 3.5981993675231934, "learning_rate": 7.629885210005973e-06, "loss": 0.8628, "step": 5248 }, { "epoch": 0.3134479875791234, "grad_norm": 2.48905611038208, "learning_rate": 7.6292216840289305e-06, "loss": 0.9284, "step": 5249 }, { "epoch": 0.313507703332139, "grad_norm": 1.8405243158340454, "learning_rate": 7.6285581580518886e-06, "loss": 0.9128, "step": 5250 }, { "epoch": 0.3135674190851547, "grad_norm": 2.456484079360962, "learning_rate": 7.627894632074846e-06, "loss": 0.8776, "step": 5251 }, { "epoch": 0.3136271348381703, "grad_norm": 1.7293546199798584, "learning_rate": 7.627231106097805e-06, "loss": 0.8982, "step": 5252 }, { "epoch": 0.31368685059118595, "grad_norm": 5.072652816772461, "learning_rate": 7.626567580120763e-06, "loss": 0.894, "step": 5253 }, { "epoch": 0.3137465663442016, "grad_norm": 2.5830862522125244, "learning_rate": 7.62590405414372e-06, "loss": 0.8999, "step": 5254 }, { "epoch": 0.3138062820972172, "grad_norm": 2.2388999462127686, "learning_rate": 7.625240528166678e-06, "loss": 0.9019, "step": 5255 }, { "epoch": 0.3138659978502329, "grad_norm": 2.055115222930908, "learning_rate": 7.624577002189636e-06, "loss": 0.8756, "step": 5256 }, { "epoch": 0.31392571360324856, "grad_norm": 3.080554962158203, "learning_rate": 7.623913476212594e-06, "loss": 0.8875, "step": 5257 }, { "epoch": 0.31398542935626417, "grad_norm": 2.4722344875335693, "learning_rate": 7.6232499502355525e-06, "loss": 0.8983, "step": 5258 }, { "epoch": 0.31404514510927983, "grad_norm": 2.7527260780334473, "learning_rate": 7.62258642425851e-06, "loss": 0.9033, "step": 5259 }, { "epoch": 0.3141048608622955, "grad_norm": 2.4425859451293945, "learning_rate": 7.621922898281469e-06, "loss": 0.8589, "step": 5260 }, { "epoch": 0.3141645766153111, "grad_norm": 1.6770634651184082, "learning_rate": 7.621259372304426e-06, "loss": 0.8627, "step": 5261 }, { "epoch": 0.3142242923683268, "grad_norm": 2.2344202995300293, "learning_rate": 7.620595846327384e-06, "loss": 0.9235, "step": 5262 }, { "epoch": 0.3142840081213424, "grad_norm": 1.9492321014404297, "learning_rate": 7.619932320350343e-06, "loss": 0.8966, "step": 5263 }, { "epoch": 0.31434372387435805, "grad_norm": 1.8987939357757568, "learning_rate": 7.6192687943733e-06, "loss": 0.876, "step": 5264 }, { "epoch": 0.3144034396273737, "grad_norm": 2.1339476108551025, "learning_rate": 7.618605268396258e-06, "loss": 0.9083, "step": 5265 }, { "epoch": 0.3144631553803893, "grad_norm": 2.1341264247894287, "learning_rate": 7.617941742419216e-06, "loss": 0.9235, "step": 5266 }, { "epoch": 0.314522871133405, "grad_norm": 2.614361047744751, "learning_rate": 7.6172782164421746e-06, "loss": 0.9066, "step": 5267 }, { "epoch": 0.31458258688642066, "grad_norm": 4.258636951446533, "learning_rate": 7.616614690465133e-06, "loss": 0.8984, "step": 5268 }, { "epoch": 0.31464230263943627, "grad_norm": 2.042102098464966, "learning_rate": 7.61595116448809e-06, "loss": 0.9036, "step": 5269 }, { "epoch": 0.31470201839245193, "grad_norm": 2.854508638381958, "learning_rate": 7.615287638511049e-06, "loss": 0.8689, "step": 5270 }, { "epoch": 0.3147617341454676, "grad_norm": 2.009281873703003, "learning_rate": 7.614624112534006e-06, "loss": 0.8945, "step": 5271 }, { "epoch": 0.3148214498984832, "grad_norm": 2.0892703533172607, "learning_rate": 7.613960586556964e-06, "loss": 0.9169, "step": 5272 }, { "epoch": 0.3148811656514989, "grad_norm": 2.537142515182495, "learning_rate": 7.613297060579923e-06, "loss": 0.8665, "step": 5273 }, { "epoch": 0.3149408814045145, "grad_norm": 1.8853540420532227, "learning_rate": 7.61263353460288e-06, "loss": 0.8936, "step": 5274 }, { "epoch": 0.31500059715753015, "grad_norm": 2.680452585220337, "learning_rate": 7.6119700086258385e-06, "loss": 0.9029, "step": 5275 }, { "epoch": 0.3150603129105458, "grad_norm": 2.9230198860168457, "learning_rate": 7.611306482648796e-06, "loss": 0.9068, "step": 5276 }, { "epoch": 0.3151200286635614, "grad_norm": 1.9096204042434692, "learning_rate": 7.610642956671755e-06, "loss": 0.913, "step": 5277 }, { "epoch": 0.3151797444165771, "grad_norm": 2.9100725650787354, "learning_rate": 7.609979430694713e-06, "loss": 0.9039, "step": 5278 }, { "epoch": 0.31523946016959276, "grad_norm": 3.0080506801605225, "learning_rate": 7.60931590471767e-06, "loss": 0.9152, "step": 5279 }, { "epoch": 0.31529917592260837, "grad_norm": 2.1641225814819336, "learning_rate": 7.608652378740628e-06, "loss": 0.9094, "step": 5280 }, { "epoch": 0.31535889167562403, "grad_norm": 1.9708622694015503, "learning_rate": 7.607988852763586e-06, "loss": 0.8902, "step": 5281 }, { "epoch": 0.3154186074286397, "grad_norm": 2.1599314212799072, "learning_rate": 7.607325326786544e-06, "loss": 0.9021, "step": 5282 }, { "epoch": 0.3154783231816553, "grad_norm": 2.1324448585510254, "learning_rate": 7.6066618008095025e-06, "loss": 0.8825, "step": 5283 }, { "epoch": 0.315538038934671, "grad_norm": 2.5417487621307373, "learning_rate": 7.60599827483246e-06, "loss": 0.9202, "step": 5284 }, { "epoch": 0.31559775468768664, "grad_norm": 2.115715980529785, "learning_rate": 7.605334748855419e-06, "loss": 0.9055, "step": 5285 }, { "epoch": 0.31565747044070225, "grad_norm": 1.8453433513641357, "learning_rate": 7.604671222878376e-06, "loss": 0.8655, "step": 5286 }, { "epoch": 0.3157171861937179, "grad_norm": 1.8804341554641724, "learning_rate": 7.604007696901334e-06, "loss": 0.8874, "step": 5287 }, { "epoch": 0.3157769019467335, "grad_norm": 2.8977603912353516, "learning_rate": 7.603344170924293e-06, "loss": 0.8939, "step": 5288 }, { "epoch": 0.3158366176997492, "grad_norm": 2.6576650142669678, "learning_rate": 7.60268064494725e-06, "loss": 0.902, "step": 5289 }, { "epoch": 0.31589633345276485, "grad_norm": 1.9388939142227173, "learning_rate": 7.602017118970208e-06, "loss": 0.8892, "step": 5290 }, { "epoch": 0.31595604920578046, "grad_norm": 2.7507081031799316, "learning_rate": 7.601353592993166e-06, "loss": 0.9073, "step": 5291 }, { "epoch": 0.31601576495879613, "grad_norm": 3.3252339363098145, "learning_rate": 7.6006900670161245e-06, "loss": 0.9064, "step": 5292 }, { "epoch": 0.3160754807118118, "grad_norm": 3.0030252933502197, "learning_rate": 7.600026541039083e-06, "loss": 0.9032, "step": 5293 }, { "epoch": 0.3161351964648274, "grad_norm": 1.5281343460083008, "learning_rate": 7.59936301506204e-06, "loss": 0.8799, "step": 5294 }, { "epoch": 0.31619491221784307, "grad_norm": 1.77337646484375, "learning_rate": 7.598699489084999e-06, "loss": 0.8422, "step": 5295 }, { "epoch": 0.31625462797085874, "grad_norm": 2.1689445972442627, "learning_rate": 7.598035963107956e-06, "loss": 0.8667, "step": 5296 }, { "epoch": 0.31631434372387435, "grad_norm": 2.6767239570617676, "learning_rate": 7.597372437130914e-06, "loss": 0.9057, "step": 5297 }, { "epoch": 0.31637405947689, "grad_norm": 1.6239213943481445, "learning_rate": 7.596708911153873e-06, "loss": 0.9039, "step": 5298 }, { "epoch": 0.3164337752299056, "grad_norm": 3.5052976608276367, "learning_rate": 7.59604538517683e-06, "loss": 0.9014, "step": 5299 }, { "epoch": 0.3164934909829213, "grad_norm": 1.8436214923858643, "learning_rate": 7.5953818591997885e-06, "loss": 0.9184, "step": 5300 }, { "epoch": 0.31655320673593695, "grad_norm": 2.0756070613861084, "learning_rate": 7.594718333222746e-06, "loss": 0.9003, "step": 5301 }, { "epoch": 0.31661292248895256, "grad_norm": 3.144165277481079, "learning_rate": 7.594054807245705e-06, "loss": 0.8941, "step": 5302 }, { "epoch": 0.31667263824196823, "grad_norm": 3.100221872329712, "learning_rate": 7.593391281268663e-06, "loss": 0.9131, "step": 5303 }, { "epoch": 0.3167323539949839, "grad_norm": 1.9181424379348755, "learning_rate": 7.59272775529162e-06, "loss": 0.8799, "step": 5304 }, { "epoch": 0.3167920697479995, "grad_norm": 2.2583329677581787, "learning_rate": 7.592064229314578e-06, "loss": 0.8845, "step": 5305 }, { "epoch": 0.31685178550101517, "grad_norm": 2.7640433311462402, "learning_rate": 7.591400703337536e-06, "loss": 0.8875, "step": 5306 }, { "epoch": 0.31691150125403084, "grad_norm": 1.8527032136917114, "learning_rate": 7.590737177360494e-06, "loss": 0.8605, "step": 5307 }, { "epoch": 0.31697121700704645, "grad_norm": 2.4088199138641357, "learning_rate": 7.5900736513834524e-06, "loss": 0.8918, "step": 5308 }, { "epoch": 0.3170309327600621, "grad_norm": 3.058117628097534, "learning_rate": 7.58941012540641e-06, "loss": 0.8961, "step": 5309 }, { "epoch": 0.3170906485130778, "grad_norm": 3.538848638534546, "learning_rate": 7.588746599429369e-06, "loss": 0.8973, "step": 5310 }, { "epoch": 0.3171503642660934, "grad_norm": 3.3894903659820557, "learning_rate": 7.588083073452326e-06, "loss": 0.8846, "step": 5311 }, { "epoch": 0.31721008001910905, "grad_norm": 2.7055959701538086, "learning_rate": 7.587419547475284e-06, "loss": 0.875, "step": 5312 }, { "epoch": 0.31726979577212466, "grad_norm": 2.2596380710601807, "learning_rate": 7.586756021498243e-06, "loss": 0.8652, "step": 5313 }, { "epoch": 0.31732951152514033, "grad_norm": 1.7287819385528564, "learning_rate": 7.5860924955212e-06, "loss": 0.835, "step": 5314 }, { "epoch": 0.317389227278156, "grad_norm": 2.0496013164520264, "learning_rate": 7.585428969544158e-06, "loss": 0.8878, "step": 5315 }, { "epoch": 0.3174489430311716, "grad_norm": 2.9417812824249268, "learning_rate": 7.5847654435671156e-06, "loss": 0.9364, "step": 5316 }, { "epoch": 0.31750865878418727, "grad_norm": 1.9493327140808105, "learning_rate": 7.5841019175900745e-06, "loss": 0.869, "step": 5317 }, { "epoch": 0.31756837453720294, "grad_norm": 2.29830265045166, "learning_rate": 7.583438391613033e-06, "loss": 0.9075, "step": 5318 }, { "epoch": 0.31762809029021855, "grad_norm": 2.565129280090332, "learning_rate": 7.58277486563599e-06, "loss": 0.8926, "step": 5319 }, { "epoch": 0.3176878060432342, "grad_norm": 2.979735851287842, "learning_rate": 7.582111339658949e-06, "loss": 0.8657, "step": 5320 }, { "epoch": 0.3177475217962499, "grad_norm": 2.3616769313812256, "learning_rate": 7.581447813681906e-06, "loss": 0.8874, "step": 5321 }, { "epoch": 0.3178072375492655, "grad_norm": 1.9192641973495483, "learning_rate": 7.580784287704864e-06, "loss": 0.8947, "step": 5322 }, { "epoch": 0.31786695330228115, "grad_norm": 6.560997486114502, "learning_rate": 7.580120761727823e-06, "loss": 0.9086, "step": 5323 }, { "epoch": 0.31792666905529676, "grad_norm": 2.3562986850738525, "learning_rate": 7.57945723575078e-06, "loss": 0.8691, "step": 5324 }, { "epoch": 0.31798638480831243, "grad_norm": 2.6685562133789062, "learning_rate": 7.5787937097737385e-06, "loss": 0.9282, "step": 5325 }, { "epoch": 0.3180461005613281, "grad_norm": 1.857616662979126, "learning_rate": 7.578130183796696e-06, "loss": 0.8791, "step": 5326 }, { "epoch": 0.3181058163143437, "grad_norm": 2.1000711917877197, "learning_rate": 7.577466657819655e-06, "loss": 0.8833, "step": 5327 }, { "epoch": 0.31816553206735937, "grad_norm": 2.235218048095703, "learning_rate": 7.576803131842613e-06, "loss": 0.8921, "step": 5328 }, { "epoch": 0.31822524782037503, "grad_norm": 3.9099509716033936, "learning_rate": 7.57613960586557e-06, "loss": 0.913, "step": 5329 }, { "epoch": 0.31828496357339064, "grad_norm": 2.881930112838745, "learning_rate": 7.575476079888528e-06, "loss": 0.8954, "step": 5330 }, { "epoch": 0.3183446793264063, "grad_norm": 1.7537413835525513, "learning_rate": 7.574812553911486e-06, "loss": 0.9208, "step": 5331 }, { "epoch": 0.318404395079422, "grad_norm": 1.696606159210205, "learning_rate": 7.574149027934444e-06, "loss": 0.8896, "step": 5332 }, { "epoch": 0.3184641108324376, "grad_norm": 3.015558958053589, "learning_rate": 7.573485501957402e-06, "loss": 0.902, "step": 5333 }, { "epoch": 0.31852382658545325, "grad_norm": 4.274018287658691, "learning_rate": 7.57282197598036e-06, "loss": 0.9095, "step": 5334 }, { "epoch": 0.31858354233846886, "grad_norm": 1.8066308498382568, "learning_rate": 7.572158450003319e-06, "loss": 0.8811, "step": 5335 }, { "epoch": 0.3186432580914845, "grad_norm": 1.9849176406860352, "learning_rate": 7.571494924026276e-06, "loss": 0.9192, "step": 5336 }, { "epoch": 0.3187029738445002, "grad_norm": 2.191946029663086, "learning_rate": 7.570831398049234e-06, "loss": 0.8849, "step": 5337 }, { "epoch": 0.3187626895975158, "grad_norm": 2.215773820877075, "learning_rate": 7.570167872072193e-06, "loss": 0.8812, "step": 5338 }, { "epoch": 0.31882240535053147, "grad_norm": 2.291332960128784, "learning_rate": 7.56950434609515e-06, "loss": 0.8907, "step": 5339 }, { "epoch": 0.31888212110354713, "grad_norm": 2.2838099002838135, "learning_rate": 7.568840820118108e-06, "loss": 0.9121, "step": 5340 }, { "epoch": 0.31894183685656274, "grad_norm": 3.82651424407959, "learning_rate": 7.5681772941410655e-06, "loss": 0.8788, "step": 5341 }, { "epoch": 0.3190015526095784, "grad_norm": 1.930860996246338, "learning_rate": 7.5675137681640245e-06, "loss": 0.8887, "step": 5342 }, { "epoch": 0.3190612683625941, "grad_norm": 2.528545618057251, "learning_rate": 7.566850242186983e-06, "loss": 0.9107, "step": 5343 }, { "epoch": 0.3191209841156097, "grad_norm": 2.3810958862304688, "learning_rate": 7.56618671620994e-06, "loss": 0.8939, "step": 5344 }, { "epoch": 0.31918069986862535, "grad_norm": 4.305899143218994, "learning_rate": 7.565523190232899e-06, "loss": 0.8683, "step": 5345 }, { "epoch": 0.319240415621641, "grad_norm": 2.4643471240997314, "learning_rate": 7.564859664255856e-06, "loss": 0.9259, "step": 5346 }, { "epoch": 0.3193001313746566, "grad_norm": 3.351911783218384, "learning_rate": 7.564196138278814e-06, "loss": 0.9026, "step": 5347 }, { "epoch": 0.3193598471276723, "grad_norm": 2.129814863204956, "learning_rate": 7.563532612301773e-06, "loss": 0.8828, "step": 5348 }, { "epoch": 0.3194195628806879, "grad_norm": 2.660891532897949, "learning_rate": 7.56286908632473e-06, "loss": 0.8817, "step": 5349 }, { "epoch": 0.31947927863370357, "grad_norm": 1.7149770259857178, "learning_rate": 7.5622055603476884e-06, "loss": 0.8977, "step": 5350 }, { "epoch": 0.31953899438671923, "grad_norm": 2.0057449340820312, "learning_rate": 7.561542034370646e-06, "loss": 0.8744, "step": 5351 }, { "epoch": 0.31959871013973484, "grad_norm": 1.9717295169830322, "learning_rate": 7.560878508393605e-06, "loss": 0.9018, "step": 5352 }, { "epoch": 0.3196584258927505, "grad_norm": 2.14253568649292, "learning_rate": 7.560214982416563e-06, "loss": 0.8694, "step": 5353 }, { "epoch": 0.3197181416457662, "grad_norm": 2.839756727218628, "learning_rate": 7.55955145643952e-06, "loss": 0.857, "step": 5354 }, { "epoch": 0.3197778573987818, "grad_norm": 2.231455087661743, "learning_rate": 7.558887930462478e-06, "loss": 0.8897, "step": 5355 }, { "epoch": 0.31983757315179745, "grad_norm": 4.266635417938232, "learning_rate": 7.558224404485436e-06, "loss": 0.9102, "step": 5356 }, { "epoch": 0.3198972889048131, "grad_norm": 1.9136559963226318, "learning_rate": 7.557560878508394e-06, "loss": 0.8678, "step": 5357 }, { "epoch": 0.3199570046578287, "grad_norm": 2.369750738143921, "learning_rate": 7.556897352531352e-06, "loss": 0.9479, "step": 5358 }, { "epoch": 0.3200167204108444, "grad_norm": 2.0259807109832764, "learning_rate": 7.55623382655431e-06, "loss": 0.8954, "step": 5359 }, { "epoch": 0.32007643616386, "grad_norm": 2.096723794937134, "learning_rate": 7.555570300577269e-06, "loss": 0.8745, "step": 5360 }, { "epoch": 0.32013615191687567, "grad_norm": 2.812842845916748, "learning_rate": 7.554906774600226e-06, "loss": 0.8938, "step": 5361 }, { "epoch": 0.32019586766989133, "grad_norm": 2.0092387199401855, "learning_rate": 7.554243248623184e-06, "loss": 0.888, "step": 5362 }, { "epoch": 0.32025558342290694, "grad_norm": 1.9475064277648926, "learning_rate": 7.553579722646143e-06, "loss": 0.8849, "step": 5363 }, { "epoch": 0.3203152991759226, "grad_norm": 3.3761205673217773, "learning_rate": 7.5529161966691e-06, "loss": 0.8982, "step": 5364 }, { "epoch": 0.3203750149289383, "grad_norm": 1.772983193397522, "learning_rate": 7.552252670692058e-06, "loss": 0.914, "step": 5365 }, { "epoch": 0.3204347306819539, "grad_norm": 2.920872449874878, "learning_rate": 7.5515891447150155e-06, "loss": 0.9255, "step": 5366 }, { "epoch": 0.32049444643496955, "grad_norm": 2.985398292541504, "learning_rate": 7.5509256187379744e-06, "loss": 0.8823, "step": 5367 }, { "epoch": 0.3205541621879852, "grad_norm": 1.8693703413009644, "learning_rate": 7.5502620927609325e-06, "loss": 0.9047, "step": 5368 }, { "epoch": 0.3206138779410008, "grad_norm": 1.8492487668991089, "learning_rate": 7.54959856678389e-06, "loss": 0.8452, "step": 5369 }, { "epoch": 0.3206735936940165, "grad_norm": 2.1664164066314697, "learning_rate": 7.548935040806849e-06, "loss": 0.8975, "step": 5370 }, { "epoch": 0.3207333094470321, "grad_norm": 3.1225175857543945, "learning_rate": 7.548271514829806e-06, "loss": 0.9266, "step": 5371 }, { "epoch": 0.32079302520004777, "grad_norm": 2.8453803062438965, "learning_rate": 7.547607988852764e-06, "loss": 0.9029, "step": 5372 }, { "epoch": 0.32085274095306343, "grad_norm": 1.8854562044143677, "learning_rate": 7.546944462875723e-06, "loss": 0.8658, "step": 5373 }, { "epoch": 0.32091245670607904, "grad_norm": 1.9977067708969116, "learning_rate": 7.54628093689868e-06, "loss": 0.9087, "step": 5374 }, { "epoch": 0.3209721724590947, "grad_norm": 2.63556170463562, "learning_rate": 7.545617410921638e-06, "loss": 0.896, "step": 5375 }, { "epoch": 0.3210318882121104, "grad_norm": 3.0034589767456055, "learning_rate": 7.544953884944596e-06, "loss": 0.9122, "step": 5376 }, { "epoch": 0.321091603965126, "grad_norm": 3.2262368202209473, "learning_rate": 7.544290358967555e-06, "loss": 0.9082, "step": 5377 }, { "epoch": 0.32115131971814165, "grad_norm": 2.5902976989746094, "learning_rate": 7.543626832990513e-06, "loss": 0.916, "step": 5378 }, { "epoch": 0.3212110354711573, "grad_norm": 2.3979060649871826, "learning_rate": 7.54296330701347e-06, "loss": 0.9001, "step": 5379 }, { "epoch": 0.3212707512241729, "grad_norm": 1.6867754459381104, "learning_rate": 7.542299781036428e-06, "loss": 0.9247, "step": 5380 }, { "epoch": 0.3213304669771886, "grad_norm": 2.4820382595062256, "learning_rate": 7.541636255059386e-06, "loss": 0.8925, "step": 5381 }, { "epoch": 0.32139018273020425, "grad_norm": 1.9979314804077148, "learning_rate": 7.540972729082344e-06, "loss": 0.9181, "step": 5382 }, { "epoch": 0.32144989848321986, "grad_norm": 1.6673316955566406, "learning_rate": 7.540309203105302e-06, "loss": 0.9255, "step": 5383 }, { "epoch": 0.32150961423623553, "grad_norm": 2.6355061531066895, "learning_rate": 7.53964567712826e-06, "loss": 0.8632, "step": 5384 }, { "epoch": 0.32156932998925114, "grad_norm": 1.8982092142105103, "learning_rate": 7.5389821511512186e-06, "loss": 0.8963, "step": 5385 }, { "epoch": 0.3216290457422668, "grad_norm": 2.4531383514404297, "learning_rate": 7.538318625174176e-06, "loss": 0.9402, "step": 5386 }, { "epoch": 0.32168876149528247, "grad_norm": 2.2166569232940674, "learning_rate": 7.537655099197134e-06, "loss": 0.9137, "step": 5387 }, { "epoch": 0.3217484772482981, "grad_norm": 5.789676666259766, "learning_rate": 7.536991573220093e-06, "loss": 0.8865, "step": 5388 }, { "epoch": 0.32180819300131375, "grad_norm": 2.126973867416382, "learning_rate": 7.53632804724305e-06, "loss": 0.9004, "step": 5389 }, { "epoch": 0.3218679087543294, "grad_norm": 1.9732792377471924, "learning_rate": 7.535664521266008e-06, "loss": 0.8714, "step": 5390 }, { "epoch": 0.321927624507345, "grad_norm": 3.088765859603882, "learning_rate": 7.5350009952889655e-06, "loss": 0.8772, "step": 5391 }, { "epoch": 0.3219873402603607, "grad_norm": 2.593226671218872, "learning_rate": 7.534337469311924e-06, "loss": 0.917, "step": 5392 }, { "epoch": 0.32204705601337635, "grad_norm": 1.9153152704238892, "learning_rate": 7.5336739433348825e-06, "loss": 0.8985, "step": 5393 }, { "epoch": 0.32210677176639196, "grad_norm": 2.5230064392089844, "learning_rate": 7.53301041735784e-06, "loss": 0.8936, "step": 5394 }, { "epoch": 0.32216648751940763, "grad_norm": 2.2637622356414795, "learning_rate": 7.532346891380799e-06, "loss": 0.8801, "step": 5395 }, { "epoch": 0.32222620327242324, "grad_norm": 1.7796505689620972, "learning_rate": 7.531683365403756e-06, "loss": 0.8559, "step": 5396 }, { "epoch": 0.3222859190254389, "grad_norm": 2.070354461669922, "learning_rate": 7.531019839426714e-06, "loss": 0.8736, "step": 5397 }, { "epoch": 0.32234563477845457, "grad_norm": 2.6855013370513916, "learning_rate": 7.530356313449673e-06, "loss": 0.9099, "step": 5398 }, { "epoch": 0.3224053505314702, "grad_norm": 2.088068962097168, "learning_rate": 7.52969278747263e-06, "loss": 0.9019, "step": 5399 }, { "epoch": 0.32246506628448585, "grad_norm": 1.9629172086715698, "learning_rate": 7.529029261495588e-06, "loss": 0.8856, "step": 5400 }, { "epoch": 0.32246506628448585, "eval_text_loss": 0.9365300536155701, "eval_text_runtime": 15.1806, "eval_text_samples_per_second": 263.494, "eval_text_steps_per_second": 0.527, "step": 5400 }, { "epoch": 0.32246506628448585, "eval_image_loss": 0.6580880284309387, "eval_image_runtime": 5.0505, "eval_image_samples_per_second": 791.997, "eval_image_steps_per_second": 1.584, "step": 5400 }, { "epoch": 0.32246506628448585, "eval_video_loss": 1.1160646677017212, "eval_video_runtime": 76.5131, "eval_video_samples_per_second": 52.279, "eval_video_steps_per_second": 0.105, "step": 5400 }, { "epoch": 0.3225247820375015, "grad_norm": 2.206156015396118, "learning_rate": 7.528365735518546e-06, "loss": 0.8474, "step": 5401 }, { "epoch": 0.3225844977905171, "grad_norm": 1.9147791862487793, "learning_rate": 7.527702209541505e-06, "loss": 0.8753, "step": 5402 }, { "epoch": 0.3226442135435328, "grad_norm": 1.978491187095642, "learning_rate": 7.527038683564463e-06, "loss": 0.9191, "step": 5403 }, { "epoch": 0.32270392929654845, "grad_norm": 2.5577356815338135, "learning_rate": 7.52637515758742e-06, "loss": 0.8877, "step": 5404 }, { "epoch": 0.32276364504956406, "grad_norm": 2.252964496612549, "learning_rate": 7.525711631610378e-06, "loss": 0.8709, "step": 5405 }, { "epoch": 0.32282336080257973, "grad_norm": 3.9189484119415283, "learning_rate": 7.525048105633336e-06, "loss": 0.9031, "step": 5406 }, { "epoch": 0.32288307655559534, "grad_norm": 2.20855975151062, "learning_rate": 7.524384579656294e-06, "loss": 0.9449, "step": 5407 }, { "epoch": 0.322942792308611, "grad_norm": 6.176812648773193, "learning_rate": 7.523721053679252e-06, "loss": 0.9089, "step": 5408 }, { "epoch": 0.32300250806162667, "grad_norm": 2.1496498584747314, "learning_rate": 7.52305752770221e-06, "loss": 0.9034, "step": 5409 }, { "epoch": 0.3230622238146423, "grad_norm": 3.0700607299804688, "learning_rate": 7.5223940017251685e-06, "loss": 0.8859, "step": 5410 }, { "epoch": 0.32312193956765795, "grad_norm": 2.474290609359741, "learning_rate": 7.521730475748126e-06, "loss": 0.9034, "step": 5411 }, { "epoch": 0.3231816553206736, "grad_norm": 1.8397071361541748, "learning_rate": 7.521066949771084e-06, "loss": 0.8541, "step": 5412 }, { "epoch": 0.3232413710736892, "grad_norm": 2.132077932357788, "learning_rate": 7.520403423794043e-06, "loss": 0.8407, "step": 5413 }, { "epoch": 0.3233010868267049, "grad_norm": 2.8515632152557373, "learning_rate": 7.519739897817e-06, "loss": 0.8887, "step": 5414 }, { "epoch": 0.32336080257972055, "grad_norm": 2.272683620452881, "learning_rate": 7.519076371839958e-06, "loss": 0.882, "step": 5415 }, { "epoch": 0.32342051833273616, "grad_norm": 2.110919952392578, "learning_rate": 7.5184128458629154e-06, "loss": 0.8632, "step": 5416 }, { "epoch": 0.3234802340857518, "grad_norm": 2.1560957431793213, "learning_rate": 7.517749319885874e-06, "loss": 0.8789, "step": 5417 }, { "epoch": 0.3235399498387675, "grad_norm": 2.4726738929748535, "learning_rate": 7.5170857939088325e-06, "loss": 0.8634, "step": 5418 }, { "epoch": 0.3235996655917831, "grad_norm": 3.276217460632324, "learning_rate": 7.51642226793179e-06, "loss": 0.8611, "step": 5419 }, { "epoch": 0.32365938134479877, "grad_norm": 1.9250761270523071, "learning_rate": 7.515758741954749e-06, "loss": 0.8936, "step": 5420 }, { "epoch": 0.3237190970978144, "grad_norm": 5.229334831237793, "learning_rate": 7.515095215977706e-06, "loss": 0.9151, "step": 5421 }, { "epoch": 0.32377881285083004, "grad_norm": 1.9529441595077515, "learning_rate": 7.514431690000664e-06, "loss": 0.9093, "step": 5422 }, { "epoch": 0.3238385286038457, "grad_norm": 2.3824071884155273, "learning_rate": 7.513768164023623e-06, "loss": 0.9121, "step": 5423 }, { "epoch": 0.3238982443568613, "grad_norm": 1.967246174812317, "learning_rate": 7.51310463804658e-06, "loss": 0.9015, "step": 5424 }, { "epoch": 0.323957960109877, "grad_norm": 2.928720474243164, "learning_rate": 7.512441112069538e-06, "loss": 0.8926, "step": 5425 }, { "epoch": 0.32401767586289265, "grad_norm": 2.2966275215148926, "learning_rate": 7.511777586092496e-06, "loss": 0.934, "step": 5426 }, { "epoch": 0.32407739161590826, "grad_norm": 2.150036096572876, "learning_rate": 7.5111140601154545e-06, "loss": 0.8611, "step": 5427 }, { "epoch": 0.3241371073689239, "grad_norm": 1.9519853591918945, "learning_rate": 7.510450534138413e-06, "loss": 0.9044, "step": 5428 }, { "epoch": 0.3241968231219396, "grad_norm": 2.589520215988159, "learning_rate": 7.50978700816137e-06, "loss": 0.895, "step": 5429 }, { "epoch": 0.3242565388749552, "grad_norm": 1.649460792541504, "learning_rate": 7.509123482184328e-06, "loss": 0.8575, "step": 5430 }, { "epoch": 0.32431625462797087, "grad_norm": 1.9318671226501465, "learning_rate": 7.508459956207286e-06, "loss": 0.8761, "step": 5431 }, { "epoch": 0.3243759703809865, "grad_norm": 3.7227485179901123, "learning_rate": 7.507796430230244e-06, "loss": 0.9092, "step": 5432 }, { "epoch": 0.32443568613400214, "grad_norm": 1.9422742128372192, "learning_rate": 7.507132904253202e-06, "loss": 0.8637, "step": 5433 }, { "epoch": 0.3244954018870178, "grad_norm": 2.857527732849121, "learning_rate": 7.5064693782761596e-06, "loss": 0.8544, "step": 5434 }, { "epoch": 0.3245551176400334, "grad_norm": 1.7483694553375244, "learning_rate": 7.5058058522991185e-06, "loss": 0.8892, "step": 5435 }, { "epoch": 0.3246148333930491, "grad_norm": 2.14967942237854, "learning_rate": 7.505142326322076e-06, "loss": 0.8446, "step": 5436 }, { "epoch": 0.32467454914606475, "grad_norm": 2.2686893939971924, "learning_rate": 7.504478800345034e-06, "loss": 0.8567, "step": 5437 }, { "epoch": 0.32473426489908036, "grad_norm": 2.109600067138672, "learning_rate": 7.503815274367993e-06, "loss": 0.9185, "step": 5438 }, { "epoch": 0.324793980652096, "grad_norm": 2.4526641368865967, "learning_rate": 7.50315174839095e-06, "loss": 0.858, "step": 5439 }, { "epoch": 0.3248536964051117, "grad_norm": 2.0866401195526123, "learning_rate": 7.502488222413908e-06, "loss": 0.8704, "step": 5440 }, { "epoch": 0.3249134121581273, "grad_norm": 2.009739637374878, "learning_rate": 7.501824696436865e-06, "loss": 0.8611, "step": 5441 }, { "epoch": 0.32497312791114297, "grad_norm": 2.8711037635803223, "learning_rate": 7.501161170459824e-06, "loss": 0.8792, "step": 5442 }, { "epoch": 0.32503284366415863, "grad_norm": 1.7480610609054565, "learning_rate": 7.5004976444827825e-06, "loss": 0.9095, "step": 5443 }, { "epoch": 0.32509255941717424, "grad_norm": 2.0444347858428955, "learning_rate": 7.49983411850574e-06, "loss": 0.8947, "step": 5444 }, { "epoch": 0.3251522751701899, "grad_norm": 2.1290295124053955, "learning_rate": 7.499170592528699e-06, "loss": 0.8959, "step": 5445 }, { "epoch": 0.3252119909232055, "grad_norm": 2.011803150177002, "learning_rate": 7.498507066551656e-06, "loss": 0.8757, "step": 5446 }, { "epoch": 0.3252717066762212, "grad_norm": 3.7297730445861816, "learning_rate": 7.497843540574614e-06, "loss": 0.8678, "step": 5447 }, { "epoch": 0.32533142242923685, "grad_norm": 5.98937463760376, "learning_rate": 7.497180014597573e-06, "loss": 0.8797, "step": 5448 }, { "epoch": 0.32539113818225246, "grad_norm": 3.027599811553955, "learning_rate": 7.49651648862053e-06, "loss": 0.8754, "step": 5449 }, { "epoch": 0.3254508539352681, "grad_norm": 2.3020737171173096, "learning_rate": 7.495852962643488e-06, "loss": 0.879, "step": 5450 }, { "epoch": 0.3255105696882838, "grad_norm": 1.7733263969421387, "learning_rate": 7.4951894366664456e-06, "loss": 0.8684, "step": 5451 }, { "epoch": 0.3255702854412994, "grad_norm": 6.737615585327148, "learning_rate": 7.4945259106894045e-06, "loss": 0.8771, "step": 5452 }, { "epoch": 0.32563000119431507, "grad_norm": 7.072876453399658, "learning_rate": 7.493862384712363e-06, "loss": 0.872, "step": 5453 }, { "epoch": 0.32568971694733073, "grad_norm": 4.006957054138184, "learning_rate": 7.49319885873532e-06, "loss": 0.8841, "step": 5454 }, { "epoch": 0.32574943270034634, "grad_norm": 2.2897374629974365, "learning_rate": 7.492535332758278e-06, "loss": 0.8996, "step": 5455 }, { "epoch": 0.325809148453362, "grad_norm": 2.7242937088012695, "learning_rate": 7.491871806781236e-06, "loss": 0.9089, "step": 5456 }, { "epoch": 0.3258688642063776, "grad_norm": 3.1196727752685547, "learning_rate": 7.491208280804194e-06, "loss": 0.8984, "step": 5457 }, { "epoch": 0.3259285799593933, "grad_norm": 1.8031895160675049, "learning_rate": 7.490544754827152e-06, "loss": 0.914, "step": 5458 }, { "epoch": 0.32598829571240895, "grad_norm": 1.747248888015747, "learning_rate": 7.4898812288501095e-06, "loss": 0.9177, "step": 5459 }, { "epoch": 0.32604801146542456, "grad_norm": 2.737386465072632, "learning_rate": 7.4892177028730685e-06, "loss": 0.9137, "step": 5460 }, { "epoch": 0.3261077272184402, "grad_norm": 2.040250062942505, "learning_rate": 7.488554176896026e-06, "loss": 0.8929, "step": 5461 }, { "epoch": 0.3261674429714559, "grad_norm": 2.662295341491699, "learning_rate": 7.487890650918984e-06, "loss": 0.8574, "step": 5462 }, { "epoch": 0.3262271587244715, "grad_norm": 2.594944953918457, "learning_rate": 7.487227124941943e-06, "loss": 0.8788, "step": 5463 }, { "epoch": 0.32628687447748717, "grad_norm": 1.6405967473983765, "learning_rate": 7.4865635989649e-06, "loss": 0.8837, "step": 5464 }, { "epoch": 0.32634659023050283, "grad_norm": 4.306271076202393, "learning_rate": 7.485900072987858e-06, "loss": 0.8671, "step": 5465 }, { "epoch": 0.32640630598351844, "grad_norm": 4.124363899230957, "learning_rate": 7.485236547010815e-06, "loss": 0.9088, "step": 5466 }, { "epoch": 0.3264660217365341, "grad_norm": 9.8016939163208, "learning_rate": 7.484573021033774e-06, "loss": 0.8584, "step": 5467 }, { "epoch": 0.3265257374895497, "grad_norm": 1.603459119796753, "learning_rate": 7.4839094950567324e-06, "loss": 0.869, "step": 5468 }, { "epoch": 0.3265854532425654, "grad_norm": 2.1582069396972656, "learning_rate": 7.48324596907969e-06, "loss": 0.9007, "step": 5469 }, { "epoch": 0.32664516899558105, "grad_norm": 4.3087921142578125, "learning_rate": 7.482582443102649e-06, "loss": 0.8999, "step": 5470 }, { "epoch": 0.32670488474859666, "grad_norm": 1.7272992134094238, "learning_rate": 7.481918917125606e-06, "loss": 0.8768, "step": 5471 }, { "epoch": 0.3267646005016123, "grad_norm": 2.902536392211914, "learning_rate": 7.481255391148564e-06, "loss": 0.9028, "step": 5472 }, { "epoch": 0.326824316254628, "grad_norm": 1.9111456871032715, "learning_rate": 7.480591865171523e-06, "loss": 0.9071, "step": 5473 }, { "epoch": 0.3268840320076436, "grad_norm": 3.351393938064575, "learning_rate": 7.47992833919448e-06, "loss": 0.9235, "step": 5474 }, { "epoch": 0.32694374776065926, "grad_norm": 2.469428539276123, "learning_rate": 7.479264813217438e-06, "loss": 0.8887, "step": 5475 }, { "epoch": 0.32700346351367493, "grad_norm": 4.315464973449707, "learning_rate": 7.4786012872403955e-06, "loss": 0.901, "step": 5476 }, { "epoch": 0.32706317926669054, "grad_norm": 5.569596767425537, "learning_rate": 7.4779377612633545e-06, "loss": 0.9201, "step": 5477 }, { "epoch": 0.3271228950197062, "grad_norm": 2.0493104457855225, "learning_rate": 7.477274235286313e-06, "loss": 0.9037, "step": 5478 }, { "epoch": 0.32718261077272187, "grad_norm": 2.955252170562744, "learning_rate": 7.47661070930927e-06, "loss": 0.9233, "step": 5479 }, { "epoch": 0.3272423265257375, "grad_norm": 2.179239273071289, "learning_rate": 7.475947183332228e-06, "loss": 0.8873, "step": 5480 }, { "epoch": 0.32730204227875315, "grad_norm": 1.6770342588424683, "learning_rate": 7.475283657355186e-06, "loss": 0.8552, "step": 5481 }, { "epoch": 0.32736175803176876, "grad_norm": 2.8447437286376953, "learning_rate": 7.474620131378144e-06, "loss": 0.9402, "step": 5482 }, { "epoch": 0.3274214737847844, "grad_norm": 2.260632276535034, "learning_rate": 7.473956605401102e-06, "loss": 0.9033, "step": 5483 }, { "epoch": 0.3274811895378001, "grad_norm": 3.085319995880127, "learning_rate": 7.4732930794240595e-06, "loss": 0.875, "step": 5484 }, { "epoch": 0.3275409052908157, "grad_norm": 1.960575819015503, "learning_rate": 7.4726295534470184e-06, "loss": 0.8611, "step": 5485 }, { "epoch": 0.32760062104383136, "grad_norm": 1.8102222681045532, "learning_rate": 7.471966027469976e-06, "loss": 0.8565, "step": 5486 }, { "epoch": 0.32766033679684703, "grad_norm": 2.2453463077545166, "learning_rate": 7.471302501492934e-06, "loss": 0.9203, "step": 5487 }, { "epoch": 0.32772005254986264, "grad_norm": 2.0648996829986572, "learning_rate": 7.470638975515893e-06, "loss": 0.8816, "step": 5488 }, { "epoch": 0.3277797683028783, "grad_norm": 2.623427629470825, "learning_rate": 7.46997544953885e-06, "loss": 0.9054, "step": 5489 }, { "epoch": 0.32783948405589397, "grad_norm": 1.8687328100204468, "learning_rate": 7.469311923561808e-06, "loss": 0.93, "step": 5490 }, { "epoch": 0.3278991998089096, "grad_norm": 2.0719165802001953, "learning_rate": 7.468648397584765e-06, "loss": 0.8874, "step": 5491 }, { "epoch": 0.32795891556192525, "grad_norm": 2.5108978748321533, "learning_rate": 7.467984871607724e-06, "loss": 0.8621, "step": 5492 }, { "epoch": 0.32801863131494086, "grad_norm": 2.5508487224578857, "learning_rate": 7.467321345630682e-06, "loss": 0.9145, "step": 5493 }, { "epoch": 0.3280783470679565, "grad_norm": 5.383915424346924, "learning_rate": 7.46665781965364e-06, "loss": 0.8943, "step": 5494 }, { "epoch": 0.3281380628209722, "grad_norm": 3.184687852859497, "learning_rate": 7.465994293676599e-06, "loss": 0.9081, "step": 5495 }, { "epoch": 0.3281977785739878, "grad_norm": 1.833663821220398, "learning_rate": 7.465330767699556e-06, "loss": 0.8736, "step": 5496 }, { "epoch": 0.32825749432700346, "grad_norm": 2.4475326538085938, "learning_rate": 7.464667241722514e-06, "loss": 0.9296, "step": 5497 }, { "epoch": 0.32831721008001913, "grad_norm": 2.334169864654541, "learning_rate": 7.464003715745473e-06, "loss": 0.8925, "step": 5498 }, { "epoch": 0.32837692583303474, "grad_norm": 2.2890782356262207, "learning_rate": 7.46334018976843e-06, "loss": 0.9004, "step": 5499 }, { "epoch": 0.3284366415860504, "grad_norm": 1.7716180086135864, "learning_rate": 7.462676663791388e-06, "loss": 0.8966, "step": 5500 }, { "epoch": 0.32849635733906607, "grad_norm": 1.9632560014724731, "learning_rate": 7.4620131378143455e-06, "loss": 0.8806, "step": 5501 }, { "epoch": 0.3285560730920817, "grad_norm": 2.826070547103882, "learning_rate": 7.4613496118373045e-06, "loss": 0.9463, "step": 5502 }, { "epoch": 0.32861578884509735, "grad_norm": 2.8265790939331055, "learning_rate": 7.4606860858602626e-06, "loss": 0.904, "step": 5503 }, { "epoch": 0.32867550459811296, "grad_norm": 2.7384746074676514, "learning_rate": 7.46002255988322e-06, "loss": 0.8956, "step": 5504 }, { "epoch": 0.3287352203511286, "grad_norm": 2.5950636863708496, "learning_rate": 7.459359033906178e-06, "loss": 0.8886, "step": 5505 }, { "epoch": 0.3287949361041443, "grad_norm": 1.587059736251831, "learning_rate": 7.458695507929136e-06, "loss": 0.8613, "step": 5506 }, { "epoch": 0.3288546518571599, "grad_norm": 3.2606797218322754, "learning_rate": 7.458031981952094e-06, "loss": 0.9047, "step": 5507 }, { "epoch": 0.32891436761017556, "grad_norm": 1.7788606882095337, "learning_rate": 7.457368455975052e-06, "loss": 0.8616, "step": 5508 }, { "epoch": 0.3289740833631912, "grad_norm": 1.9899213314056396, "learning_rate": 7.4567049299980095e-06, "loss": 0.8968, "step": 5509 }, { "epoch": 0.32903379911620684, "grad_norm": 1.7570724487304688, "learning_rate": 7.456041404020968e-06, "loss": 0.921, "step": 5510 }, { "epoch": 0.3290935148692225, "grad_norm": 2.4573607444763184, "learning_rate": 7.455377878043926e-06, "loss": 0.8963, "step": 5511 }, { "epoch": 0.32915323062223817, "grad_norm": 3.356919527053833, "learning_rate": 7.454714352066884e-06, "loss": 0.9197, "step": 5512 }, { "epoch": 0.3292129463752538, "grad_norm": 2.033590316772461, "learning_rate": 7.454050826089843e-06, "loss": 0.8902, "step": 5513 }, { "epoch": 0.32927266212826944, "grad_norm": 2.0189461708068848, "learning_rate": 7.4533873001128e-06, "loss": 0.894, "step": 5514 }, { "epoch": 0.3293323778812851, "grad_norm": 1.9133187532424927, "learning_rate": 7.452723774135758e-06, "loss": 0.8916, "step": 5515 }, { "epoch": 0.3293920936343007, "grad_norm": 2.31476092338562, "learning_rate": 7.452060248158715e-06, "loss": 0.8697, "step": 5516 }, { "epoch": 0.3294518093873164, "grad_norm": 4.379959583282471, "learning_rate": 7.451396722181674e-06, "loss": 0.884, "step": 5517 }, { "epoch": 0.329511525140332, "grad_norm": 2.6626007556915283, "learning_rate": 7.450733196204632e-06, "loss": 0.894, "step": 5518 }, { "epoch": 0.32957124089334766, "grad_norm": 2.0634071826934814, "learning_rate": 7.45006967022759e-06, "loss": 0.8882, "step": 5519 }, { "epoch": 0.3296309566463633, "grad_norm": 1.9591732025146484, "learning_rate": 7.4494061442505486e-06, "loss": 0.8586, "step": 5520 }, { "epoch": 0.32969067239937894, "grad_norm": 3.1061244010925293, "learning_rate": 7.448742618273506e-06, "loss": 0.9023, "step": 5521 }, { "epoch": 0.3297503881523946, "grad_norm": 2.6458425521850586, "learning_rate": 7.448079092296464e-06, "loss": 0.8887, "step": 5522 }, { "epoch": 0.32981010390541027, "grad_norm": 1.90631902217865, "learning_rate": 7.447415566319423e-06, "loss": 0.917, "step": 5523 }, { "epoch": 0.3298698196584259, "grad_norm": 4.718690395355225, "learning_rate": 7.44675204034238e-06, "loss": 0.9286, "step": 5524 }, { "epoch": 0.32992953541144154, "grad_norm": 1.6465998888015747, "learning_rate": 7.446088514365338e-06, "loss": 0.905, "step": 5525 }, { "epoch": 0.3299892511644572, "grad_norm": 1.9844592809677124, "learning_rate": 7.4454249883882955e-06, "loss": 0.9037, "step": 5526 }, { "epoch": 0.3300489669174728, "grad_norm": 1.8079783916473389, "learning_rate": 7.4447614624112544e-06, "loss": 0.9237, "step": 5527 }, { "epoch": 0.3301086826704885, "grad_norm": 2.642932415008545, "learning_rate": 7.4440979364342125e-06, "loss": 0.941, "step": 5528 }, { "epoch": 0.3301683984235041, "grad_norm": 2.966005325317383, "learning_rate": 7.44343441045717e-06, "loss": 0.8698, "step": 5529 }, { "epoch": 0.33022811417651976, "grad_norm": 7.54641056060791, "learning_rate": 7.442770884480128e-06, "loss": 0.8969, "step": 5530 }, { "epoch": 0.3302878299295354, "grad_norm": 1.67332923412323, "learning_rate": 7.442107358503086e-06, "loss": 0.8998, "step": 5531 }, { "epoch": 0.33034754568255104, "grad_norm": 2.4388906955718994, "learning_rate": 7.441443832526044e-06, "loss": 0.9096, "step": 5532 }, { "epoch": 0.3304072614355667, "grad_norm": 3.017958402633667, "learning_rate": 7.440780306549002e-06, "loss": 0.8901, "step": 5533 }, { "epoch": 0.33046697718858237, "grad_norm": 1.9975595474243164, "learning_rate": 7.4401167805719594e-06, "loss": 0.8801, "step": 5534 }, { "epoch": 0.330526692941598, "grad_norm": 3.3056480884552, "learning_rate": 7.439453254594918e-06, "loss": 0.8862, "step": 5535 }, { "epoch": 0.33058640869461364, "grad_norm": 2.433868408203125, "learning_rate": 7.438789728617876e-06, "loss": 0.8887, "step": 5536 }, { "epoch": 0.3306461244476293, "grad_norm": 2.6480908393859863, "learning_rate": 7.438126202640834e-06, "loss": 0.8865, "step": 5537 }, { "epoch": 0.3307058402006449, "grad_norm": 3.073256254196167, "learning_rate": 7.437462676663793e-06, "loss": 0.9102, "step": 5538 }, { "epoch": 0.3307655559536606, "grad_norm": 2.3229358196258545, "learning_rate": 7.43679915068675e-06, "loss": 0.9064, "step": 5539 }, { "epoch": 0.3308252717066762, "grad_norm": 2.411949872970581, "learning_rate": 7.436135624709708e-06, "loss": 0.8765, "step": 5540 }, { "epoch": 0.33088498745969186, "grad_norm": 2.132279396057129, "learning_rate": 7.435472098732665e-06, "loss": 0.8634, "step": 5541 }, { "epoch": 0.3309447032127075, "grad_norm": 1.895796775817871, "learning_rate": 7.434808572755624e-06, "loss": 0.8875, "step": 5542 }, { "epoch": 0.33100441896572314, "grad_norm": 2.865490436553955, "learning_rate": 7.434145046778582e-06, "loss": 0.8854, "step": 5543 }, { "epoch": 0.3310641347187388, "grad_norm": 1.9670754671096802, "learning_rate": 7.43348152080154e-06, "loss": 0.8956, "step": 5544 }, { "epoch": 0.33112385047175447, "grad_norm": 2.472832202911377, "learning_rate": 7.4328179948244985e-06, "loss": 0.8881, "step": 5545 }, { "epoch": 0.3311835662247701, "grad_norm": 2.0500900745391846, "learning_rate": 7.432154468847456e-06, "loss": 0.8845, "step": 5546 }, { "epoch": 0.33124328197778574, "grad_norm": 2.2771849632263184, "learning_rate": 7.431490942870414e-06, "loss": 0.8755, "step": 5547 }, { "epoch": 0.3313029977308014, "grad_norm": 2.4279613494873047, "learning_rate": 7.430827416893373e-06, "loss": 0.8597, "step": 5548 }, { "epoch": 0.331362713483817, "grad_norm": 2.941880226135254, "learning_rate": 7.43016389091633e-06, "loss": 0.8826, "step": 5549 }, { "epoch": 0.3314224292368327, "grad_norm": 2.322706699371338, "learning_rate": 7.429500364939288e-06, "loss": 0.9037, "step": 5550 }, { "epoch": 0.33148214498984835, "grad_norm": 1.733999490737915, "learning_rate": 7.4288368389622455e-06, "loss": 0.923, "step": 5551 }, { "epoch": 0.33154186074286396, "grad_norm": 1.9607285261154175, "learning_rate": 7.428173312985204e-06, "loss": 0.8889, "step": 5552 }, { "epoch": 0.3316015764958796, "grad_norm": 1.7950540781021118, "learning_rate": 7.4275097870081625e-06, "loss": 0.8964, "step": 5553 }, { "epoch": 0.33166129224889523, "grad_norm": 1.8801507949829102, "learning_rate": 7.42684626103112e-06, "loss": 0.9102, "step": 5554 }, { "epoch": 0.3317210080019109, "grad_norm": 2.1978859901428223, "learning_rate": 7.426182735054078e-06, "loss": 0.8792, "step": 5555 }, { "epoch": 0.33178072375492657, "grad_norm": 3.3202061653137207, "learning_rate": 7.425519209077036e-06, "loss": 0.8526, "step": 5556 }, { "epoch": 0.3318404395079422, "grad_norm": 1.9065213203430176, "learning_rate": 7.424855683099994e-06, "loss": 0.9023, "step": 5557 }, { "epoch": 0.33190015526095784, "grad_norm": 1.7078371047973633, "learning_rate": 7.424192157122952e-06, "loss": 0.8697, "step": 5558 }, { "epoch": 0.3319598710139735, "grad_norm": 2.3300883769989014, "learning_rate": 7.423528631145909e-06, "loss": 0.9086, "step": 5559 }, { "epoch": 0.3320195867669891, "grad_norm": 2.001593828201294, "learning_rate": 7.422865105168868e-06, "loss": 0.8935, "step": 5560 }, { "epoch": 0.3320793025200048, "grad_norm": 1.885810136795044, "learning_rate": 7.422201579191826e-06, "loss": 0.8826, "step": 5561 }, { "epoch": 0.33213901827302045, "grad_norm": 2.6271755695343018, "learning_rate": 7.421538053214784e-06, "loss": 0.8803, "step": 5562 }, { "epoch": 0.33219873402603606, "grad_norm": 1.949475646018982, "learning_rate": 7.420874527237743e-06, "loss": 0.93, "step": 5563 }, { "epoch": 0.3322584497790517, "grad_norm": 2.907963991165161, "learning_rate": 7.4202110012607e-06, "loss": 0.9313, "step": 5564 }, { "epoch": 0.33231816553206733, "grad_norm": 2.011587619781494, "learning_rate": 7.419547475283658e-06, "loss": 0.9188, "step": 5565 }, { "epoch": 0.332377881285083, "grad_norm": 2.357574701309204, "learning_rate": 7.418883949306615e-06, "loss": 0.8839, "step": 5566 }, { "epoch": 0.33243759703809866, "grad_norm": 2.2077605724334717, "learning_rate": 7.418220423329574e-06, "loss": 0.8723, "step": 5567 }, { "epoch": 0.3324973127911143, "grad_norm": 3.616393804550171, "learning_rate": 7.417556897352532e-06, "loss": 0.8772, "step": 5568 }, { "epoch": 0.33255702854412994, "grad_norm": 1.7707856893539429, "learning_rate": 7.4168933713754896e-06, "loss": 0.8888, "step": 5569 }, { "epoch": 0.3326167442971456, "grad_norm": 2.9200491905212402, "learning_rate": 7.4162298453984485e-06, "loss": 0.8621, "step": 5570 }, { "epoch": 0.3326764600501612, "grad_norm": 3.051830291748047, "learning_rate": 7.415566319421406e-06, "loss": 0.8876, "step": 5571 }, { "epoch": 0.3327361758031769, "grad_norm": 2.0541701316833496, "learning_rate": 7.414902793444364e-06, "loss": 0.8727, "step": 5572 }, { "epoch": 0.33279589155619255, "grad_norm": 1.9222418069839478, "learning_rate": 7.414239267467323e-06, "loss": 0.8663, "step": 5573 }, { "epoch": 0.33285560730920816, "grad_norm": 2.2434327602386475, "learning_rate": 7.41357574149028e-06, "loss": 0.8908, "step": 5574 }, { "epoch": 0.3329153230622238, "grad_norm": 2.3489956855773926, "learning_rate": 7.412912215513238e-06, "loss": 0.85, "step": 5575 }, { "epoch": 0.33297503881523943, "grad_norm": 2.3438689708709717, "learning_rate": 7.4122486895361954e-06, "loss": 0.9096, "step": 5576 }, { "epoch": 0.3330347545682551, "grad_norm": 3.2040746212005615, "learning_rate": 7.411585163559154e-06, "loss": 0.9161, "step": 5577 }, { "epoch": 0.33309447032127076, "grad_norm": 3.7345221042633057, "learning_rate": 7.4109216375821125e-06, "loss": 0.8824, "step": 5578 }, { "epoch": 0.3331541860742864, "grad_norm": 1.7656537294387817, "learning_rate": 7.41025811160507e-06, "loss": 0.9092, "step": 5579 }, { "epoch": 0.33321390182730204, "grad_norm": 2.086017370223999, "learning_rate": 7.409594585628028e-06, "loss": 0.896, "step": 5580 }, { "epoch": 0.3332736175803177, "grad_norm": 1.8102774620056152, "learning_rate": 7.408931059650986e-06, "loss": 0.8683, "step": 5581 }, { "epoch": 0.3333333333333333, "grad_norm": 4.163419246673584, "learning_rate": 7.408267533673944e-06, "loss": 0.8807, "step": 5582 }, { "epoch": 0.333393049086349, "grad_norm": 2.3327205181121826, "learning_rate": 7.407604007696902e-06, "loss": 0.8857, "step": 5583 }, { "epoch": 0.33345276483936465, "grad_norm": 3.3273847103118896, "learning_rate": 7.406940481719859e-06, "loss": 0.9016, "step": 5584 }, { "epoch": 0.33351248059238026, "grad_norm": 3.1351935863494873, "learning_rate": 7.406276955742818e-06, "loss": 0.9014, "step": 5585 }, { "epoch": 0.3335721963453959, "grad_norm": 2.1248393058776855, "learning_rate": 7.405613429765776e-06, "loss": 0.8966, "step": 5586 }, { "epoch": 0.3336319120984116, "grad_norm": 2.8510000705718994, "learning_rate": 7.404949903788734e-06, "loss": 0.9056, "step": 5587 }, { "epoch": 0.3336916278514272, "grad_norm": 1.9545044898986816, "learning_rate": 7.404286377811693e-06, "loss": 0.8739, "step": 5588 }, { "epoch": 0.33375134360444286, "grad_norm": 2.050675392150879, "learning_rate": 7.40362285183465e-06, "loss": 0.8896, "step": 5589 }, { "epoch": 0.3338110593574585, "grad_norm": 2.613058090209961, "learning_rate": 7.402959325857608e-06, "loss": 0.8913, "step": 5590 }, { "epoch": 0.33387077511047414, "grad_norm": 1.9359954595565796, "learning_rate": 7.402295799880565e-06, "loss": 0.8844, "step": 5591 }, { "epoch": 0.3339304908634898, "grad_norm": 2.04792857170105, "learning_rate": 7.401632273903524e-06, "loss": 0.8959, "step": 5592 }, { "epoch": 0.3339902066165054, "grad_norm": 2.1732964515686035, "learning_rate": 7.400968747926482e-06, "loss": 0.8877, "step": 5593 }, { "epoch": 0.3340499223695211, "grad_norm": 2.543553352355957, "learning_rate": 7.4003052219494395e-06, "loss": 0.9207, "step": 5594 }, { "epoch": 0.33410963812253675, "grad_norm": 3.2959892749786377, "learning_rate": 7.3996416959723985e-06, "loss": 0.9083, "step": 5595 }, { "epoch": 0.33416935387555236, "grad_norm": 4.3116841316223145, "learning_rate": 7.398978169995356e-06, "loss": 0.8973, "step": 5596 }, { "epoch": 0.334229069628568, "grad_norm": 1.834020733833313, "learning_rate": 7.398314644018314e-06, "loss": 0.9103, "step": 5597 }, { "epoch": 0.3342887853815837, "grad_norm": 2.3178956508636475, "learning_rate": 7.397651118041273e-06, "loss": 0.9159, "step": 5598 }, { "epoch": 0.3343485011345993, "grad_norm": 2.512706995010376, "learning_rate": 7.39698759206423e-06, "loss": 0.8823, "step": 5599 }, { "epoch": 0.33440821688761496, "grad_norm": 2.9970343112945557, "learning_rate": 7.396324066087188e-06, "loss": 0.911, "step": 5600 }, { "epoch": 0.33440821688761496, "eval_text_loss": 0.9358161687850952, "eval_text_runtime": 15.2112, "eval_text_samples_per_second": 262.965, "eval_text_steps_per_second": 0.526, "step": 5600 }, { "epoch": 0.33440821688761496, "eval_image_loss": 0.6555365920066833, "eval_image_runtime": 5.0337, "eval_image_samples_per_second": 794.652, "eval_image_steps_per_second": 1.589, "step": 5600 }, { "epoch": 0.33440821688761496, "eval_video_loss": 1.1133735179901123, "eval_video_runtime": 76.0519, "eval_video_samples_per_second": 52.596, "eval_video_steps_per_second": 0.105, "step": 5600 }, { "epoch": 0.33446793264063057, "grad_norm": 3.360280990600586, "learning_rate": 7.395660540110145e-06, "loss": 0.9082, "step": 5601 }, { "epoch": 0.33452764839364624, "grad_norm": 11.770442962646484, "learning_rate": 7.394997014133104e-06, "loss": 0.8996, "step": 5602 }, { "epoch": 0.3345873641466619, "grad_norm": 2.228471517562866, "learning_rate": 7.3943334881560624e-06, "loss": 0.8694, "step": 5603 }, { "epoch": 0.3346470798996775, "grad_norm": 2.2647790908813477, "learning_rate": 7.39366996217902e-06, "loss": 0.8691, "step": 5604 }, { "epoch": 0.3347067956526932, "grad_norm": 3.4857122898101807, "learning_rate": 7.393006436201978e-06, "loss": 0.9143, "step": 5605 }, { "epoch": 0.33476651140570884, "grad_norm": 2.3666012287139893, "learning_rate": 7.392342910224936e-06, "loss": 0.8649, "step": 5606 }, { "epoch": 0.33482622715872445, "grad_norm": 1.88980233669281, "learning_rate": 7.391679384247894e-06, "loss": 0.8745, "step": 5607 }, { "epoch": 0.3348859429117401, "grad_norm": 1.9311095476150513, "learning_rate": 7.391015858270852e-06, "loss": 0.8643, "step": 5608 }, { "epoch": 0.3349456586647558, "grad_norm": 1.9865330457687378, "learning_rate": 7.390352332293809e-06, "loss": 0.912, "step": 5609 }, { "epoch": 0.3350053744177714, "grad_norm": 3.134882926940918, "learning_rate": 7.389688806316768e-06, "loss": 0.9121, "step": 5610 }, { "epoch": 0.33506509017078706, "grad_norm": 2.581174850463867, "learning_rate": 7.3890252803397256e-06, "loss": 0.8614, "step": 5611 }, { "epoch": 0.3351248059238027, "grad_norm": 2.444077730178833, "learning_rate": 7.388361754362684e-06, "loss": 0.9067, "step": 5612 }, { "epoch": 0.33518452167681834, "grad_norm": 2.275141716003418, "learning_rate": 7.387698228385643e-06, "loss": 0.8703, "step": 5613 }, { "epoch": 0.335244237429834, "grad_norm": 4.556289196014404, "learning_rate": 7.3870347024086e-06, "loss": 0.9096, "step": 5614 }, { "epoch": 0.3353039531828496, "grad_norm": 2.4674108028411865, "learning_rate": 7.386371176431558e-06, "loss": 0.8562, "step": 5615 }, { "epoch": 0.3353636689358653, "grad_norm": 3.237592935562134, "learning_rate": 7.385707650454515e-06, "loss": 0.8673, "step": 5616 }, { "epoch": 0.33542338468888094, "grad_norm": 2.8727521896362305, "learning_rate": 7.385044124477474e-06, "loss": 0.8987, "step": 5617 }, { "epoch": 0.33548310044189655, "grad_norm": 2.447343349456787, "learning_rate": 7.384380598500432e-06, "loss": 0.9245, "step": 5618 }, { "epoch": 0.3355428161949122, "grad_norm": 2.8590497970581055, "learning_rate": 7.3837170725233895e-06, "loss": 0.9176, "step": 5619 }, { "epoch": 0.3356025319479279, "grad_norm": 2.702908992767334, "learning_rate": 7.3830535465463485e-06, "loss": 0.8936, "step": 5620 }, { "epoch": 0.3356622477009435, "grad_norm": 2.076984405517578, "learning_rate": 7.382390020569306e-06, "loss": 0.8779, "step": 5621 }, { "epoch": 0.33572196345395916, "grad_norm": 2.4874539375305176, "learning_rate": 7.381726494592264e-06, "loss": 0.8637, "step": 5622 }, { "epoch": 0.3357816792069748, "grad_norm": 3.4722092151641846, "learning_rate": 7.381062968615223e-06, "loss": 0.8486, "step": 5623 }, { "epoch": 0.33584139495999044, "grad_norm": 3.436176300048828, "learning_rate": 7.38039944263818e-06, "loss": 0.8827, "step": 5624 }, { "epoch": 0.3359011107130061, "grad_norm": 2.3392601013183594, "learning_rate": 7.379735916661138e-06, "loss": 0.8967, "step": 5625 }, { "epoch": 0.3359608264660217, "grad_norm": 2.6576313972473145, "learning_rate": 7.379072390684095e-06, "loss": 0.863, "step": 5626 }, { "epoch": 0.3360205422190374, "grad_norm": 1.9267370700836182, "learning_rate": 7.378408864707054e-06, "loss": 0.895, "step": 5627 }, { "epoch": 0.33608025797205304, "grad_norm": 1.9748568534851074, "learning_rate": 7.377745338730012e-06, "loss": 0.8896, "step": 5628 }, { "epoch": 0.33613997372506865, "grad_norm": 2.2792375087738037, "learning_rate": 7.37708181275297e-06, "loss": 0.878, "step": 5629 }, { "epoch": 0.3361996894780843, "grad_norm": 13.64553451538086, "learning_rate": 7.376418286775929e-06, "loss": 0.9109, "step": 5630 }, { "epoch": 0.3362594052311, "grad_norm": 1.9486263990402222, "learning_rate": 7.375754760798886e-06, "loss": 0.8994, "step": 5631 }, { "epoch": 0.3363191209841156, "grad_norm": 3.3429901599884033, "learning_rate": 7.375091234821844e-06, "loss": 0.8792, "step": 5632 }, { "epoch": 0.33637883673713126, "grad_norm": 4.446897983551025, "learning_rate": 7.374427708844802e-06, "loss": 0.9164, "step": 5633 }, { "epoch": 0.3364385524901469, "grad_norm": 2.622325897216797, "learning_rate": 7.373764182867759e-06, "loss": 0.8756, "step": 5634 }, { "epoch": 0.33649826824316253, "grad_norm": 3.7191081047058105, "learning_rate": 7.373100656890718e-06, "loss": 0.9155, "step": 5635 }, { "epoch": 0.3365579839961782, "grad_norm": 1.8573181629180908, "learning_rate": 7.3724371309136755e-06, "loss": 0.8678, "step": 5636 }, { "epoch": 0.3366176997491938, "grad_norm": 1.72369384765625, "learning_rate": 7.371773604936634e-06, "loss": 0.9028, "step": 5637 }, { "epoch": 0.3366774155022095, "grad_norm": 2.186802387237549, "learning_rate": 7.3711100789595926e-06, "loss": 0.8812, "step": 5638 }, { "epoch": 0.33673713125522514, "grad_norm": 2.8118555545806885, "learning_rate": 7.37044655298255e-06, "loss": 0.851, "step": 5639 }, { "epoch": 0.33679684700824075, "grad_norm": 1.7173069715499878, "learning_rate": 7.369783027005508e-06, "loss": 0.8689, "step": 5640 }, { "epoch": 0.3368565627612564, "grad_norm": 3.719789505004883, "learning_rate": 7.369119501028465e-06, "loss": 0.9193, "step": 5641 }, { "epoch": 0.3369162785142721, "grad_norm": 4.527760028839111, "learning_rate": 7.368455975051424e-06, "loss": 0.8528, "step": 5642 }, { "epoch": 0.3369759942672877, "grad_norm": 1.8072758913040161, "learning_rate": 7.367792449074382e-06, "loss": 0.8946, "step": 5643 }, { "epoch": 0.33703571002030336, "grad_norm": 2.3563101291656494, "learning_rate": 7.3671289230973395e-06, "loss": 0.8853, "step": 5644 }, { "epoch": 0.337095425773319, "grad_norm": 2.3769490718841553, "learning_rate": 7.3664653971202984e-06, "loss": 0.8723, "step": 5645 }, { "epoch": 0.33715514152633463, "grad_norm": 2.208528518676758, "learning_rate": 7.365801871143256e-06, "loss": 0.8642, "step": 5646 }, { "epoch": 0.3372148572793503, "grad_norm": 2.4286017417907715, "learning_rate": 7.365138345166214e-06, "loss": 0.8889, "step": 5647 }, { "epoch": 0.33727457303236597, "grad_norm": 1.6441271305084229, "learning_rate": 7.364474819189173e-06, "loss": 0.895, "step": 5648 }, { "epoch": 0.3373342887853816, "grad_norm": 2.2517075538635254, "learning_rate": 7.36381129321213e-06, "loss": 0.8846, "step": 5649 }, { "epoch": 0.33739400453839724, "grad_norm": 1.797041416168213, "learning_rate": 7.363147767235088e-06, "loss": 0.8995, "step": 5650 }, { "epoch": 0.33745372029141285, "grad_norm": 2.207716464996338, "learning_rate": 7.362484241258045e-06, "loss": 0.8869, "step": 5651 }, { "epoch": 0.3375134360444285, "grad_norm": 3.4756572246551514, "learning_rate": 7.361820715281004e-06, "loss": 0.8994, "step": 5652 }, { "epoch": 0.3375731517974442, "grad_norm": 2.605234384536743, "learning_rate": 7.361157189303962e-06, "loss": 0.8665, "step": 5653 }, { "epoch": 0.3376328675504598, "grad_norm": 3.9288125038146973, "learning_rate": 7.36049366332692e-06, "loss": 0.8879, "step": 5654 }, { "epoch": 0.33769258330347546, "grad_norm": 1.6888831853866577, "learning_rate": 7.359830137349879e-06, "loss": 0.8788, "step": 5655 }, { "epoch": 0.3377522990564911, "grad_norm": 2.602506399154663, "learning_rate": 7.359166611372836e-06, "loss": 0.9083, "step": 5656 }, { "epoch": 0.33781201480950673, "grad_norm": 3.624791383743286, "learning_rate": 7.358503085395794e-06, "loss": 0.9162, "step": 5657 }, { "epoch": 0.3378717305625224, "grad_norm": 2.3368735313415527, "learning_rate": 7.357839559418752e-06, "loss": 0.8811, "step": 5658 }, { "epoch": 0.33793144631553806, "grad_norm": 2.640442132949829, "learning_rate": 7.35717603344171e-06, "loss": 0.8851, "step": 5659 }, { "epoch": 0.3379911620685537, "grad_norm": 2.096649646759033, "learning_rate": 7.356512507464668e-06, "loss": 0.8994, "step": 5660 }, { "epoch": 0.33805087782156934, "grad_norm": 1.6268622875213623, "learning_rate": 7.3558489814876255e-06, "loss": 0.8751, "step": 5661 }, { "epoch": 0.33811059357458495, "grad_norm": 1.9810938835144043, "learning_rate": 7.355185455510584e-06, "loss": 0.9038, "step": 5662 }, { "epoch": 0.3381703093276006, "grad_norm": 3.821194648742676, "learning_rate": 7.3545219295335425e-06, "loss": 0.904, "step": 5663 }, { "epoch": 0.3382300250806163, "grad_norm": 1.9720444679260254, "learning_rate": 7.3538584035565e-06, "loss": 0.8871, "step": 5664 }, { "epoch": 0.3382897408336319, "grad_norm": 1.739346981048584, "learning_rate": 7.353194877579458e-06, "loss": 0.8799, "step": 5665 }, { "epoch": 0.33834945658664756, "grad_norm": 2.3502190113067627, "learning_rate": 7.352531351602415e-06, "loss": 0.8777, "step": 5666 }, { "epoch": 0.3384091723396632, "grad_norm": 2.067301034927368, "learning_rate": 7.351867825625374e-06, "loss": 0.911, "step": 5667 }, { "epoch": 0.33846888809267883, "grad_norm": 4.059785842895508, "learning_rate": 7.351204299648332e-06, "loss": 0.872, "step": 5668 }, { "epoch": 0.3385286038456945, "grad_norm": 2.138150453567505, "learning_rate": 7.3505407736712895e-06, "loss": 0.8869, "step": 5669 }, { "epoch": 0.33858831959871016, "grad_norm": 2.3517696857452393, "learning_rate": 7.349877247694248e-06, "loss": 0.8872, "step": 5670 }, { "epoch": 0.3386480353517258, "grad_norm": 2.434812068939209, "learning_rate": 7.349213721717206e-06, "loss": 0.9006, "step": 5671 }, { "epoch": 0.33870775110474144, "grad_norm": 2.8244712352752686, "learning_rate": 7.348550195740164e-06, "loss": 0.9196, "step": 5672 }, { "epoch": 0.33876746685775705, "grad_norm": 2.721984624862671, "learning_rate": 7.347886669763123e-06, "loss": 0.9279, "step": 5673 }, { "epoch": 0.3388271826107727, "grad_norm": 2.5538599491119385, "learning_rate": 7.34722314378608e-06, "loss": 0.9068, "step": 5674 }, { "epoch": 0.3388868983637884, "grad_norm": 2.111984968185425, "learning_rate": 7.346559617809038e-06, "loss": 0.9684, "step": 5675 }, { "epoch": 0.338946614116804, "grad_norm": 3.4385364055633545, "learning_rate": 7.345896091831995e-06, "loss": 0.9048, "step": 5676 }, { "epoch": 0.33900632986981966, "grad_norm": 2.1212871074676514, "learning_rate": 7.345232565854954e-06, "loss": 0.9117, "step": 5677 }, { "epoch": 0.3390660456228353, "grad_norm": 2.6975667476654053, "learning_rate": 7.344569039877912e-06, "loss": 0.8805, "step": 5678 }, { "epoch": 0.33912576137585093, "grad_norm": 2.903141736984253, "learning_rate": 7.34390551390087e-06, "loss": 0.8921, "step": 5679 }, { "epoch": 0.3391854771288666, "grad_norm": 3.06140398979187, "learning_rate": 7.3432419879238286e-06, "loss": 0.8645, "step": 5680 }, { "epoch": 0.33924519288188226, "grad_norm": 1.8723512887954712, "learning_rate": 7.342578461946786e-06, "loss": 0.8789, "step": 5681 }, { "epoch": 0.3393049086348979, "grad_norm": 3.0803122520446777, "learning_rate": 7.341914935969744e-06, "loss": 0.8766, "step": 5682 }, { "epoch": 0.33936462438791354, "grad_norm": 2.133632183074951, "learning_rate": 7.341251409992702e-06, "loss": 0.8874, "step": 5683 }, { "epoch": 0.3394243401409292, "grad_norm": 2.2273361682891846, "learning_rate": 7.34058788401566e-06, "loss": 0.8966, "step": 5684 }, { "epoch": 0.3394840558939448, "grad_norm": 2.741483449935913, "learning_rate": 7.339924358038618e-06, "loss": 0.8636, "step": 5685 }, { "epoch": 0.3395437716469605, "grad_norm": 1.9439737796783447, "learning_rate": 7.3392608320615755e-06, "loss": 0.9343, "step": 5686 }, { "epoch": 0.3396034873999761, "grad_norm": 2.5707595348358154, "learning_rate": 7.3385973060845336e-06, "loss": 0.9071, "step": 5687 }, { "epoch": 0.33966320315299176, "grad_norm": 2.7650625705718994, "learning_rate": 7.3379337801074925e-06, "loss": 0.8879, "step": 5688 }, { "epoch": 0.3397229189060074, "grad_norm": 3.2395119667053223, "learning_rate": 7.33727025413045e-06, "loss": 0.9054, "step": 5689 }, { "epoch": 0.33978263465902303, "grad_norm": 2.154574394226074, "learning_rate": 7.336606728153408e-06, "loss": 0.8735, "step": 5690 }, { "epoch": 0.3398423504120387, "grad_norm": 2.018120527267456, "learning_rate": 7.335943202176365e-06, "loss": 0.8597, "step": 5691 }, { "epoch": 0.33990206616505436, "grad_norm": 2.368485927581787, "learning_rate": 7.335279676199324e-06, "loss": 0.8848, "step": 5692 }, { "epoch": 0.33996178191806997, "grad_norm": 2.2386269569396973, "learning_rate": 7.334616150222282e-06, "loss": 0.9065, "step": 5693 }, { "epoch": 0.34002149767108564, "grad_norm": 1.861453652381897, "learning_rate": 7.3339526242452394e-06, "loss": 0.9047, "step": 5694 }, { "epoch": 0.3400812134241013, "grad_norm": 2.5636754035949707, "learning_rate": 7.333289098268198e-06, "loss": 0.8775, "step": 5695 }, { "epoch": 0.3401409291771169, "grad_norm": 2.339848518371582, "learning_rate": 7.332625572291156e-06, "loss": 0.8849, "step": 5696 }, { "epoch": 0.3402006449301326, "grad_norm": 2.429633855819702, "learning_rate": 7.331962046314114e-06, "loss": 0.9029, "step": 5697 }, { "epoch": 0.3402603606831482, "grad_norm": 2.485852003097534, "learning_rate": 7.331298520337073e-06, "loss": 0.9255, "step": 5698 }, { "epoch": 0.34032007643616385, "grad_norm": 1.6859513521194458, "learning_rate": 7.33063499436003e-06, "loss": 0.8981, "step": 5699 }, { "epoch": 0.3403797921891795, "grad_norm": 2.0990796089172363, "learning_rate": 7.329971468382988e-06, "loss": 0.8809, "step": 5700 }, { "epoch": 0.34043950794219513, "grad_norm": 2.15972638130188, "learning_rate": 7.329307942405945e-06, "loss": 0.8973, "step": 5701 }, { "epoch": 0.3404992236952108, "grad_norm": 2.2733497619628906, "learning_rate": 7.328644416428904e-06, "loss": 0.8761, "step": 5702 }, { "epoch": 0.34055893944822646, "grad_norm": 2.0162317752838135, "learning_rate": 7.327980890451862e-06, "loss": 0.8624, "step": 5703 }, { "epoch": 0.34061865520124207, "grad_norm": 2.3483119010925293, "learning_rate": 7.32731736447482e-06, "loss": 0.9303, "step": 5704 }, { "epoch": 0.34067837095425774, "grad_norm": 3.354095697402954, "learning_rate": 7.3266538384977785e-06, "loss": 0.8586, "step": 5705 }, { "epoch": 0.3407380867072734, "grad_norm": 3.3624143600463867, "learning_rate": 7.325990312520736e-06, "loss": 0.8835, "step": 5706 }, { "epoch": 0.340797802460289, "grad_norm": 2.3851680755615234, "learning_rate": 7.325326786543694e-06, "loss": 0.8883, "step": 5707 }, { "epoch": 0.3408575182133047, "grad_norm": 2.4267642498016357, "learning_rate": 7.324663260566652e-06, "loss": 0.8866, "step": 5708 }, { "epoch": 0.3409172339663203, "grad_norm": 2.970241069793701, "learning_rate": 7.32399973458961e-06, "loss": 0.9013, "step": 5709 }, { "epoch": 0.34097694971933595, "grad_norm": 2.653399705886841, "learning_rate": 7.323336208612568e-06, "loss": 0.8981, "step": 5710 }, { "epoch": 0.3410366654723516, "grad_norm": 2.490581512451172, "learning_rate": 7.3226726826355254e-06, "loss": 0.8542, "step": 5711 }, { "epoch": 0.34109638122536723, "grad_norm": 3.655679941177368, "learning_rate": 7.3220091566584835e-06, "loss": 0.8373, "step": 5712 }, { "epoch": 0.3411560969783829, "grad_norm": 2.326347589492798, "learning_rate": 7.3213456306814425e-06, "loss": 0.9084, "step": 5713 }, { "epoch": 0.34121581273139856, "grad_norm": 2.6156678199768066, "learning_rate": 7.3206821047044e-06, "loss": 0.9503, "step": 5714 }, { "epoch": 0.34127552848441417, "grad_norm": 2.361743211746216, "learning_rate": 7.320018578727358e-06, "loss": 0.8823, "step": 5715 }, { "epoch": 0.34133524423742984, "grad_norm": 2.418137311935425, "learning_rate": 7.319355052750315e-06, "loss": 0.8969, "step": 5716 }, { "epoch": 0.3413949599904455, "grad_norm": 1.9803656339645386, "learning_rate": 7.318691526773274e-06, "loss": 0.9055, "step": 5717 }, { "epoch": 0.3414546757434611, "grad_norm": 2.3251538276672363, "learning_rate": 7.318028000796232e-06, "loss": 0.8543, "step": 5718 }, { "epoch": 0.3415143914964768, "grad_norm": 7.379897594451904, "learning_rate": 7.317364474819189e-06, "loss": 0.8702, "step": 5719 }, { "epoch": 0.34157410724949244, "grad_norm": 2.017641067504883, "learning_rate": 7.316700948842148e-06, "loss": 0.9079, "step": 5720 }, { "epoch": 0.34163382300250805, "grad_norm": 2.0936973094940186, "learning_rate": 7.316037422865106e-06, "loss": 0.9069, "step": 5721 }, { "epoch": 0.3416935387555237, "grad_norm": 2.251068592071533, "learning_rate": 7.315373896888064e-06, "loss": 0.9354, "step": 5722 }, { "epoch": 0.34175325450853933, "grad_norm": 2.1487109661102295, "learning_rate": 7.314710370911023e-06, "loss": 0.8896, "step": 5723 }, { "epoch": 0.341812970261555, "grad_norm": 2.8219757080078125, "learning_rate": 7.31404684493398e-06, "loss": 0.8811, "step": 5724 }, { "epoch": 0.34187268601457066, "grad_norm": 3.0219619274139404, "learning_rate": 7.313383318956938e-06, "loss": 0.9049, "step": 5725 }, { "epoch": 0.34193240176758627, "grad_norm": 1.9065971374511719, "learning_rate": 7.312719792979895e-06, "loss": 0.9078, "step": 5726 }, { "epoch": 0.34199211752060193, "grad_norm": 2.3272764682769775, "learning_rate": 7.312056267002854e-06, "loss": 0.9155, "step": 5727 }, { "epoch": 0.3420518332736176, "grad_norm": 1.9555164575576782, "learning_rate": 7.311392741025812e-06, "loss": 0.9158, "step": 5728 }, { "epoch": 0.3421115490266332, "grad_norm": 2.6593716144561768, "learning_rate": 7.3107292150487696e-06, "loss": 0.9341, "step": 5729 }, { "epoch": 0.3421712647796489, "grad_norm": 2.322805166244507, "learning_rate": 7.3100656890717285e-06, "loss": 0.8738, "step": 5730 }, { "epoch": 0.34223098053266454, "grad_norm": 2.279599189758301, "learning_rate": 7.309402163094686e-06, "loss": 0.8967, "step": 5731 }, { "epoch": 0.34229069628568015, "grad_norm": 5.114776611328125, "learning_rate": 7.308738637117644e-06, "loss": 0.8406, "step": 5732 }, { "epoch": 0.3423504120386958, "grad_norm": 2.6388144493103027, "learning_rate": 7.308075111140602e-06, "loss": 0.8917, "step": 5733 }, { "epoch": 0.3424101277917114, "grad_norm": 2.283940076828003, "learning_rate": 7.30741158516356e-06, "loss": 0.897, "step": 5734 }, { "epoch": 0.3424698435447271, "grad_norm": 2.735962390899658, "learning_rate": 7.306748059186518e-06, "loss": 0.8846, "step": 5735 }, { "epoch": 0.34252955929774276, "grad_norm": 2.655125379562378, "learning_rate": 7.306084533209475e-06, "loss": 0.9097, "step": 5736 }, { "epoch": 0.34258927505075837, "grad_norm": 1.558896780014038, "learning_rate": 7.3054210072324335e-06, "loss": 0.8382, "step": 5737 }, { "epoch": 0.34264899080377403, "grad_norm": 2.2364025115966797, "learning_rate": 7.304757481255392e-06, "loss": 0.9106, "step": 5738 }, { "epoch": 0.3427087065567897, "grad_norm": 2.8097779750823975, "learning_rate": 7.30409395527835e-06, "loss": 0.8772, "step": 5739 }, { "epoch": 0.3427684223098053, "grad_norm": 2.3453571796417236, "learning_rate": 7.303430429301308e-06, "loss": 0.8903, "step": 5740 }, { "epoch": 0.342828138062821, "grad_norm": 2.8169925212860107, "learning_rate": 7.302766903324265e-06, "loss": 0.8673, "step": 5741 }, { "epoch": 0.34288785381583664, "grad_norm": 1.9622141122817993, "learning_rate": 7.302103377347224e-06, "loss": 0.8619, "step": 5742 }, { "epoch": 0.34294756956885225, "grad_norm": 3.784630298614502, "learning_rate": 7.301439851370181e-06, "loss": 0.8741, "step": 5743 }, { "epoch": 0.3430072853218679, "grad_norm": 2.0297133922576904, "learning_rate": 7.300776325393139e-06, "loss": 0.8726, "step": 5744 }, { "epoch": 0.3430670010748836, "grad_norm": 2.0036966800689697, "learning_rate": 7.300112799416098e-06, "loss": 0.8768, "step": 5745 }, { "epoch": 0.3431267168278992, "grad_norm": 2.3912715911865234, "learning_rate": 7.2994492734390556e-06, "loss": 0.9194, "step": 5746 }, { "epoch": 0.34318643258091486, "grad_norm": 2.0018672943115234, "learning_rate": 7.298785747462014e-06, "loss": 0.8635, "step": 5747 }, { "epoch": 0.34324614833393047, "grad_norm": 2.7742178440093994, "learning_rate": 7.298122221484971e-06, "loss": 0.8577, "step": 5748 }, { "epoch": 0.34330586408694613, "grad_norm": 1.7915992736816406, "learning_rate": 7.29745869550793e-06, "loss": 0.8718, "step": 5749 }, { "epoch": 0.3433655798399618, "grad_norm": 1.752903938293457, "learning_rate": 7.296795169530888e-06, "loss": 0.8988, "step": 5750 }, { "epoch": 0.3434252955929774, "grad_norm": 5.070295810699463, "learning_rate": 7.296131643553845e-06, "loss": 0.9502, "step": 5751 }, { "epoch": 0.3434850113459931, "grad_norm": 3.0701634883880615, "learning_rate": 7.295468117576804e-06, "loss": 0.8958, "step": 5752 }, { "epoch": 0.34354472709900874, "grad_norm": 2.387190818786621, "learning_rate": 7.294804591599761e-06, "loss": 0.899, "step": 5753 }, { "epoch": 0.34360444285202435, "grad_norm": 2.3174774646759033, "learning_rate": 7.2941410656227195e-06, "loss": 0.8968, "step": 5754 }, { "epoch": 0.34366415860504, "grad_norm": 1.986157774925232, "learning_rate": 7.2934775396456785e-06, "loss": 0.9141, "step": 5755 }, { "epoch": 0.3437238743580557, "grad_norm": 1.9079725742340088, "learning_rate": 7.292814013668636e-06, "loss": 0.8589, "step": 5756 }, { "epoch": 0.3437835901110713, "grad_norm": 2.8691701889038086, "learning_rate": 7.292150487691594e-06, "loss": 0.9163, "step": 5757 }, { "epoch": 0.34384330586408696, "grad_norm": 1.9557863473892212, "learning_rate": 7.291486961714551e-06, "loss": 0.8894, "step": 5758 }, { "epoch": 0.34390302161710257, "grad_norm": 1.658078908920288, "learning_rate": 7.29082343573751e-06, "loss": 0.9423, "step": 5759 }, { "epoch": 0.34396273737011823, "grad_norm": 10.175479888916016, "learning_rate": 7.290159909760468e-06, "loss": 0.9039, "step": 5760 }, { "epoch": 0.3440224531231339, "grad_norm": 2.6693732738494873, "learning_rate": 7.289496383783425e-06, "loss": 0.9267, "step": 5761 }, { "epoch": 0.3440821688761495, "grad_norm": 3.0694947242736816, "learning_rate": 7.2888328578063835e-06, "loss": 0.8738, "step": 5762 }, { "epoch": 0.3441418846291652, "grad_norm": 2.6515188217163086, "learning_rate": 7.288169331829342e-06, "loss": 0.8665, "step": 5763 }, { "epoch": 0.34420160038218084, "grad_norm": 2.0929157733917236, "learning_rate": 7.2875058058523e-06, "loss": 0.8569, "step": 5764 }, { "epoch": 0.34426131613519645, "grad_norm": 2.105271100997925, "learning_rate": 7.286842279875258e-06, "loss": 0.8786, "step": 5765 }, { "epoch": 0.3443210318882121, "grad_norm": 2.2098777294158936, "learning_rate": 7.286178753898215e-06, "loss": 0.8875, "step": 5766 }, { "epoch": 0.3443807476412278, "grad_norm": 1.8065307140350342, "learning_rate": 7.285515227921174e-06, "loss": 0.9176, "step": 5767 }, { "epoch": 0.3444404633942434, "grad_norm": 2.2009315490722656, "learning_rate": 7.284851701944131e-06, "loss": 0.9178, "step": 5768 }, { "epoch": 0.34450017914725906, "grad_norm": 2.9778499603271484, "learning_rate": 7.284188175967089e-06, "loss": 0.9087, "step": 5769 }, { "epoch": 0.34455989490027467, "grad_norm": 1.6828382015228271, "learning_rate": 7.283524649990048e-06, "loss": 0.898, "step": 5770 }, { "epoch": 0.34461961065329033, "grad_norm": 1.775228500366211, "learning_rate": 7.2828611240130055e-06, "loss": 0.8819, "step": 5771 }, { "epoch": 0.344679326406306, "grad_norm": 1.9633471965789795, "learning_rate": 7.282197598035964e-06, "loss": 0.8595, "step": 5772 }, { "epoch": 0.3447390421593216, "grad_norm": 1.7759243249893188, "learning_rate": 7.281534072058921e-06, "loss": 0.891, "step": 5773 }, { "epoch": 0.3447987579123373, "grad_norm": 1.629202961921692, "learning_rate": 7.28087054608188e-06, "loss": 0.8807, "step": 5774 }, { "epoch": 0.34485847366535294, "grad_norm": 1.939280390739441, "learning_rate": 7.280207020104838e-06, "loss": 0.835, "step": 5775 }, { "epoch": 0.34491818941836855, "grad_norm": 2.1783392429351807, "learning_rate": 7.279543494127795e-06, "loss": 0.8715, "step": 5776 }, { "epoch": 0.3449779051713842, "grad_norm": 3.2763235569000244, "learning_rate": 7.278879968150754e-06, "loss": 0.8883, "step": 5777 }, { "epoch": 0.3450376209243999, "grad_norm": 2.3023643493652344, "learning_rate": 7.278216442173711e-06, "loss": 0.8962, "step": 5778 }, { "epoch": 0.3450973366774155, "grad_norm": 5.77836799621582, "learning_rate": 7.2775529161966695e-06, "loss": 0.9109, "step": 5779 }, { "epoch": 0.34515705243043115, "grad_norm": 1.821059226989746, "learning_rate": 7.2768893902196284e-06, "loss": 0.8883, "step": 5780 }, { "epoch": 0.3452167681834468, "grad_norm": 1.7854365110397339, "learning_rate": 7.276225864242586e-06, "loss": 0.9087, "step": 5781 }, { "epoch": 0.34527648393646243, "grad_norm": 2.1273369789123535, "learning_rate": 7.275562338265544e-06, "loss": 0.8944, "step": 5782 }, { "epoch": 0.3453361996894781, "grad_norm": 1.8193724155426025, "learning_rate": 7.274898812288501e-06, "loss": 0.9127, "step": 5783 }, { "epoch": 0.3453959154424937, "grad_norm": 2.128629684448242, "learning_rate": 7.27423528631146e-06, "loss": 0.8753, "step": 5784 }, { "epoch": 0.34545563119550937, "grad_norm": 1.9331978559494019, "learning_rate": 7.273571760334418e-06, "loss": 0.8603, "step": 5785 }, { "epoch": 0.34551534694852504, "grad_norm": 2.2188704013824463, "learning_rate": 7.272908234357375e-06, "loss": 0.903, "step": 5786 }, { "epoch": 0.34557506270154065, "grad_norm": 1.976798176765442, "learning_rate": 7.2722447083803335e-06, "loss": 0.8781, "step": 5787 }, { "epoch": 0.3456347784545563, "grad_norm": 2.2133331298828125, "learning_rate": 7.2715811824032916e-06, "loss": 0.8979, "step": 5788 }, { "epoch": 0.345694494207572, "grad_norm": 1.8119233846664429, "learning_rate": 7.27091765642625e-06, "loss": 0.8813, "step": 5789 }, { "epoch": 0.3457542099605876, "grad_norm": 1.8317316770553589, "learning_rate": 7.270254130449208e-06, "loss": 0.8698, "step": 5790 }, { "epoch": 0.34581392571360325, "grad_norm": 2.4937193393707275, "learning_rate": 7.269590604472165e-06, "loss": 0.8798, "step": 5791 }, { "epoch": 0.3458736414666189, "grad_norm": 6.106617450714111, "learning_rate": 7.268927078495124e-06, "loss": 0.8845, "step": 5792 }, { "epoch": 0.34593335721963453, "grad_norm": 2.0859899520874023, "learning_rate": 7.268263552518081e-06, "loss": 0.8684, "step": 5793 }, { "epoch": 0.3459930729726502, "grad_norm": 1.8359086513519287, "learning_rate": 7.267600026541039e-06, "loss": 0.8719, "step": 5794 }, { "epoch": 0.3460527887256658, "grad_norm": 1.653024673461914, "learning_rate": 7.266936500563998e-06, "loss": 0.9142, "step": 5795 }, { "epoch": 0.34611250447868147, "grad_norm": 2.6751463413238525, "learning_rate": 7.2662729745869555e-06, "loss": 0.8738, "step": 5796 }, { "epoch": 0.34617222023169714, "grad_norm": 2.1310977935791016, "learning_rate": 7.265609448609914e-06, "loss": 0.9081, "step": 5797 }, { "epoch": 0.34623193598471275, "grad_norm": 1.7157349586486816, "learning_rate": 7.264945922632871e-06, "loss": 0.8652, "step": 5798 }, { "epoch": 0.3462916517377284, "grad_norm": 2.0963058471679688, "learning_rate": 7.26428239665583e-06, "loss": 0.9012, "step": 5799 }, { "epoch": 0.3463513674907441, "grad_norm": 3.365492343902588, "learning_rate": 7.263618870678788e-06, "loss": 0.9042, "step": 5800 }, { "epoch": 0.3463513674907441, "eval_text_loss": 0.9321038722991943, "eval_text_runtime": 15.1979, "eval_text_samples_per_second": 263.195, "eval_text_steps_per_second": 0.526, "step": 5800 }, { "epoch": 0.3463513674907441, "eval_image_loss": 0.6528400182723999, "eval_image_runtime": 5.2908, "eval_image_samples_per_second": 756.03, "eval_image_steps_per_second": 1.512, "step": 5800 }, { "epoch": 0.3463513674907441, "eval_video_loss": 1.1095787286758423, "eval_video_runtime": 77.0006, "eval_video_samples_per_second": 51.948, "eval_video_steps_per_second": 0.104, "step": 5800 }, { "epoch": 0.3464110832437597, "grad_norm": 2.541775703430176, "learning_rate": 7.262955344701745e-06, "loss": 0.9101, "step": 5801 }, { "epoch": 0.34647079899677535, "grad_norm": 5.4478254318237305, "learning_rate": 7.262291818724704e-06, "loss": 0.9127, "step": 5802 }, { "epoch": 0.346530514749791, "grad_norm": 2.3624210357666016, "learning_rate": 7.261628292747661e-06, "loss": 0.8735, "step": 5803 }, { "epoch": 0.34659023050280663, "grad_norm": 1.6640957593917847, "learning_rate": 7.2609647667706195e-06, "loss": 0.8839, "step": 5804 }, { "epoch": 0.3466499462558223, "grad_norm": 3.8046059608459473, "learning_rate": 7.260301240793578e-06, "loss": 0.838, "step": 5805 }, { "epoch": 0.3467096620088379, "grad_norm": 2.5363316535949707, "learning_rate": 7.259637714816536e-06, "loss": 0.9181, "step": 5806 }, { "epoch": 0.34676937776185357, "grad_norm": 1.9039305448532104, "learning_rate": 7.258974188839494e-06, "loss": 0.8766, "step": 5807 }, { "epoch": 0.34682909351486924, "grad_norm": 2.266453742980957, "learning_rate": 7.258310662862451e-06, "loss": 0.865, "step": 5808 }, { "epoch": 0.34688880926788485, "grad_norm": 2.4954755306243896, "learning_rate": 7.25764713688541e-06, "loss": 0.8527, "step": 5809 }, { "epoch": 0.3469485250209005, "grad_norm": 3.2231006622314453, "learning_rate": 7.256983610908368e-06, "loss": 0.91, "step": 5810 }, { "epoch": 0.3470082407739162, "grad_norm": 1.792034387588501, "learning_rate": 7.256320084931325e-06, "loss": 0.8861, "step": 5811 }, { "epoch": 0.3470679565269318, "grad_norm": 1.8755910396575928, "learning_rate": 7.255656558954283e-06, "loss": 0.8687, "step": 5812 }, { "epoch": 0.34712767227994745, "grad_norm": 1.8923616409301758, "learning_rate": 7.2549930329772415e-06, "loss": 0.869, "step": 5813 }, { "epoch": 0.3471873880329631, "grad_norm": 1.9440572261810303, "learning_rate": 7.2543295070002e-06, "loss": 0.856, "step": 5814 }, { "epoch": 0.34724710378597873, "grad_norm": 2.259624719619751, "learning_rate": 7.253665981023158e-06, "loss": 0.903, "step": 5815 }, { "epoch": 0.3473068195389944, "grad_norm": 2.094759464263916, "learning_rate": 7.253002455046115e-06, "loss": 0.8987, "step": 5816 }, { "epoch": 0.34736653529201006, "grad_norm": 2.45353364944458, "learning_rate": 7.252338929069074e-06, "loss": 0.908, "step": 5817 }, { "epoch": 0.34742625104502567, "grad_norm": 2.6001243591308594, "learning_rate": 7.251675403092031e-06, "loss": 0.8745, "step": 5818 }, { "epoch": 0.34748596679804133, "grad_norm": 4.017406940460205, "learning_rate": 7.251011877114989e-06, "loss": 0.8746, "step": 5819 }, { "epoch": 0.34754568255105694, "grad_norm": 2.7724311351776123, "learning_rate": 7.250348351137948e-06, "loss": 0.8842, "step": 5820 }, { "epoch": 0.3476053983040726, "grad_norm": 1.7272734642028809, "learning_rate": 7.2496848251609055e-06, "loss": 0.9093, "step": 5821 }, { "epoch": 0.3476651140570883, "grad_norm": 2.1869447231292725, "learning_rate": 7.249021299183864e-06, "loss": 0.9506, "step": 5822 }, { "epoch": 0.3477248298101039, "grad_norm": 1.861043095588684, "learning_rate": 7.248357773206821e-06, "loss": 0.8765, "step": 5823 }, { "epoch": 0.34778454556311955, "grad_norm": 2.0445802211761475, "learning_rate": 7.24769424722978e-06, "loss": 0.931, "step": 5824 }, { "epoch": 0.3478442613161352, "grad_norm": 2.1037538051605225, "learning_rate": 7.247030721252738e-06, "loss": 0.8616, "step": 5825 }, { "epoch": 0.3479039770691508, "grad_norm": 4.426989555358887, "learning_rate": 7.246367195275695e-06, "loss": 0.8919, "step": 5826 }, { "epoch": 0.3479636928221665, "grad_norm": 2.5317678451538086, "learning_rate": 7.245703669298654e-06, "loss": 0.9092, "step": 5827 }, { "epoch": 0.34802340857518216, "grad_norm": 1.7603051662445068, "learning_rate": 7.245040143321611e-06, "loss": 0.8989, "step": 5828 }, { "epoch": 0.34808312432819777, "grad_norm": 4.070949554443359, "learning_rate": 7.2443766173445694e-06, "loss": 0.9346, "step": 5829 }, { "epoch": 0.34814284008121343, "grad_norm": 2.32900071144104, "learning_rate": 7.243713091367528e-06, "loss": 0.9174, "step": 5830 }, { "epoch": 0.34820255583422904, "grad_norm": 1.9014432430267334, "learning_rate": 7.243049565390486e-06, "loss": 0.8724, "step": 5831 }, { "epoch": 0.3482622715872447, "grad_norm": 1.8810532093048096, "learning_rate": 7.242386039413444e-06, "loss": 0.8783, "step": 5832 }, { "epoch": 0.3483219873402604, "grad_norm": 2.431997060775757, "learning_rate": 7.241722513436401e-06, "loss": 0.9066, "step": 5833 }, { "epoch": 0.348381703093276, "grad_norm": 5.301788330078125, "learning_rate": 7.24105898745936e-06, "loss": 0.8509, "step": 5834 }, { "epoch": 0.34844141884629165, "grad_norm": 2.1107289791107178, "learning_rate": 7.240395461482318e-06, "loss": 0.9374, "step": 5835 }, { "epoch": 0.3485011345993073, "grad_norm": 2.746795415878296, "learning_rate": 7.239731935505275e-06, "loss": 0.9142, "step": 5836 }, { "epoch": 0.3485608503523229, "grad_norm": 3.3349153995513916, "learning_rate": 7.239068409528233e-06, "loss": 0.8946, "step": 5837 }, { "epoch": 0.3486205661053386, "grad_norm": 2.1361277103424072, "learning_rate": 7.2384048835511915e-06, "loss": 0.8976, "step": 5838 }, { "epoch": 0.34868028185835426, "grad_norm": 2.758772373199463, "learning_rate": 7.23774135757415e-06, "loss": 0.8863, "step": 5839 }, { "epoch": 0.34873999761136987, "grad_norm": 1.9974002838134766, "learning_rate": 7.237077831597108e-06, "loss": 0.9153, "step": 5840 }, { "epoch": 0.34879971336438553, "grad_norm": 1.830458641052246, "learning_rate": 7.236414305620065e-06, "loss": 0.9035, "step": 5841 }, { "epoch": 0.34885942911740114, "grad_norm": 2.411133050918579, "learning_rate": 7.235750779643024e-06, "loss": 0.8816, "step": 5842 }, { "epoch": 0.3489191448704168, "grad_norm": 2.4023923873901367, "learning_rate": 7.235087253665981e-06, "loss": 0.8617, "step": 5843 }, { "epoch": 0.3489788606234325, "grad_norm": 2.1727664470672607, "learning_rate": 7.234423727688939e-06, "loss": 0.8443, "step": 5844 }, { "epoch": 0.3490385763764481, "grad_norm": 2.1659650802612305, "learning_rate": 7.233760201711898e-06, "loss": 0.8718, "step": 5845 }, { "epoch": 0.34909829212946375, "grad_norm": 1.8888694047927856, "learning_rate": 7.2330966757348554e-06, "loss": 0.8341, "step": 5846 }, { "epoch": 0.3491580078824794, "grad_norm": 2.2711758613586426, "learning_rate": 7.2324331497578136e-06, "loss": 0.8836, "step": 5847 }, { "epoch": 0.349217723635495, "grad_norm": 2.4157793521881104, "learning_rate": 7.231769623780771e-06, "loss": 0.8831, "step": 5848 }, { "epoch": 0.3492774393885107, "grad_norm": 1.9583345651626587, "learning_rate": 7.23110609780373e-06, "loss": 0.8705, "step": 5849 }, { "epoch": 0.34933715514152636, "grad_norm": 3.4279115200042725, "learning_rate": 7.230442571826688e-06, "loss": 0.8696, "step": 5850 }, { "epoch": 0.34939687089454197, "grad_norm": 2.2407073974609375, "learning_rate": 7.229779045849645e-06, "loss": 0.8995, "step": 5851 }, { "epoch": 0.34945658664755763, "grad_norm": 2.0568578243255615, "learning_rate": 7.229115519872604e-06, "loss": 0.909, "step": 5852 }, { "epoch": 0.3495163024005733, "grad_norm": 2.156088352203369, "learning_rate": 7.228451993895561e-06, "loss": 0.8751, "step": 5853 }, { "epoch": 0.3495760181535889, "grad_norm": 3.397331476211548, "learning_rate": 7.227788467918519e-06, "loss": 0.9489, "step": 5854 }, { "epoch": 0.3496357339066046, "grad_norm": 8.004363059997559, "learning_rate": 7.227124941941478e-06, "loss": 0.9161, "step": 5855 }, { "epoch": 0.3496954496596202, "grad_norm": 2.027848482131958, "learning_rate": 7.226461415964436e-06, "loss": 0.8605, "step": 5856 }, { "epoch": 0.34975516541263585, "grad_norm": 2.939150333404541, "learning_rate": 7.225797889987394e-06, "loss": 0.8779, "step": 5857 }, { "epoch": 0.3498148811656515, "grad_norm": 2.2593343257904053, "learning_rate": 7.225134364010351e-06, "loss": 0.8667, "step": 5858 }, { "epoch": 0.3498745969186671, "grad_norm": 2.0859344005584717, "learning_rate": 7.22447083803331e-06, "loss": 0.8486, "step": 5859 }, { "epoch": 0.3499343126716828, "grad_norm": 2.3672409057617188, "learning_rate": 7.223807312056268e-06, "loss": 0.8912, "step": 5860 }, { "epoch": 0.34999402842469846, "grad_norm": 1.8108494281768799, "learning_rate": 7.223143786079225e-06, "loss": 0.9289, "step": 5861 }, { "epoch": 0.35005374417771407, "grad_norm": 2.251893997192383, "learning_rate": 7.222480260102183e-06, "loss": 0.8957, "step": 5862 }, { "epoch": 0.35011345993072973, "grad_norm": 3.5226852893829346, "learning_rate": 7.2218167341251415e-06, "loss": 0.8771, "step": 5863 }, { "epoch": 0.3501731756837454, "grad_norm": 2.801500082015991, "learning_rate": 7.2211532081480996e-06, "loss": 0.8879, "step": 5864 }, { "epoch": 0.350232891436761, "grad_norm": 3.4978420734405518, "learning_rate": 7.220489682171058e-06, "loss": 0.9289, "step": 5865 }, { "epoch": 0.3502926071897767, "grad_norm": 2.194512128829956, "learning_rate": 7.219826156194015e-06, "loss": 0.8988, "step": 5866 }, { "epoch": 0.3503523229427923, "grad_norm": 2.209822416305542, "learning_rate": 7.219162630216974e-06, "loss": 0.894, "step": 5867 }, { "epoch": 0.35041203869580795, "grad_norm": 1.8416651487350464, "learning_rate": 7.218499104239931e-06, "loss": 0.8649, "step": 5868 }, { "epoch": 0.3504717544488236, "grad_norm": 2.3981125354766846, "learning_rate": 7.217835578262889e-06, "loss": 0.8763, "step": 5869 }, { "epoch": 0.3505314702018392, "grad_norm": 3.6242103576660156, "learning_rate": 7.217172052285848e-06, "loss": 0.9121, "step": 5870 }, { "epoch": 0.3505911859548549, "grad_norm": 2.2006092071533203, "learning_rate": 7.216508526308805e-06, "loss": 0.8677, "step": 5871 }, { "epoch": 0.35065090170787055, "grad_norm": 1.9646738767623901, "learning_rate": 7.2158450003317635e-06, "loss": 0.8839, "step": 5872 }, { "epoch": 0.35071061746088616, "grad_norm": 2.458341598510742, "learning_rate": 7.215181474354721e-06, "loss": 0.9027, "step": 5873 }, { "epoch": 0.35077033321390183, "grad_norm": 2.5064570903778076, "learning_rate": 7.21451794837768e-06, "loss": 0.8809, "step": 5874 }, { "epoch": 0.3508300489669175, "grad_norm": 3.0612189769744873, "learning_rate": 7.213854422400638e-06, "loss": 0.858, "step": 5875 }, { "epoch": 0.3508897647199331, "grad_norm": 3.423645496368408, "learning_rate": 7.213190896423595e-06, "loss": 0.8878, "step": 5876 }, { "epoch": 0.35094948047294877, "grad_norm": 3.3225655555725098, "learning_rate": 7.212527370446554e-06, "loss": 0.9072, "step": 5877 }, { "epoch": 0.35100919622596444, "grad_norm": 3.8625454902648926, "learning_rate": 7.211863844469511e-06, "loss": 0.9146, "step": 5878 }, { "epoch": 0.35106891197898005, "grad_norm": 1.5676472187042236, "learning_rate": 7.211200318492469e-06, "loss": 0.8391, "step": 5879 }, { "epoch": 0.3511286277319957, "grad_norm": 1.6527738571166992, "learning_rate": 7.210536792515428e-06, "loss": 0.9047, "step": 5880 }, { "epoch": 0.3511883434850113, "grad_norm": 1.8260760307312012, "learning_rate": 7.209873266538386e-06, "loss": 0.9069, "step": 5881 }, { "epoch": 0.351248059238027, "grad_norm": 2.6229450702667236, "learning_rate": 7.209209740561344e-06, "loss": 0.8407, "step": 5882 }, { "epoch": 0.35130777499104265, "grad_norm": 2.0433852672576904, "learning_rate": 7.208546214584301e-06, "loss": 0.8791, "step": 5883 }, { "epoch": 0.35136749074405826, "grad_norm": 1.7165402173995972, "learning_rate": 7.20788268860726e-06, "loss": 0.8507, "step": 5884 }, { "epoch": 0.35142720649707393, "grad_norm": 3.5808091163635254, "learning_rate": 7.207219162630218e-06, "loss": 0.8107, "step": 5885 }, { "epoch": 0.3514869222500896, "grad_norm": 1.9111111164093018, "learning_rate": 7.206555636653175e-06, "loss": 0.8828, "step": 5886 }, { "epoch": 0.3515466380031052, "grad_norm": 1.9036794900894165, "learning_rate": 7.205892110676133e-06, "loss": 0.9122, "step": 5887 }, { "epoch": 0.35160635375612087, "grad_norm": 2.5018246173858643, "learning_rate": 7.2052285846990914e-06, "loss": 0.8816, "step": 5888 }, { "epoch": 0.35166606950913654, "grad_norm": 7.806820869445801, "learning_rate": 7.2045650587220495e-06, "loss": 0.8987, "step": 5889 }, { "epoch": 0.35172578526215215, "grad_norm": 1.9870986938476562, "learning_rate": 7.203901532745008e-06, "loss": 0.8974, "step": 5890 }, { "epoch": 0.3517855010151678, "grad_norm": 2.511125326156616, "learning_rate": 7.203238006767965e-06, "loss": 0.9012, "step": 5891 }, { "epoch": 0.3518452167681834, "grad_norm": 2.080493211746216, "learning_rate": 7.202574480790924e-06, "loss": 0.8959, "step": 5892 }, { "epoch": 0.3519049325211991, "grad_norm": 2.148383378982544, "learning_rate": 7.201910954813881e-06, "loss": 0.8919, "step": 5893 }, { "epoch": 0.35196464827421475, "grad_norm": 1.780240774154663, "learning_rate": 7.201247428836839e-06, "loss": 0.9019, "step": 5894 }, { "epoch": 0.35202436402723036, "grad_norm": 2.766037702560425, "learning_rate": 7.200583902859798e-06, "loss": 0.8946, "step": 5895 }, { "epoch": 0.35208407978024603, "grad_norm": 2.166388750076294, "learning_rate": 7.199920376882755e-06, "loss": 0.8625, "step": 5896 }, { "epoch": 0.3521437955332617, "grad_norm": 2.5876564979553223, "learning_rate": 7.1992568509057135e-06, "loss": 0.8747, "step": 5897 }, { "epoch": 0.3522035112862773, "grad_norm": 2.595444917678833, "learning_rate": 7.198593324928671e-06, "loss": 0.8947, "step": 5898 }, { "epoch": 0.35226322703929297, "grad_norm": 3.373872995376587, "learning_rate": 7.19792979895163e-06, "loss": 0.9114, "step": 5899 }, { "epoch": 0.35232294279230864, "grad_norm": 1.6384047269821167, "learning_rate": 7.197266272974588e-06, "loss": 0.8583, "step": 5900 }, { "epoch": 0.35238265854532425, "grad_norm": 1.8836172819137573, "learning_rate": 7.196602746997545e-06, "loss": 0.9176, "step": 5901 }, { "epoch": 0.3524423742983399, "grad_norm": 2.0337445735931396, "learning_rate": 7.195939221020504e-06, "loss": 0.8819, "step": 5902 }, { "epoch": 0.3525020900513555, "grad_norm": 2.960400104522705, "learning_rate": 7.195275695043461e-06, "loss": 0.8941, "step": 5903 }, { "epoch": 0.3525618058043712, "grad_norm": 2.2055816650390625, "learning_rate": 7.194612169066419e-06, "loss": 0.8884, "step": 5904 }, { "epoch": 0.35262152155738685, "grad_norm": 2.613844156265259, "learning_rate": 7.193948643089378e-06, "loss": 0.9106, "step": 5905 }, { "epoch": 0.35268123731040246, "grad_norm": 2.331576108932495, "learning_rate": 7.1932851171123355e-06, "loss": 0.8811, "step": 5906 }, { "epoch": 0.35274095306341813, "grad_norm": 1.8798742294311523, "learning_rate": 7.192621591135294e-06, "loss": 0.8656, "step": 5907 }, { "epoch": 0.3528006688164338, "grad_norm": 2.709134101867676, "learning_rate": 7.191958065158251e-06, "loss": 0.9334, "step": 5908 }, { "epoch": 0.3528603845694494, "grad_norm": 2.5596702098846436, "learning_rate": 7.19129453918121e-06, "loss": 0.8631, "step": 5909 }, { "epoch": 0.35292010032246507, "grad_norm": 1.879271149635315, "learning_rate": 7.190631013204168e-06, "loss": 0.8838, "step": 5910 }, { "epoch": 0.35297981607548073, "grad_norm": 4.920459747314453, "learning_rate": 7.189967487227125e-06, "loss": 0.8883, "step": 5911 }, { "epoch": 0.35303953182849634, "grad_norm": 2.2231204509735107, "learning_rate": 7.189303961250083e-06, "loss": 0.8445, "step": 5912 }, { "epoch": 0.353099247581512, "grad_norm": 2.639857530593872, "learning_rate": 7.188640435273041e-06, "loss": 0.8797, "step": 5913 }, { "epoch": 0.3531589633345277, "grad_norm": 3.26000714302063, "learning_rate": 7.1879769092959995e-06, "loss": 0.8786, "step": 5914 }, { "epoch": 0.3532186790875433, "grad_norm": 2.3202567100524902, "learning_rate": 7.187313383318958e-06, "loss": 0.8753, "step": 5915 }, { "epoch": 0.35327839484055895, "grad_norm": 1.916106939315796, "learning_rate": 7.186649857341915e-06, "loss": 0.9064, "step": 5916 }, { "epoch": 0.35333811059357456, "grad_norm": 2.135735273361206, "learning_rate": 7.185986331364874e-06, "loss": 0.8953, "step": 5917 }, { "epoch": 0.3533978263465902, "grad_norm": 1.888261079788208, "learning_rate": 7.185322805387831e-06, "loss": 0.8602, "step": 5918 }, { "epoch": 0.3534575420996059, "grad_norm": 2.507282018661499, "learning_rate": 7.184659279410789e-06, "loss": 0.8968, "step": 5919 }, { "epoch": 0.3535172578526215, "grad_norm": 2.409403085708618, "learning_rate": 7.183995753433748e-06, "loss": 0.942, "step": 5920 }, { "epoch": 0.35357697360563717, "grad_norm": 3.237039804458618, "learning_rate": 7.183332227456705e-06, "loss": 0.8877, "step": 5921 }, { "epoch": 0.35363668935865283, "grad_norm": 3.1131808757781982, "learning_rate": 7.1826687014796635e-06, "loss": 0.8584, "step": 5922 }, { "epoch": 0.35369640511166844, "grad_norm": 2.8887531757354736, "learning_rate": 7.182005175502621e-06, "loss": 0.9021, "step": 5923 }, { "epoch": 0.3537561208646841, "grad_norm": 2.0899903774261475, "learning_rate": 7.18134164952558e-06, "loss": 0.9081, "step": 5924 }, { "epoch": 0.3538158366176998, "grad_norm": 2.328564167022705, "learning_rate": 7.180678123548538e-06, "loss": 0.879, "step": 5925 }, { "epoch": 0.3538755523707154, "grad_norm": 2.1915228366851807, "learning_rate": 7.180014597571495e-06, "loss": 0.9088, "step": 5926 }, { "epoch": 0.35393526812373105, "grad_norm": 2.6457462310791016, "learning_rate": 7.179351071594454e-06, "loss": 0.9188, "step": 5927 }, { "epoch": 0.35399498387674666, "grad_norm": 2.407783269882202, "learning_rate": 7.178687545617411e-06, "loss": 0.8964, "step": 5928 }, { "epoch": 0.3540546996297623, "grad_norm": 2.0622880458831787, "learning_rate": 7.178024019640369e-06, "loss": 0.8877, "step": 5929 }, { "epoch": 0.354114415382778, "grad_norm": 2.3154821395874023, "learning_rate": 7.177360493663328e-06, "loss": 0.9357, "step": 5930 }, { "epoch": 0.3541741311357936, "grad_norm": 1.8843913078308105, "learning_rate": 7.1766969676862855e-06, "loss": 0.8733, "step": 5931 }, { "epoch": 0.35423384688880927, "grad_norm": 1.9959322214126587, "learning_rate": 7.176033441709244e-06, "loss": 0.8907, "step": 5932 }, { "epoch": 0.35429356264182493, "grad_norm": 2.247560501098633, "learning_rate": 7.175369915732201e-06, "loss": 0.8792, "step": 5933 }, { "epoch": 0.35435327839484054, "grad_norm": 2.4763383865356445, "learning_rate": 7.17470638975516e-06, "loss": 0.8763, "step": 5934 }, { "epoch": 0.3544129941478562, "grad_norm": 2.189021110534668, "learning_rate": 7.174042863778118e-06, "loss": 0.8793, "step": 5935 }, { "epoch": 0.3544727099008719, "grad_norm": 2.235232353210449, "learning_rate": 7.173379337801075e-06, "loss": 0.8774, "step": 5936 }, { "epoch": 0.3545324256538875, "grad_norm": 2.099576234817505, "learning_rate": 7.172715811824033e-06, "loss": 0.861, "step": 5937 }, { "epoch": 0.35459214140690315, "grad_norm": 4.598006248474121, "learning_rate": 7.172052285846991e-06, "loss": 0.9003, "step": 5938 }, { "epoch": 0.35465185715991876, "grad_norm": 2.0547690391540527, "learning_rate": 7.1713887598699495e-06, "loss": 0.8799, "step": 5939 }, { "epoch": 0.3547115729129344, "grad_norm": 2.321183204650879, "learning_rate": 7.170725233892908e-06, "loss": 0.9174, "step": 5940 }, { "epoch": 0.3547712886659501, "grad_norm": 5.308407306671143, "learning_rate": 7.170061707915865e-06, "loss": 0.8916, "step": 5941 }, { "epoch": 0.3548310044189657, "grad_norm": 2.0293257236480713, "learning_rate": 7.169398181938824e-06, "loss": 0.869, "step": 5942 }, { "epoch": 0.35489072017198137, "grad_norm": 2.051215648651123, "learning_rate": 7.168734655961781e-06, "loss": 0.8639, "step": 5943 }, { "epoch": 0.35495043592499703, "grad_norm": 2.8341078758239746, "learning_rate": 7.168071129984739e-06, "loss": 0.879, "step": 5944 }, { "epoch": 0.35501015167801264, "grad_norm": 2.2794442176818848, "learning_rate": 7.167407604007698e-06, "loss": 0.8946, "step": 5945 }, { "epoch": 0.3550698674310283, "grad_norm": 2.171692371368408, "learning_rate": 7.166744078030655e-06, "loss": 0.8935, "step": 5946 }, { "epoch": 0.355129583184044, "grad_norm": 2.161839485168457, "learning_rate": 7.1660805520536134e-06, "loss": 0.8567, "step": 5947 }, { "epoch": 0.3551892989370596, "grad_norm": 3.79328989982605, "learning_rate": 7.165417026076571e-06, "loss": 0.8908, "step": 5948 }, { "epoch": 0.35524901469007525, "grad_norm": 1.8234459161758423, "learning_rate": 7.16475350009953e-06, "loss": 0.8616, "step": 5949 }, { "epoch": 0.3553087304430909, "grad_norm": 2.3828890323638916, "learning_rate": 7.164089974122488e-06, "loss": 0.8751, "step": 5950 }, { "epoch": 0.3553684461961065, "grad_norm": 2.1530120372772217, "learning_rate": 7.163426448145445e-06, "loss": 0.8946, "step": 5951 }, { "epoch": 0.3554281619491222, "grad_norm": 1.8162635564804077, "learning_rate": 7.162762922168404e-06, "loss": 0.8293, "step": 5952 }, { "epoch": 0.3554878777021378, "grad_norm": 2.3973171710968018, "learning_rate": 7.162099396191361e-06, "loss": 0.8806, "step": 5953 }, { "epoch": 0.35554759345515347, "grad_norm": 4.525802135467529, "learning_rate": 7.161435870214319e-06, "loss": 0.8804, "step": 5954 }, { "epoch": 0.35560730920816913, "grad_norm": 2.177586078643799, "learning_rate": 7.160772344237278e-06, "loss": 0.9013, "step": 5955 }, { "epoch": 0.35566702496118474, "grad_norm": 3.213754653930664, "learning_rate": 7.1601088182602355e-06, "loss": 0.9037, "step": 5956 }, { "epoch": 0.3557267407142004, "grad_norm": 1.900878667831421, "learning_rate": 7.159445292283194e-06, "loss": 0.8586, "step": 5957 }, { "epoch": 0.3557864564672161, "grad_norm": 2.4557971954345703, "learning_rate": 7.158781766306151e-06, "loss": 0.9204, "step": 5958 }, { "epoch": 0.3558461722202317, "grad_norm": 2.1719112396240234, "learning_rate": 7.15811824032911e-06, "loss": 0.9011, "step": 5959 }, { "epoch": 0.35590588797324735, "grad_norm": 1.8139902353286743, "learning_rate": 7.157454714352068e-06, "loss": 0.8704, "step": 5960 }, { "epoch": 0.355965603726263, "grad_norm": 3.44649076461792, "learning_rate": 7.156791188375025e-06, "loss": 0.8831, "step": 5961 }, { "epoch": 0.3560253194792786, "grad_norm": 2.4747934341430664, "learning_rate": 7.156127662397983e-06, "loss": 0.8825, "step": 5962 }, { "epoch": 0.3560850352322943, "grad_norm": 3.0347084999084473, "learning_rate": 7.155464136420941e-06, "loss": 0.8809, "step": 5963 }, { "epoch": 0.3561447509853099, "grad_norm": 2.2851169109344482, "learning_rate": 7.1548006104438994e-06, "loss": 0.8787, "step": 5964 }, { "epoch": 0.35620446673832556, "grad_norm": 3.6195194721221924, "learning_rate": 7.1541370844668575e-06, "loss": 0.9022, "step": 5965 }, { "epoch": 0.35626418249134123, "grad_norm": 2.6967310905456543, "learning_rate": 7.153473558489815e-06, "loss": 0.9155, "step": 5966 }, { "epoch": 0.35632389824435684, "grad_norm": 1.8493776321411133, "learning_rate": 7.152810032512774e-06, "loss": 0.9113, "step": 5967 }, { "epoch": 0.3563836139973725, "grad_norm": 2.1322715282440186, "learning_rate": 7.152146506535731e-06, "loss": 0.8465, "step": 5968 }, { "epoch": 0.35644332975038817, "grad_norm": 4.592240810394287, "learning_rate": 7.151482980558689e-06, "loss": 0.9072, "step": 5969 }, { "epoch": 0.3565030455034038, "grad_norm": 2.399986982345581, "learning_rate": 7.150819454581648e-06, "loss": 0.876, "step": 5970 }, { "epoch": 0.35656276125641945, "grad_norm": 2.2004995346069336, "learning_rate": 7.150155928604605e-06, "loss": 0.8951, "step": 5971 }, { "epoch": 0.3566224770094351, "grad_norm": 2.133129596710205, "learning_rate": 7.149492402627563e-06, "loss": 0.9019, "step": 5972 }, { "epoch": 0.3566821927624507, "grad_norm": 2.9743945598602295, "learning_rate": 7.148828876650521e-06, "loss": 0.8709, "step": 5973 }, { "epoch": 0.3567419085154664, "grad_norm": 8.074103355407715, "learning_rate": 7.14816535067348e-06, "loss": 0.9226, "step": 5974 }, { "epoch": 0.356801624268482, "grad_norm": 2.449833631515503, "learning_rate": 7.147501824696438e-06, "loss": 0.8843, "step": 5975 }, { "epoch": 0.35686134002149766, "grad_norm": 2.267892360687256, "learning_rate": 7.146838298719395e-06, "loss": 0.894, "step": 5976 }, { "epoch": 0.35692105577451333, "grad_norm": 2.934614658355713, "learning_rate": 7.146174772742354e-06, "loss": 0.8797, "step": 5977 }, { "epoch": 0.35698077152752894, "grad_norm": 2.2037274837493896, "learning_rate": 7.145511246765311e-06, "loss": 0.8652, "step": 5978 }, { "epoch": 0.3570404872805446, "grad_norm": 4.021765232086182, "learning_rate": 7.144847720788269e-06, "loss": 0.8803, "step": 5979 }, { "epoch": 0.35710020303356027, "grad_norm": 1.8367199897766113, "learning_rate": 7.144184194811228e-06, "loss": 0.8886, "step": 5980 }, { "epoch": 0.3571599187865759, "grad_norm": 2.4972217082977295, "learning_rate": 7.1435206688341855e-06, "loss": 0.8918, "step": 5981 }, { "epoch": 0.35721963453959155, "grad_norm": 1.9257891178131104, "learning_rate": 7.1428571428571436e-06, "loss": 0.8173, "step": 5982 }, { "epoch": 0.3572793502926072, "grad_norm": 3.3215932846069336, "learning_rate": 7.142193616880101e-06, "loss": 0.8956, "step": 5983 }, { "epoch": 0.3573390660456228, "grad_norm": 2.1100378036499023, "learning_rate": 7.14153009090306e-06, "loss": 0.8623, "step": 5984 }, { "epoch": 0.3573987817986385, "grad_norm": 3.1677610874176025, "learning_rate": 7.140866564926018e-06, "loss": 0.848, "step": 5985 }, { "epoch": 0.35745849755165415, "grad_norm": 2.6384546756744385, "learning_rate": 7.140203038948975e-06, "loss": 0.8541, "step": 5986 }, { "epoch": 0.35751821330466976, "grad_norm": 2.3210577964782715, "learning_rate": 7.139539512971933e-06, "loss": 0.881, "step": 5987 }, { "epoch": 0.35757792905768543, "grad_norm": 1.882378101348877, "learning_rate": 7.138875986994891e-06, "loss": 0.8498, "step": 5988 }, { "epoch": 0.35763764481070104, "grad_norm": 2.194178819656372, "learning_rate": 7.138212461017849e-06, "loss": 0.8911, "step": 5989 }, { "epoch": 0.3576973605637167, "grad_norm": 2.394319772720337, "learning_rate": 7.1375489350408075e-06, "loss": 0.9083, "step": 5990 }, { "epoch": 0.35775707631673237, "grad_norm": 2.5109457969665527, "learning_rate": 7.136885409063765e-06, "loss": 0.8842, "step": 5991 }, { "epoch": 0.357816792069748, "grad_norm": 5.655229568481445, "learning_rate": 7.136221883086724e-06, "loss": 0.8833, "step": 5992 }, { "epoch": 0.35787650782276365, "grad_norm": 4.457569122314453, "learning_rate": 7.135558357109681e-06, "loss": 0.855, "step": 5993 }, { "epoch": 0.3579362235757793, "grad_norm": 2.6267731189727783, "learning_rate": 7.134894831132639e-06, "loss": 0.862, "step": 5994 }, { "epoch": 0.3579959393287949, "grad_norm": 3.6189379692077637, "learning_rate": 7.134231305155598e-06, "loss": 0.8757, "step": 5995 }, { "epoch": 0.3580556550818106, "grad_norm": 3.09456467628479, "learning_rate": 7.133567779178555e-06, "loss": 0.9196, "step": 5996 }, { "epoch": 0.35811537083482625, "grad_norm": 3.5009829998016357, "learning_rate": 7.132904253201513e-06, "loss": 0.8993, "step": 5997 }, { "epoch": 0.35817508658784186, "grad_norm": 2.245941638946533, "learning_rate": 7.132240727224471e-06, "loss": 0.8803, "step": 5998 }, { "epoch": 0.3582348023408575, "grad_norm": 3.9562511444091797, "learning_rate": 7.13157720124743e-06, "loss": 0.9076, "step": 5999 }, { "epoch": 0.35829451809387314, "grad_norm": 2.99696683883667, "learning_rate": 7.130913675270388e-06, "loss": 0.906, "step": 6000 }, { "epoch": 0.35829451809387314, "eval_text_loss": 0.9317573308944702, "eval_text_runtime": 15.1798, "eval_text_samples_per_second": 263.508, "eval_text_steps_per_second": 0.527, "step": 6000 }, { "epoch": 0.35829451809387314, "eval_image_loss": 0.6492756605148315, "eval_image_runtime": 5.0416, "eval_image_samples_per_second": 793.393, "eval_image_steps_per_second": 1.587, "step": 6000 }, { "epoch": 0.35829451809387314, "eval_video_loss": 1.1059513092041016, "eval_video_runtime": 76.5499, "eval_video_samples_per_second": 52.254, "eval_video_steps_per_second": 0.105, "step": 6000 }, { "epoch": 0.3583542338468888, "grad_norm": 2.3146915435791016, "learning_rate": 7.130250149293345e-06, "loss": 0.8953, "step": 6001 }, { "epoch": 0.35841394959990447, "grad_norm": 2.427917242050171, "learning_rate": 7.129586623316304e-06, "loss": 0.917, "step": 6002 }, { "epoch": 0.3584736653529201, "grad_norm": 5.151915550231934, "learning_rate": 7.128923097339261e-06, "loss": 0.8966, "step": 6003 }, { "epoch": 0.35853338110593574, "grad_norm": 1.7452532052993774, "learning_rate": 7.128259571362219e-06, "loss": 0.8573, "step": 6004 }, { "epoch": 0.3585930968589514, "grad_norm": 1.8122307062149048, "learning_rate": 7.127596045385178e-06, "loss": 0.8394, "step": 6005 }, { "epoch": 0.358652812611967, "grad_norm": 1.9077218770980835, "learning_rate": 7.1269325194081354e-06, "loss": 0.8582, "step": 6006 }, { "epoch": 0.3587125283649827, "grad_norm": 3.200713634490967, "learning_rate": 7.1262689934310935e-06, "loss": 0.8959, "step": 6007 }, { "epoch": 0.35877224411799835, "grad_norm": 9.166791915893555, "learning_rate": 7.125605467454051e-06, "loss": 0.8859, "step": 6008 }, { "epoch": 0.35883195987101396, "grad_norm": 2.6378188133239746, "learning_rate": 7.12494194147701e-06, "loss": 0.8667, "step": 6009 }, { "epoch": 0.3588916756240296, "grad_norm": 2.0158777236938477, "learning_rate": 7.124278415499968e-06, "loss": 0.8581, "step": 6010 }, { "epoch": 0.35895139137704524, "grad_norm": 2.1428277492523193, "learning_rate": 7.123614889522925e-06, "loss": 0.8797, "step": 6011 }, { "epoch": 0.3590111071300609, "grad_norm": 4.5248026847839355, "learning_rate": 7.122951363545883e-06, "loss": 0.8809, "step": 6012 }, { "epoch": 0.35907082288307657, "grad_norm": 4.1088972091674805, "learning_rate": 7.122287837568841e-06, "loss": 0.8738, "step": 6013 }, { "epoch": 0.3591305386360922, "grad_norm": 7.012158393859863, "learning_rate": 7.121624311591799e-06, "loss": 0.8759, "step": 6014 }, { "epoch": 0.35919025438910784, "grad_norm": 3.2081222534179688, "learning_rate": 7.1209607856147575e-06, "loss": 0.8709, "step": 6015 }, { "epoch": 0.3592499701421235, "grad_norm": 2.659184455871582, "learning_rate": 7.120297259637715e-06, "loss": 0.9071, "step": 6016 }, { "epoch": 0.3593096858951391, "grad_norm": 5.017912864685059, "learning_rate": 7.119633733660674e-06, "loss": 0.862, "step": 6017 }, { "epoch": 0.3593694016481548, "grad_norm": 2.668015241622925, "learning_rate": 7.118970207683631e-06, "loss": 0.8515, "step": 6018 }, { "epoch": 0.35942911740117045, "grad_norm": 3.241328477859497, "learning_rate": 7.118306681706589e-06, "loss": 0.868, "step": 6019 }, { "epoch": 0.35948883315418606, "grad_norm": 4.556182861328125, "learning_rate": 7.117643155729548e-06, "loss": 0.8654, "step": 6020 }, { "epoch": 0.3595485489072017, "grad_norm": 3.8438587188720703, "learning_rate": 7.116979629752505e-06, "loss": 0.8925, "step": 6021 }, { "epoch": 0.3596082646602174, "grad_norm": 1.9844812154769897, "learning_rate": 7.116316103775463e-06, "loss": 0.8993, "step": 6022 }, { "epoch": 0.359667980413233, "grad_norm": 2.683786630630493, "learning_rate": 7.115652577798421e-06, "loss": 0.8855, "step": 6023 }, { "epoch": 0.35972769616624867, "grad_norm": 1.9635369777679443, "learning_rate": 7.1149890518213795e-06, "loss": 0.9161, "step": 6024 }, { "epoch": 0.3597874119192643, "grad_norm": 12.537109375, "learning_rate": 7.114325525844338e-06, "loss": 0.8875, "step": 6025 }, { "epoch": 0.35984712767227994, "grad_norm": 2.207958936691284, "learning_rate": 7.113661999867295e-06, "loss": 0.8497, "step": 6026 }, { "epoch": 0.3599068434252956, "grad_norm": 2.6875455379486084, "learning_rate": 7.112998473890254e-06, "loss": 0.8554, "step": 6027 }, { "epoch": 0.3599665591783112, "grad_norm": 1.8463044166564941, "learning_rate": 7.112334947913211e-06, "loss": 0.8986, "step": 6028 }, { "epoch": 0.3600262749313269, "grad_norm": 4.084487438201904, "learning_rate": 7.111671421936169e-06, "loss": 0.9062, "step": 6029 }, { "epoch": 0.36008599068434255, "grad_norm": 2.5692057609558105, "learning_rate": 7.111007895959128e-06, "loss": 0.8538, "step": 6030 }, { "epoch": 0.36014570643735816, "grad_norm": 2.6447317600250244, "learning_rate": 7.110344369982085e-06, "loss": 0.8637, "step": 6031 }, { "epoch": 0.3602054221903738, "grad_norm": 3.8787357807159424, "learning_rate": 7.1096808440050435e-06, "loss": 0.8601, "step": 6032 }, { "epoch": 0.3602651379433895, "grad_norm": 1.7879424095153809, "learning_rate": 7.109017318028001e-06, "loss": 0.8716, "step": 6033 }, { "epoch": 0.3603248536964051, "grad_norm": 2.201040744781494, "learning_rate": 7.10835379205096e-06, "loss": 0.9024, "step": 6034 }, { "epoch": 0.36038456944942077, "grad_norm": 2.318004608154297, "learning_rate": 7.107690266073918e-06, "loss": 0.8859, "step": 6035 }, { "epoch": 0.3604442852024364, "grad_norm": 3.876101493835449, "learning_rate": 7.107026740096875e-06, "loss": 0.8573, "step": 6036 }, { "epoch": 0.36050400095545204, "grad_norm": 2.8646745681762695, "learning_rate": 7.106363214119833e-06, "loss": 0.89, "step": 6037 }, { "epoch": 0.3605637167084677, "grad_norm": 2.431584596633911, "learning_rate": 7.105699688142791e-06, "loss": 0.9106, "step": 6038 }, { "epoch": 0.3606234324614833, "grad_norm": 2.743497610092163, "learning_rate": 7.105036162165749e-06, "loss": 0.8756, "step": 6039 }, { "epoch": 0.360683148214499, "grad_norm": 2.5895440578460693, "learning_rate": 7.1043726361887075e-06, "loss": 0.8892, "step": 6040 }, { "epoch": 0.36074286396751465, "grad_norm": 2.907754421234131, "learning_rate": 7.103709110211665e-06, "loss": 0.8683, "step": 6041 }, { "epoch": 0.36080257972053026, "grad_norm": 2.493793249130249, "learning_rate": 7.103045584234624e-06, "loss": 0.8805, "step": 6042 }, { "epoch": 0.3608622954735459, "grad_norm": 4.762048244476318, "learning_rate": 7.102382058257581e-06, "loss": 0.9065, "step": 6043 }, { "epoch": 0.3609220112265616, "grad_norm": 3.3999252319335938, "learning_rate": 7.101718532280539e-06, "loss": 0.8981, "step": 6044 }, { "epoch": 0.3609817269795772, "grad_norm": 2.449108123779297, "learning_rate": 7.101055006303498e-06, "loss": 0.8706, "step": 6045 }, { "epoch": 0.36104144273259287, "grad_norm": 3.410275936126709, "learning_rate": 7.100391480326455e-06, "loss": 0.9007, "step": 6046 }, { "epoch": 0.36110115848560853, "grad_norm": 5.820784091949463, "learning_rate": 7.099727954349413e-06, "loss": 0.8846, "step": 6047 }, { "epoch": 0.36116087423862414, "grad_norm": 3.7390904426574707, "learning_rate": 7.0990644283723706e-06, "loss": 0.8775, "step": 6048 }, { "epoch": 0.3612205899916398, "grad_norm": 2.4062535762786865, "learning_rate": 7.0984009023953295e-06, "loss": 0.9166, "step": 6049 }, { "epoch": 0.3612803057446554, "grad_norm": 2.382439374923706, "learning_rate": 7.097737376418288e-06, "loss": 0.887, "step": 6050 }, { "epoch": 0.3613400214976711, "grad_norm": 2.1099088191986084, "learning_rate": 7.097073850441245e-06, "loss": 0.8669, "step": 6051 }, { "epoch": 0.36139973725068675, "grad_norm": 2.3470447063446045, "learning_rate": 7.096410324464204e-06, "loss": 0.8785, "step": 6052 }, { "epoch": 0.36145945300370236, "grad_norm": 2.1774063110351562, "learning_rate": 7.095746798487161e-06, "loss": 0.8897, "step": 6053 }, { "epoch": 0.361519168756718, "grad_norm": 1.6360652446746826, "learning_rate": 7.095083272510119e-06, "loss": 0.8588, "step": 6054 }, { "epoch": 0.3615788845097337, "grad_norm": 2.470231771469116, "learning_rate": 7.094419746533078e-06, "loss": 0.8846, "step": 6055 }, { "epoch": 0.3616386002627493, "grad_norm": 2.9311375617980957, "learning_rate": 7.093756220556035e-06, "loss": 0.8836, "step": 6056 }, { "epoch": 0.36169831601576496, "grad_norm": 2.7312605381011963, "learning_rate": 7.0930926945789935e-06, "loss": 0.8588, "step": 6057 }, { "epoch": 0.36175803176878063, "grad_norm": 1.8374032974243164, "learning_rate": 7.092429168601951e-06, "loss": 0.8606, "step": 6058 }, { "epoch": 0.36181774752179624, "grad_norm": 2.566206216812134, "learning_rate": 7.09176564262491e-06, "loss": 0.8949, "step": 6059 }, { "epoch": 0.3618774632748119, "grad_norm": 2.6718757152557373, "learning_rate": 7.091102116647868e-06, "loss": 0.9232, "step": 6060 }, { "epoch": 0.3619371790278275, "grad_norm": 2.3347110748291016, "learning_rate": 7.090438590670825e-06, "loss": 0.8647, "step": 6061 }, { "epoch": 0.3619968947808432, "grad_norm": 2.546480417251587, "learning_rate": 7.089775064693783e-06, "loss": 0.8701, "step": 6062 }, { "epoch": 0.36205661053385885, "grad_norm": 2.548567056655884, "learning_rate": 7.089111538716741e-06, "loss": 0.902, "step": 6063 }, { "epoch": 0.36211632628687446, "grad_norm": 2.0656704902648926, "learning_rate": 7.088448012739699e-06, "loss": 0.875, "step": 6064 }, { "epoch": 0.3621760420398901, "grad_norm": 1.848469614982605, "learning_rate": 7.0877844867626574e-06, "loss": 0.9064, "step": 6065 }, { "epoch": 0.3622357577929058, "grad_norm": 3.4190099239349365, "learning_rate": 7.087120960785615e-06, "loss": 0.8658, "step": 6066 }, { "epoch": 0.3622954735459214, "grad_norm": 3.0621066093444824, "learning_rate": 7.086457434808574e-06, "loss": 0.8469, "step": 6067 }, { "epoch": 0.36235518929893706, "grad_norm": 2.7920992374420166, "learning_rate": 7.085793908831531e-06, "loss": 0.9037, "step": 6068 }, { "epoch": 0.36241490505195273, "grad_norm": 6.541232585906982, "learning_rate": 7.085130382854489e-06, "loss": 0.9125, "step": 6069 }, { "epoch": 0.36247462080496834, "grad_norm": 2.313913106918335, "learning_rate": 7.084466856877448e-06, "loss": 0.9031, "step": 6070 }, { "epoch": 0.362534336557984, "grad_norm": 2.2296979427337646, "learning_rate": 7.083803330900405e-06, "loss": 0.8974, "step": 6071 }, { "epoch": 0.3625940523109996, "grad_norm": 2.100003719329834, "learning_rate": 7.083139804923363e-06, "loss": 0.9171, "step": 6072 }, { "epoch": 0.3626537680640153, "grad_norm": 3.7990500926971436, "learning_rate": 7.0824762789463205e-06, "loss": 0.906, "step": 6073 }, { "epoch": 0.36271348381703095, "grad_norm": 2.1049141883850098, "learning_rate": 7.0818127529692795e-06, "loss": 0.8813, "step": 6074 }, { "epoch": 0.36277319957004656, "grad_norm": 1.908262014389038, "learning_rate": 7.081149226992238e-06, "loss": 0.8511, "step": 6075 }, { "epoch": 0.3628329153230622, "grad_norm": 1.6740738153457642, "learning_rate": 7.080485701015195e-06, "loss": 0.8923, "step": 6076 }, { "epoch": 0.3628926310760779, "grad_norm": 3.620704412460327, "learning_rate": 7.079822175038154e-06, "loss": 0.8899, "step": 6077 }, { "epoch": 0.3629523468290935, "grad_norm": 4.518049240112305, "learning_rate": 7.079158649061111e-06, "loss": 0.8702, "step": 6078 }, { "epoch": 0.36301206258210916, "grad_norm": 1.930357575416565, "learning_rate": 7.078495123084069e-06, "loss": 0.9097, "step": 6079 }, { "epoch": 0.36307177833512483, "grad_norm": 3.1978299617767334, "learning_rate": 7.077831597107028e-06, "loss": 0.8933, "step": 6080 }, { "epoch": 0.36313149408814044, "grad_norm": 3.556992769241333, "learning_rate": 7.077168071129985e-06, "loss": 0.8651, "step": 6081 }, { "epoch": 0.3631912098411561, "grad_norm": 3.1493210792541504, "learning_rate": 7.0765045451529434e-06, "loss": 0.937, "step": 6082 }, { "epoch": 0.36325092559417177, "grad_norm": 1.8813966512680054, "learning_rate": 7.075841019175901e-06, "loss": 0.8703, "step": 6083 }, { "epoch": 0.3633106413471874, "grad_norm": 4.045205116271973, "learning_rate": 7.07517749319886e-06, "loss": 0.9339, "step": 6084 }, { "epoch": 0.36337035710020305, "grad_norm": 2.0452096462249756, "learning_rate": 7.074513967221818e-06, "loss": 0.8973, "step": 6085 }, { "epoch": 0.36343007285321866, "grad_norm": 2.39729642868042, "learning_rate": 7.073850441244775e-06, "loss": 0.9194, "step": 6086 }, { "epoch": 0.3634897886062343, "grad_norm": 3.5297839641571045, "learning_rate": 7.073186915267733e-06, "loss": 0.8938, "step": 6087 }, { "epoch": 0.36354950435925, "grad_norm": 1.9122891426086426, "learning_rate": 7.072523389290691e-06, "loss": 0.8912, "step": 6088 }, { "epoch": 0.3636092201122656, "grad_norm": 3.236347198486328, "learning_rate": 7.071859863313649e-06, "loss": 0.8889, "step": 6089 }, { "epoch": 0.36366893586528126, "grad_norm": 3.716641664505005, "learning_rate": 7.071196337336607e-06, "loss": 0.9099, "step": 6090 }, { "epoch": 0.3637286516182969, "grad_norm": 2.8055574893951416, "learning_rate": 7.070532811359565e-06, "loss": 0.8845, "step": 6091 }, { "epoch": 0.36378836737131254, "grad_norm": 2.268010139465332, "learning_rate": 7.069869285382524e-06, "loss": 0.8952, "step": 6092 }, { "epoch": 0.3638480831243282, "grad_norm": 2.1320464611053467, "learning_rate": 7.069205759405481e-06, "loss": 0.9071, "step": 6093 }, { "epoch": 0.36390779887734387, "grad_norm": 3.645181655883789, "learning_rate": 7.068542233428439e-06, "loss": 0.8302, "step": 6094 }, { "epoch": 0.3639675146303595, "grad_norm": 2.123093605041504, "learning_rate": 7.067878707451398e-06, "loss": 0.9154, "step": 6095 }, { "epoch": 0.36402723038337514, "grad_norm": 3.3917248249053955, "learning_rate": 7.067215181474355e-06, "loss": 0.8757, "step": 6096 }, { "epoch": 0.36408694613639075, "grad_norm": 1.7827184200286865, "learning_rate": 7.066551655497313e-06, "loss": 0.8257, "step": 6097 }, { "epoch": 0.3641466618894064, "grad_norm": 1.831627607345581, "learning_rate": 7.0658881295202705e-06, "loss": 0.8784, "step": 6098 }, { "epoch": 0.3642063776424221, "grad_norm": 3.828812837600708, "learning_rate": 7.0652246035432295e-06, "loss": 0.8913, "step": 6099 }, { "epoch": 0.3642660933954377, "grad_norm": 2.3863158226013184, "learning_rate": 7.0645610775661876e-06, "loss": 0.8819, "step": 6100 }, { "epoch": 0.36432580914845336, "grad_norm": 2.685209035873413, "learning_rate": 7.063897551589145e-06, "loss": 0.8895, "step": 6101 }, { "epoch": 0.364385524901469, "grad_norm": 2.674095630645752, "learning_rate": 7.063234025612104e-06, "loss": 0.8864, "step": 6102 }, { "epoch": 0.36444524065448464, "grad_norm": 2.310727596282959, "learning_rate": 7.062570499635061e-06, "loss": 0.8806, "step": 6103 }, { "epoch": 0.3645049564075003, "grad_norm": 2.234956741333008, "learning_rate": 7.061906973658019e-06, "loss": 0.8857, "step": 6104 }, { "epoch": 0.36456467216051597, "grad_norm": 4.89149284362793, "learning_rate": 7.061243447680978e-06, "loss": 0.9197, "step": 6105 }, { "epoch": 0.3646243879135316, "grad_norm": 5.379378318786621, "learning_rate": 7.060579921703935e-06, "loss": 0.8668, "step": 6106 }, { "epoch": 0.36468410366654724, "grad_norm": 2.560926914215088, "learning_rate": 7.059916395726893e-06, "loss": 0.899, "step": 6107 }, { "epoch": 0.36474381941956285, "grad_norm": 2.6858811378479004, "learning_rate": 7.059252869749851e-06, "loss": 0.931, "step": 6108 }, { "epoch": 0.3648035351725785, "grad_norm": 1.9560866355895996, "learning_rate": 7.05858934377281e-06, "loss": 0.9124, "step": 6109 }, { "epoch": 0.3648632509255942, "grad_norm": 2.413038969039917, "learning_rate": 7.057925817795768e-06, "loss": 0.836, "step": 6110 }, { "epoch": 0.3649229666786098, "grad_norm": 3.0334813594818115, "learning_rate": 7.057262291818725e-06, "loss": 0.8883, "step": 6111 }, { "epoch": 0.36498268243162546, "grad_norm": 2.2690303325653076, "learning_rate": 7.056598765841683e-06, "loss": 0.8565, "step": 6112 }, { "epoch": 0.3650423981846411, "grad_norm": 3.0626556873321533, "learning_rate": 7.055935239864641e-06, "loss": 0.9047, "step": 6113 }, { "epoch": 0.36510211393765674, "grad_norm": 2.8203141689300537, "learning_rate": 7.055271713887599e-06, "loss": 0.8643, "step": 6114 }, { "epoch": 0.3651618296906724, "grad_norm": 2.081015110015869, "learning_rate": 7.054608187910557e-06, "loss": 0.8754, "step": 6115 }, { "epoch": 0.36522154544368807, "grad_norm": 1.8667758703231812, "learning_rate": 7.053944661933515e-06, "loss": 0.8826, "step": 6116 }, { "epoch": 0.3652812611967037, "grad_norm": 2.1716010570526123, "learning_rate": 7.0532811359564736e-06, "loss": 0.8796, "step": 6117 }, { "epoch": 0.36534097694971934, "grad_norm": 2.142944574356079, "learning_rate": 7.052617609979431e-06, "loss": 0.8918, "step": 6118 }, { "epoch": 0.365400692702735, "grad_norm": 2.4136438369750977, "learning_rate": 7.051954084002389e-06, "loss": 0.8528, "step": 6119 }, { "epoch": 0.3654604084557506, "grad_norm": 2.279057025909424, "learning_rate": 7.051290558025348e-06, "loss": 0.836, "step": 6120 }, { "epoch": 0.3655201242087663, "grad_norm": 2.7274484634399414, "learning_rate": 7.050627032048305e-06, "loss": 0.9189, "step": 6121 }, { "epoch": 0.3655798399617819, "grad_norm": 2.7916743755340576, "learning_rate": 7.049963506071263e-06, "loss": 0.9083, "step": 6122 }, { "epoch": 0.36563955571479756, "grad_norm": 1.723649024963379, "learning_rate": 7.0492999800942205e-06, "loss": 0.9224, "step": 6123 }, { "epoch": 0.3656992714678132, "grad_norm": 23.96551513671875, "learning_rate": 7.0486364541171794e-06, "loss": 0.9113, "step": 6124 }, { "epoch": 0.36575898722082884, "grad_norm": 2.045348644256592, "learning_rate": 7.0479729281401375e-06, "loss": 0.8917, "step": 6125 }, { "epoch": 0.3658187029738445, "grad_norm": 2.0333847999572754, "learning_rate": 7.047309402163095e-06, "loss": 0.8793, "step": 6126 }, { "epoch": 0.36587841872686017, "grad_norm": 2.886321783065796, "learning_rate": 7.046645876186054e-06, "loss": 0.8644, "step": 6127 }, { "epoch": 0.3659381344798758, "grad_norm": 1.8156688213348389, "learning_rate": 7.045982350209011e-06, "loss": 0.8655, "step": 6128 }, { "epoch": 0.36599785023289144, "grad_norm": 2.5751163959503174, "learning_rate": 7.045318824231969e-06, "loss": 0.8507, "step": 6129 }, { "epoch": 0.3660575659859071, "grad_norm": 1.945387363433838, "learning_rate": 7.044655298254928e-06, "loss": 0.8811, "step": 6130 }, { "epoch": 0.3661172817389227, "grad_norm": 2.3210887908935547, "learning_rate": 7.043991772277885e-06, "loss": 0.8655, "step": 6131 }, { "epoch": 0.3661769974919384, "grad_norm": 2.2066826820373535, "learning_rate": 7.043328246300843e-06, "loss": 0.8879, "step": 6132 }, { "epoch": 0.366236713244954, "grad_norm": 2.8492448329925537, "learning_rate": 7.042664720323801e-06, "loss": 0.8786, "step": 6133 }, { "epoch": 0.36629642899796966, "grad_norm": 2.10709285736084, "learning_rate": 7.04200119434676e-06, "loss": 0.9106, "step": 6134 }, { "epoch": 0.3663561447509853, "grad_norm": 9.391310691833496, "learning_rate": 7.041337668369718e-06, "loss": 0.9032, "step": 6135 }, { "epoch": 0.36641586050400093, "grad_norm": 3.3035645484924316, "learning_rate": 7.040674142392675e-06, "loss": 0.9183, "step": 6136 }, { "epoch": 0.3664755762570166, "grad_norm": 4.340754985809326, "learning_rate": 7.040010616415633e-06, "loss": 0.9027, "step": 6137 }, { "epoch": 0.36653529201003227, "grad_norm": 1.7909094095230103, "learning_rate": 7.039347090438591e-06, "loss": 0.8839, "step": 6138 }, { "epoch": 0.3665950077630479, "grad_norm": 5.119968891143799, "learning_rate": 7.038683564461549e-06, "loss": 0.9025, "step": 6139 }, { "epoch": 0.36665472351606354, "grad_norm": 2.1649794578552246, "learning_rate": 7.038020038484507e-06, "loss": 0.8688, "step": 6140 }, { "epoch": 0.3667144392690792, "grad_norm": 3.102297306060791, "learning_rate": 7.037356512507465e-06, "loss": 0.9013, "step": 6141 }, { "epoch": 0.3667741550220948, "grad_norm": 2.3763427734375, "learning_rate": 7.0366929865304235e-06, "loss": 0.8788, "step": 6142 }, { "epoch": 0.3668338707751105, "grad_norm": 3.329679250717163, "learning_rate": 7.036029460553381e-06, "loss": 0.8824, "step": 6143 }, { "epoch": 0.3668935865281261, "grad_norm": 3.2864818572998047, "learning_rate": 7.035365934576339e-06, "loss": 0.8718, "step": 6144 }, { "epoch": 0.36695330228114176, "grad_norm": 2.2261576652526855, "learning_rate": 7.034702408599298e-06, "loss": 0.8781, "step": 6145 }, { "epoch": 0.3670130180341574, "grad_norm": 2.3441295623779297, "learning_rate": 7.034038882622255e-06, "loss": 0.8998, "step": 6146 }, { "epoch": 0.36707273378717303, "grad_norm": 2.314349412918091, "learning_rate": 7.033375356645213e-06, "loss": 0.852, "step": 6147 }, { "epoch": 0.3671324495401887, "grad_norm": 1.8326432704925537, "learning_rate": 7.0327118306681705e-06, "loss": 0.9254, "step": 6148 }, { "epoch": 0.36719216529320436, "grad_norm": 2.341346263885498, "learning_rate": 7.032048304691129e-06, "loss": 0.8639, "step": 6149 }, { "epoch": 0.36725188104622, "grad_norm": 2.9421584606170654, "learning_rate": 7.0313847787140875e-06, "loss": 0.9332, "step": 6150 }, { "epoch": 0.36731159679923564, "grad_norm": 2.716198205947876, "learning_rate": 7.030721252737045e-06, "loss": 0.8339, "step": 6151 }, { "epoch": 0.3673713125522513, "grad_norm": 2.629920482635498, "learning_rate": 7.030057726760004e-06, "loss": 0.8638, "step": 6152 }, { "epoch": 0.3674310283052669, "grad_norm": 2.129835367202759, "learning_rate": 7.029394200782961e-06, "loss": 0.8634, "step": 6153 }, { "epoch": 0.3674907440582826, "grad_norm": 3.184645414352417, "learning_rate": 7.028730674805919e-06, "loss": 0.8476, "step": 6154 }, { "epoch": 0.36755045981129825, "grad_norm": 2.5502686500549316, "learning_rate": 7.028067148828878e-06, "loss": 0.9165, "step": 6155 }, { "epoch": 0.36761017556431386, "grad_norm": 1.8069132566452026, "learning_rate": 7.027403622851835e-06, "loss": 0.8784, "step": 6156 }, { "epoch": 0.3676698913173295, "grad_norm": 2.5461697578430176, "learning_rate": 7.026740096874793e-06, "loss": 0.9079, "step": 6157 }, { "epoch": 0.36772960707034513, "grad_norm": 2.133727788925171, "learning_rate": 7.026076570897751e-06, "loss": 0.8736, "step": 6158 }, { "epoch": 0.3677893228233608, "grad_norm": 3.685283899307251, "learning_rate": 7.0254130449207096e-06, "loss": 0.8667, "step": 6159 }, { "epoch": 0.36784903857637646, "grad_norm": 1.9261219501495361, "learning_rate": 7.024749518943668e-06, "loss": 0.8662, "step": 6160 }, { "epoch": 0.3679087543293921, "grad_norm": 2.796137571334839, "learning_rate": 7.024085992966625e-06, "loss": 0.8335, "step": 6161 }, { "epoch": 0.36796847008240774, "grad_norm": 2.3608806133270264, "learning_rate": 7.023422466989583e-06, "loss": 0.8886, "step": 6162 }, { "epoch": 0.3680281858354234, "grad_norm": 2.9266841411590576, "learning_rate": 7.022758941012541e-06, "loss": 0.9044, "step": 6163 }, { "epoch": 0.368087901588439, "grad_norm": 2.1249256134033203, "learning_rate": 7.022095415035499e-06, "loss": 0.8614, "step": 6164 }, { "epoch": 0.3681476173414547, "grad_norm": 2.0594866275787354, "learning_rate": 7.021431889058457e-06, "loss": 0.8431, "step": 6165 }, { "epoch": 0.36820733309447035, "grad_norm": 2.249013900756836, "learning_rate": 7.0207683630814146e-06, "loss": 0.8965, "step": 6166 }, { "epoch": 0.36826704884748596, "grad_norm": 2.6223304271698, "learning_rate": 7.0201048371043735e-06, "loss": 0.8755, "step": 6167 }, { "epoch": 0.3683267646005016, "grad_norm": 2.0748255252838135, "learning_rate": 7.019441311127331e-06, "loss": 0.8994, "step": 6168 }, { "epoch": 0.36838648035351723, "grad_norm": 3.547548294067383, "learning_rate": 7.018777785150289e-06, "loss": 0.88, "step": 6169 }, { "epoch": 0.3684461961065329, "grad_norm": 2.1415600776672363, "learning_rate": 7.018114259173248e-06, "loss": 0.8815, "step": 6170 }, { "epoch": 0.36850591185954856, "grad_norm": 1.961061716079712, "learning_rate": 7.017450733196205e-06, "loss": 0.8632, "step": 6171 }, { "epoch": 0.3685656276125642, "grad_norm": 2.0114853382110596, "learning_rate": 7.016787207219163e-06, "loss": 0.8871, "step": 6172 }, { "epoch": 0.36862534336557984, "grad_norm": 2.1755897998809814, "learning_rate": 7.0161236812421204e-06, "loss": 0.9097, "step": 6173 }, { "epoch": 0.3686850591185955, "grad_norm": 5.409998416900635, "learning_rate": 7.015460155265079e-06, "loss": 0.8941, "step": 6174 }, { "epoch": 0.3687447748716111, "grad_norm": 3.0266711711883545, "learning_rate": 7.0147966292880375e-06, "loss": 0.8999, "step": 6175 }, { "epoch": 0.3688044906246268, "grad_norm": 1.7864065170288086, "learning_rate": 7.014133103310995e-06, "loss": 0.8383, "step": 6176 }, { "epoch": 0.36886420637764245, "grad_norm": 6.224855899810791, "learning_rate": 7.013469577333954e-06, "loss": 0.8626, "step": 6177 }, { "epoch": 0.36892392213065806, "grad_norm": 2.1548755168914795, "learning_rate": 7.012806051356911e-06, "loss": 0.8976, "step": 6178 }, { "epoch": 0.3689836378836737, "grad_norm": 2.9758095741271973, "learning_rate": 7.012142525379869e-06, "loss": 0.8803, "step": 6179 }, { "epoch": 0.3690433536366894, "grad_norm": 3.1125266551971436, "learning_rate": 7.011478999402828e-06, "loss": 0.9362, "step": 6180 }, { "epoch": 0.369103069389705, "grad_norm": 2.3606855869293213, "learning_rate": 7.010815473425785e-06, "loss": 0.8932, "step": 6181 }, { "epoch": 0.36916278514272066, "grad_norm": 1.957506775856018, "learning_rate": 7.010151947448743e-06, "loss": 0.8851, "step": 6182 }, { "epoch": 0.36922250089573627, "grad_norm": 4.644923210144043, "learning_rate": 7.009488421471701e-06, "loss": 0.8809, "step": 6183 }, { "epoch": 0.36928221664875194, "grad_norm": 2.688344955444336, "learning_rate": 7.0088248954946595e-06, "loss": 0.8795, "step": 6184 }, { "epoch": 0.3693419324017676, "grad_norm": 1.8274781703948975, "learning_rate": 7.008161369517618e-06, "loss": 0.9034, "step": 6185 }, { "epoch": 0.3694016481547832, "grad_norm": 2.121288537979126, "learning_rate": 7.007497843540575e-06, "loss": 0.8631, "step": 6186 }, { "epoch": 0.3694613639077989, "grad_norm": 2.391324758529663, "learning_rate": 7.006834317563533e-06, "loss": 0.9246, "step": 6187 }, { "epoch": 0.36952107966081454, "grad_norm": 3.3660244941711426, "learning_rate": 7.006170791586491e-06, "loss": 0.8941, "step": 6188 }, { "epoch": 0.36958079541383015, "grad_norm": 2.1591689586639404, "learning_rate": 7.005507265609449e-06, "loss": 0.8719, "step": 6189 }, { "epoch": 0.3696405111668458, "grad_norm": 4.99747896194458, "learning_rate": 7.004843739632407e-06, "loss": 0.8344, "step": 6190 }, { "epoch": 0.3697002269198615, "grad_norm": 2.2281956672668457, "learning_rate": 7.0041802136553645e-06, "loss": 0.8949, "step": 6191 }, { "epoch": 0.3697599426728771, "grad_norm": 3.0991992950439453, "learning_rate": 7.0035166876783235e-06, "loss": 0.8841, "step": 6192 }, { "epoch": 0.36981965842589276, "grad_norm": 3.0833091735839844, "learning_rate": 7.002853161701281e-06, "loss": 0.8848, "step": 6193 }, { "epoch": 0.36987937417890837, "grad_norm": 2.3645689487457275, "learning_rate": 7.002189635724239e-06, "loss": 0.8951, "step": 6194 }, { "epoch": 0.36993908993192404, "grad_norm": 2.965930461883545, "learning_rate": 7.001526109747198e-06, "loss": 0.8699, "step": 6195 }, { "epoch": 0.3699988056849397, "grad_norm": 2.213719129562378, "learning_rate": 7.000862583770155e-06, "loss": 0.8893, "step": 6196 }, { "epoch": 0.3700585214379553, "grad_norm": 2.264711380004883, "learning_rate": 7.000199057793113e-06, "loss": 0.8953, "step": 6197 }, { "epoch": 0.370118237190971, "grad_norm": 4.6298723220825195, "learning_rate": 6.99953553181607e-06, "loss": 0.9262, "step": 6198 }, { "epoch": 0.37017795294398664, "grad_norm": 2.181084632873535, "learning_rate": 6.998872005839029e-06, "loss": 0.9184, "step": 6199 }, { "epoch": 0.37023766869700225, "grad_norm": 2.9823389053344727, "learning_rate": 6.9982084798619874e-06, "loss": 0.8495, "step": 6200 }, { "epoch": 0.37023766869700225, "eval_text_loss": 0.9301369190216064, "eval_text_runtime": 15.219, "eval_text_samples_per_second": 262.829, "eval_text_steps_per_second": 0.526, "step": 6200 }, { "epoch": 0.37023766869700225, "eval_image_loss": 0.6467739343643188, "eval_image_runtime": 4.9817, "eval_image_samples_per_second": 802.934, "eval_image_steps_per_second": 1.606, "step": 6200 }, { "epoch": 0.37023766869700225, "eval_video_loss": 1.1029760837554932, "eval_video_runtime": 76.4864, "eval_video_samples_per_second": 52.297, "eval_video_steps_per_second": 0.105, "step": 6200 }, { "epoch": 0.3702973844500179, "grad_norm": 2.1810142993927, "learning_rate": 6.997544953884945e-06, "loss": 0.9453, "step": 6201 }, { "epoch": 0.3703571002030336, "grad_norm": 1.8041315078735352, "learning_rate": 6.996881427907904e-06, "loss": 0.9175, "step": 6202 }, { "epoch": 0.3704168159560492, "grad_norm": 3.499835729598999, "learning_rate": 6.996217901930861e-06, "loss": 0.8431, "step": 6203 }, { "epoch": 0.37047653170906486, "grad_norm": 1.8565593957901, "learning_rate": 6.995554375953819e-06, "loss": 0.8442, "step": 6204 }, { "epoch": 0.37053624746208047, "grad_norm": 2.1192004680633545, "learning_rate": 6.994890849976778e-06, "loss": 0.8727, "step": 6205 }, { "epoch": 0.37059596321509614, "grad_norm": 2.7144620418548584, "learning_rate": 6.994227323999735e-06, "loss": 0.8864, "step": 6206 }, { "epoch": 0.3706556789681118, "grad_norm": 4.1156840324401855, "learning_rate": 6.993563798022693e-06, "loss": 0.8715, "step": 6207 }, { "epoch": 0.3707153947211274, "grad_norm": 2.287557363510132, "learning_rate": 6.9929002720456506e-06, "loss": 0.8672, "step": 6208 }, { "epoch": 0.3707751104741431, "grad_norm": 2.1091508865356445, "learning_rate": 6.9922367460686095e-06, "loss": 0.8769, "step": 6209 }, { "epoch": 0.37083482622715874, "grad_norm": 2.4653098583221436, "learning_rate": 6.991573220091568e-06, "loss": 0.839, "step": 6210 }, { "epoch": 0.37089454198017435, "grad_norm": 2.4181084632873535, "learning_rate": 6.990909694114525e-06, "loss": 0.8809, "step": 6211 }, { "epoch": 0.37095425773319, "grad_norm": 2.590803861618042, "learning_rate": 6.990246168137483e-06, "loss": 0.8767, "step": 6212 }, { "epoch": 0.3710139734862057, "grad_norm": 2.0781149864196777, "learning_rate": 6.989582642160441e-06, "loss": 0.8736, "step": 6213 }, { "epoch": 0.3710736892392213, "grad_norm": 2.44448184967041, "learning_rate": 6.988919116183399e-06, "loss": 0.8655, "step": 6214 }, { "epoch": 0.37113340499223696, "grad_norm": 3.051764726638794, "learning_rate": 6.988255590206357e-06, "loss": 0.8753, "step": 6215 }, { "epoch": 0.3711931207452526, "grad_norm": 1.9868435859680176, "learning_rate": 6.9875920642293145e-06, "loss": 0.8665, "step": 6216 }, { "epoch": 0.37125283649826823, "grad_norm": 3.050588369369507, "learning_rate": 6.9869285382522735e-06, "loss": 0.8557, "step": 6217 }, { "epoch": 0.3713125522512839, "grad_norm": 2.51790452003479, "learning_rate": 6.986265012275231e-06, "loss": 0.8884, "step": 6218 }, { "epoch": 0.3713722680042995, "grad_norm": 3.623966693878174, "learning_rate": 6.985601486298189e-06, "loss": 0.8785, "step": 6219 }, { "epoch": 0.3714319837573152, "grad_norm": 2.1912784576416016, "learning_rate": 6.984937960321148e-06, "loss": 0.9033, "step": 6220 }, { "epoch": 0.37149169951033084, "grad_norm": 2.575460910797119, "learning_rate": 6.984274434344105e-06, "loss": 0.8798, "step": 6221 }, { "epoch": 0.37155141526334645, "grad_norm": 1.9284965991973877, "learning_rate": 6.983610908367063e-06, "loss": 0.8472, "step": 6222 }, { "epoch": 0.3716111310163621, "grad_norm": 2.2981467247009277, "learning_rate": 6.98294738239002e-06, "loss": 0.9006, "step": 6223 }, { "epoch": 0.3716708467693778, "grad_norm": 3.268099546432495, "learning_rate": 6.982283856412979e-06, "loss": 0.8912, "step": 6224 }, { "epoch": 0.3717305625223934, "grad_norm": 2.8143248558044434, "learning_rate": 6.981620330435937e-06, "loss": 0.8762, "step": 6225 }, { "epoch": 0.37179027827540906, "grad_norm": 2.089080810546875, "learning_rate": 6.980956804458895e-06, "loss": 0.9007, "step": 6226 }, { "epoch": 0.3718499940284247, "grad_norm": 2.0692687034606934, "learning_rate": 6.980293278481854e-06, "loss": 0.8885, "step": 6227 }, { "epoch": 0.37190970978144033, "grad_norm": 5.176792621612549, "learning_rate": 6.979629752504811e-06, "loss": 0.9026, "step": 6228 }, { "epoch": 0.371969425534456, "grad_norm": 2.3481647968292236, "learning_rate": 6.978966226527769e-06, "loss": 0.8834, "step": 6229 }, { "epoch": 0.3720291412874716, "grad_norm": 2.1654715538024902, "learning_rate": 6.978302700550728e-06, "loss": 0.8954, "step": 6230 }, { "epoch": 0.3720888570404873, "grad_norm": 2.7042741775512695, "learning_rate": 6.977639174573685e-06, "loss": 0.8506, "step": 6231 }, { "epoch": 0.37214857279350294, "grad_norm": 3.369144916534424, "learning_rate": 6.976975648596643e-06, "loss": 0.8861, "step": 6232 }, { "epoch": 0.37220828854651855, "grad_norm": 1.8869142532348633, "learning_rate": 6.9763121226196005e-06, "loss": 0.8496, "step": 6233 }, { "epoch": 0.3722680042995342, "grad_norm": 2.3337273597717285, "learning_rate": 6.9756485966425595e-06, "loss": 0.8964, "step": 6234 }, { "epoch": 0.3723277200525499, "grad_norm": 4.0440826416015625, "learning_rate": 6.9749850706655176e-06, "loss": 0.891, "step": 6235 }, { "epoch": 0.3723874358055655, "grad_norm": 2.419858455657959, "learning_rate": 6.974321544688475e-06, "loss": 0.8339, "step": 6236 }, { "epoch": 0.37244715155858116, "grad_norm": 2.458029270172119, "learning_rate": 6.973658018711433e-06, "loss": 0.8964, "step": 6237 }, { "epoch": 0.3725068673115968, "grad_norm": 2.1537766456604004, "learning_rate": 6.972994492734391e-06, "loss": 0.9005, "step": 6238 }, { "epoch": 0.37256658306461243, "grad_norm": 4.243879318237305, "learning_rate": 6.972330966757349e-06, "loss": 0.8781, "step": 6239 }, { "epoch": 0.3726262988176281, "grad_norm": 2.786412477493286, "learning_rate": 6.971667440780307e-06, "loss": 0.9072, "step": 6240 }, { "epoch": 0.3726860145706437, "grad_norm": 2.3358230590820312, "learning_rate": 6.9710039148032645e-06, "loss": 0.8523, "step": 6241 }, { "epoch": 0.3727457303236594, "grad_norm": 2.374264717102051, "learning_rate": 6.9703403888262234e-06, "loss": 0.8899, "step": 6242 }, { "epoch": 0.37280544607667504, "grad_norm": 2.001371145248413, "learning_rate": 6.969676862849181e-06, "loss": 0.8678, "step": 6243 }, { "epoch": 0.37286516182969065, "grad_norm": 2.8441829681396484, "learning_rate": 6.969013336872139e-06, "loss": 0.8696, "step": 6244 }, { "epoch": 0.3729248775827063, "grad_norm": 1.9520198106765747, "learning_rate": 6.968349810895098e-06, "loss": 0.8977, "step": 6245 }, { "epoch": 0.372984593335722, "grad_norm": 1.7749522924423218, "learning_rate": 6.967686284918055e-06, "loss": 0.849, "step": 6246 }, { "epoch": 0.3730443090887376, "grad_norm": 2.2132396697998047, "learning_rate": 6.967022758941013e-06, "loss": 0.9118, "step": 6247 }, { "epoch": 0.37310402484175326, "grad_norm": 7.145448684692383, "learning_rate": 6.96635923296397e-06, "loss": 0.8556, "step": 6248 }, { "epoch": 0.3731637405947689, "grad_norm": 1.8034800291061401, "learning_rate": 6.965695706986929e-06, "loss": 0.896, "step": 6249 }, { "epoch": 0.37322345634778453, "grad_norm": 2.5353028774261475, "learning_rate": 6.965032181009887e-06, "loss": 0.9177, "step": 6250 }, { "epoch": 0.3732831721008002, "grad_norm": 2.5688297748565674, "learning_rate": 6.964368655032845e-06, "loss": 0.8725, "step": 6251 }, { "epoch": 0.37334288785381586, "grad_norm": 2.065092086791992, "learning_rate": 6.963705129055804e-06, "loss": 0.899, "step": 6252 }, { "epoch": 0.3734026036068315, "grad_norm": 2.1097166538238525, "learning_rate": 6.963041603078761e-06, "loss": 0.8469, "step": 6253 }, { "epoch": 0.37346231935984714, "grad_norm": 4.418522834777832, "learning_rate": 6.962378077101719e-06, "loss": 0.8975, "step": 6254 }, { "epoch": 0.37352203511286275, "grad_norm": 1.7601032257080078, "learning_rate": 6.961714551124678e-06, "loss": 0.8978, "step": 6255 }, { "epoch": 0.3735817508658784, "grad_norm": 1.9123867750167847, "learning_rate": 6.961051025147635e-06, "loss": 0.8686, "step": 6256 }, { "epoch": 0.3736414666188941, "grad_norm": 2.8206114768981934, "learning_rate": 6.960387499170593e-06, "loss": 0.8208, "step": 6257 }, { "epoch": 0.3737011823719097, "grad_norm": 1.9280160665512085, "learning_rate": 6.9597239731935505e-06, "loss": 0.8583, "step": 6258 }, { "epoch": 0.37376089812492536, "grad_norm": 2.572835922241211, "learning_rate": 6.9590604472165094e-06, "loss": 0.8876, "step": 6259 }, { "epoch": 0.373820613877941, "grad_norm": 2.526935338973999, "learning_rate": 6.9583969212394675e-06, "loss": 0.8818, "step": 6260 }, { "epoch": 0.37388032963095663, "grad_norm": 4.366240978240967, "learning_rate": 6.957733395262425e-06, "loss": 0.8612, "step": 6261 }, { "epoch": 0.3739400453839723, "grad_norm": 2.6286516189575195, "learning_rate": 6.957069869285383e-06, "loss": 0.8798, "step": 6262 }, { "epoch": 0.37399976113698796, "grad_norm": 2.3437108993530273, "learning_rate": 6.956406343308341e-06, "loss": 0.8885, "step": 6263 }, { "epoch": 0.3740594768900036, "grad_norm": 3.162099838256836, "learning_rate": 6.955742817331299e-06, "loss": 0.8635, "step": 6264 }, { "epoch": 0.37411919264301924, "grad_norm": 2.1773300170898438, "learning_rate": 6.955079291354257e-06, "loss": 0.8695, "step": 6265 }, { "epoch": 0.37417890839603485, "grad_norm": 2.234369993209839, "learning_rate": 6.9544157653772145e-06, "loss": 0.8722, "step": 6266 }, { "epoch": 0.3742386241490505, "grad_norm": 14.386242866516113, "learning_rate": 6.953752239400173e-06, "loss": 0.8951, "step": 6267 }, { "epoch": 0.3742983399020662, "grad_norm": 2.3453571796417236, "learning_rate": 6.953088713423131e-06, "loss": 0.8379, "step": 6268 }, { "epoch": 0.3743580556550818, "grad_norm": 2.127225875854492, "learning_rate": 6.952425187446089e-06, "loss": 0.9055, "step": 6269 }, { "epoch": 0.37441777140809746, "grad_norm": 1.70816969871521, "learning_rate": 6.951761661469048e-06, "loss": 0.851, "step": 6270 }, { "epoch": 0.3744774871611131, "grad_norm": 2.824846029281616, "learning_rate": 6.951098135492005e-06, "loss": 0.8908, "step": 6271 }, { "epoch": 0.37453720291412873, "grad_norm": 1.8803611993789673, "learning_rate": 6.950434609514963e-06, "loss": 0.8914, "step": 6272 }, { "epoch": 0.3745969186671444, "grad_norm": 1.629224181175232, "learning_rate": 6.94977108353792e-06, "loss": 0.8742, "step": 6273 }, { "epoch": 0.37465663442016006, "grad_norm": 2.4869441986083984, "learning_rate": 6.949107557560879e-06, "loss": 0.8615, "step": 6274 }, { "epoch": 0.37471635017317567, "grad_norm": 3.858258008956909, "learning_rate": 6.948444031583837e-06, "loss": 0.9121, "step": 6275 }, { "epoch": 0.37477606592619134, "grad_norm": 3.492358446121216, "learning_rate": 6.947780505606795e-06, "loss": 0.8674, "step": 6276 }, { "epoch": 0.37483578167920695, "grad_norm": 1.8293975591659546, "learning_rate": 6.9471169796297536e-06, "loss": 0.9103, "step": 6277 }, { "epoch": 0.3748954974322226, "grad_norm": 2.1960554122924805, "learning_rate": 6.946453453652711e-06, "loss": 0.8883, "step": 6278 }, { "epoch": 0.3749552131852383, "grad_norm": 2.2608234882354736, "learning_rate": 6.945789927675669e-06, "loss": 0.8798, "step": 6279 }, { "epoch": 0.3750149289382539, "grad_norm": 1.762181282043457, "learning_rate": 6.945126401698628e-06, "loss": 0.8513, "step": 6280 }, { "epoch": 0.37507464469126955, "grad_norm": 2.1495752334594727, "learning_rate": 6.944462875721585e-06, "loss": 0.889, "step": 6281 }, { "epoch": 0.3751343604442852, "grad_norm": 5.855923175811768, "learning_rate": 6.943799349744543e-06, "loss": 0.8832, "step": 6282 }, { "epoch": 0.37519407619730083, "grad_norm": 2.6479640007019043, "learning_rate": 6.9431358237675005e-06, "loss": 0.8842, "step": 6283 }, { "epoch": 0.3752537919503165, "grad_norm": 1.664738416671753, "learning_rate": 6.942472297790459e-06, "loss": 0.8473, "step": 6284 }, { "epoch": 0.37531350770333216, "grad_norm": 2.806596279144287, "learning_rate": 6.9418087718134175e-06, "loss": 0.8909, "step": 6285 }, { "epoch": 0.37537322345634777, "grad_norm": 2.7841224670410156, "learning_rate": 6.941145245836375e-06, "loss": 0.8821, "step": 6286 }, { "epoch": 0.37543293920936344, "grad_norm": 2.658850908279419, "learning_rate": 6.940481719859333e-06, "loss": 0.8988, "step": 6287 }, { "epoch": 0.3754926549623791, "grad_norm": 2.2017152309417725, "learning_rate": 6.939818193882291e-06, "loss": 0.879, "step": 6288 }, { "epoch": 0.3755523707153947, "grad_norm": 2.245574712753296, "learning_rate": 6.939154667905249e-06, "loss": 0.8595, "step": 6289 }, { "epoch": 0.3756120864684104, "grad_norm": 2.180931568145752, "learning_rate": 6.938491141928207e-06, "loss": 0.862, "step": 6290 }, { "epoch": 0.375671802221426, "grad_norm": 3.317535877227783, "learning_rate": 6.937827615951164e-06, "loss": 0.8818, "step": 6291 }, { "epoch": 0.37573151797444165, "grad_norm": 2.8835363388061523, "learning_rate": 6.937164089974123e-06, "loss": 0.8938, "step": 6292 }, { "epoch": 0.3757912337274573, "grad_norm": 3.3753156661987305, "learning_rate": 6.936500563997081e-06, "loss": 0.8993, "step": 6293 }, { "epoch": 0.37585094948047293, "grad_norm": 1.948035478591919, "learning_rate": 6.935837038020039e-06, "loss": 0.8795, "step": 6294 }, { "epoch": 0.3759106652334886, "grad_norm": 2.0790441036224365, "learning_rate": 6.935173512042998e-06, "loss": 0.894, "step": 6295 }, { "epoch": 0.37597038098650426, "grad_norm": 2.254211902618408, "learning_rate": 6.934509986065955e-06, "loss": 0.8446, "step": 6296 }, { "epoch": 0.37603009673951987, "grad_norm": 2.6371843814849854, "learning_rate": 6.933846460088913e-06, "loss": 0.8903, "step": 6297 }, { "epoch": 0.37608981249253554, "grad_norm": 2.4161486625671387, "learning_rate": 6.93318293411187e-06, "loss": 0.8798, "step": 6298 }, { "epoch": 0.3761495282455512, "grad_norm": 2.125830888748169, "learning_rate": 6.932519408134829e-06, "loss": 0.8871, "step": 6299 }, { "epoch": 0.3762092439985668, "grad_norm": 1.7332820892333984, "learning_rate": 6.931855882157787e-06, "loss": 0.8797, "step": 6300 }, { "epoch": 0.3762689597515825, "grad_norm": 1.8439061641693115, "learning_rate": 6.931192356180745e-06, "loss": 0.8968, "step": 6301 }, { "epoch": 0.3763286755045981, "grad_norm": 3.184735059738159, "learning_rate": 6.9305288302037035e-06, "loss": 0.8807, "step": 6302 }, { "epoch": 0.37638839125761375, "grad_norm": 2.3257174491882324, "learning_rate": 6.929865304226661e-06, "loss": 0.8695, "step": 6303 }, { "epoch": 0.3764481070106294, "grad_norm": 2.499742031097412, "learning_rate": 6.929201778249619e-06, "loss": 0.8747, "step": 6304 }, { "epoch": 0.37650782276364503, "grad_norm": 2.2484660148620605, "learning_rate": 6.928538252272578e-06, "loss": 0.8787, "step": 6305 }, { "epoch": 0.3765675385166607, "grad_norm": 1.9360064268112183, "learning_rate": 6.927874726295535e-06, "loss": 0.9219, "step": 6306 }, { "epoch": 0.37662725426967636, "grad_norm": 2.658046245574951, "learning_rate": 6.927211200318493e-06, "loss": 0.888, "step": 6307 }, { "epoch": 0.37668697002269197, "grad_norm": 1.7875652313232422, "learning_rate": 6.9265476743414504e-06, "loss": 0.8506, "step": 6308 }, { "epoch": 0.37674668577570763, "grad_norm": 2.8900935649871826, "learning_rate": 6.925884148364409e-06, "loss": 0.909, "step": 6309 }, { "epoch": 0.3768064015287233, "grad_norm": 2.3425662517547607, "learning_rate": 6.9252206223873675e-06, "loss": 0.8734, "step": 6310 }, { "epoch": 0.3768661172817389, "grad_norm": 1.8135234117507935, "learning_rate": 6.924557096410325e-06, "loss": 0.8789, "step": 6311 }, { "epoch": 0.3769258330347546, "grad_norm": 3.3883249759674072, "learning_rate": 6.923893570433283e-06, "loss": 0.9246, "step": 6312 }, { "epoch": 0.3769855487877702, "grad_norm": 2.48382830619812, "learning_rate": 6.923230044456241e-06, "loss": 0.8985, "step": 6313 }, { "epoch": 0.37704526454078585, "grad_norm": 2.012378215789795, "learning_rate": 6.922566518479199e-06, "loss": 0.8646, "step": 6314 }, { "epoch": 0.3771049802938015, "grad_norm": 2.9651081562042236, "learning_rate": 6.921902992502157e-06, "loss": 0.8624, "step": 6315 }, { "epoch": 0.3771646960468171, "grad_norm": 3.4617815017700195, "learning_rate": 6.921239466525114e-06, "loss": 0.8963, "step": 6316 }, { "epoch": 0.3772244117998328, "grad_norm": 1.8714258670806885, "learning_rate": 6.920575940548073e-06, "loss": 0.8782, "step": 6317 }, { "epoch": 0.37728412755284846, "grad_norm": 2.3020458221435547, "learning_rate": 6.919912414571031e-06, "loss": 0.9007, "step": 6318 }, { "epoch": 0.37734384330586407, "grad_norm": 2.137268304824829, "learning_rate": 6.919248888593989e-06, "loss": 0.8709, "step": 6319 }, { "epoch": 0.37740355905887973, "grad_norm": 2.6749625205993652, "learning_rate": 6.918585362616948e-06, "loss": 0.8697, "step": 6320 }, { "epoch": 0.3774632748118954, "grad_norm": 2.549867630004883, "learning_rate": 6.917921836639905e-06, "loss": 0.8627, "step": 6321 }, { "epoch": 0.377522990564911, "grad_norm": 2.4046578407287598, "learning_rate": 6.917258310662863e-06, "loss": 0.8725, "step": 6322 }, { "epoch": 0.3775827063179267, "grad_norm": 2.1187021732330322, "learning_rate": 6.91659478468582e-06, "loss": 0.9077, "step": 6323 }, { "epoch": 0.37764242207094234, "grad_norm": 2.004089593887329, "learning_rate": 6.915931258708779e-06, "loss": 0.9281, "step": 6324 }, { "epoch": 0.37770213782395795, "grad_norm": 3.5734992027282715, "learning_rate": 6.915267732731737e-06, "loss": 0.8951, "step": 6325 }, { "epoch": 0.3777618535769736, "grad_norm": 2.3627254962921143, "learning_rate": 6.9146042067546946e-06, "loss": 0.8751, "step": 6326 }, { "epoch": 0.3778215693299892, "grad_norm": 2.320312261581421, "learning_rate": 6.9139406807776535e-06, "loss": 0.9113, "step": 6327 }, { "epoch": 0.3778812850830049, "grad_norm": 2.2134361267089844, "learning_rate": 6.913277154800611e-06, "loss": 0.861, "step": 6328 }, { "epoch": 0.37794100083602056, "grad_norm": 2.795189619064331, "learning_rate": 6.912613628823569e-06, "loss": 0.9097, "step": 6329 }, { "epoch": 0.37800071658903617, "grad_norm": 2.1605336666107178, "learning_rate": 6.911950102846528e-06, "loss": 0.9003, "step": 6330 }, { "epoch": 0.37806043234205183, "grad_norm": 2.707998037338257, "learning_rate": 6.911286576869485e-06, "loss": 0.8642, "step": 6331 }, { "epoch": 0.3781201480950675, "grad_norm": 2.5207359790802, "learning_rate": 6.910623050892443e-06, "loss": 0.9043, "step": 6332 }, { "epoch": 0.3781798638480831, "grad_norm": 3.7933316230773926, "learning_rate": 6.9099595249154e-06, "loss": 0.8856, "step": 6333 }, { "epoch": 0.3782395796010988, "grad_norm": 1.951003909111023, "learning_rate": 6.909295998938359e-06, "loss": 0.8754, "step": 6334 }, { "epoch": 0.37829929535411444, "grad_norm": 2.0979254245758057, "learning_rate": 6.9086324729613175e-06, "loss": 0.8828, "step": 6335 }, { "epoch": 0.37835901110713005, "grad_norm": 2.1065258979797363, "learning_rate": 6.907968946984275e-06, "loss": 0.8664, "step": 6336 }, { "epoch": 0.3784187268601457, "grad_norm": 2.0368576049804688, "learning_rate": 6.907305421007233e-06, "loss": 0.8814, "step": 6337 }, { "epoch": 0.3784784426131613, "grad_norm": 1.8186031579971313, "learning_rate": 6.906641895030191e-06, "loss": 0.8468, "step": 6338 }, { "epoch": 0.378538158366177, "grad_norm": 2.8574717044830322, "learning_rate": 6.905978369053149e-06, "loss": 0.8434, "step": 6339 }, { "epoch": 0.37859787411919266, "grad_norm": 1.6425833702087402, "learning_rate": 6.905314843076107e-06, "loss": 0.8576, "step": 6340 }, { "epoch": 0.37865758987220827, "grad_norm": 3.635120153427124, "learning_rate": 6.904651317099064e-06, "loss": 0.8941, "step": 6341 }, { "epoch": 0.37871730562522393, "grad_norm": 2.236207962036133, "learning_rate": 6.903987791122023e-06, "loss": 0.88, "step": 6342 }, { "epoch": 0.3787770213782396, "grad_norm": 3.1476242542266846, "learning_rate": 6.9033242651449806e-06, "loss": 0.8747, "step": 6343 }, { "epoch": 0.3788367371312552, "grad_norm": 2.4459545612335205, "learning_rate": 6.902660739167939e-06, "loss": 0.9267, "step": 6344 }, { "epoch": 0.3788964528842709, "grad_norm": 2.092874765396118, "learning_rate": 6.901997213190898e-06, "loss": 0.8536, "step": 6345 }, { "epoch": 0.37895616863728654, "grad_norm": 2.822315216064453, "learning_rate": 6.901333687213855e-06, "loss": 0.8801, "step": 6346 }, { "epoch": 0.37901588439030215, "grad_norm": 2.373873710632324, "learning_rate": 6.900670161236813e-06, "loss": 0.924, "step": 6347 }, { "epoch": 0.3790756001433178, "grad_norm": 2.3943161964416504, "learning_rate": 6.90000663525977e-06, "loss": 0.884, "step": 6348 }, { "epoch": 0.3791353158963335, "grad_norm": 2.492283582687378, "learning_rate": 6.899343109282729e-06, "loss": 0.8578, "step": 6349 }, { "epoch": 0.3791950316493491, "grad_norm": 2.1422278881073, "learning_rate": 6.898679583305687e-06, "loss": 0.8653, "step": 6350 }, { "epoch": 0.37925474740236476, "grad_norm": 1.8423842191696167, "learning_rate": 6.8980160573286445e-06, "loss": 0.8566, "step": 6351 }, { "epoch": 0.37931446315538037, "grad_norm": 4.135441780090332, "learning_rate": 6.8973525313516035e-06, "loss": 0.8837, "step": 6352 }, { "epoch": 0.37937417890839603, "grad_norm": 3.61195707321167, "learning_rate": 6.896689005374561e-06, "loss": 0.8864, "step": 6353 }, { "epoch": 0.3794338946614117, "grad_norm": 2.083153486251831, "learning_rate": 6.896025479397519e-06, "loss": 0.9253, "step": 6354 }, { "epoch": 0.3794936104144273, "grad_norm": 2.7636282444000244, "learning_rate": 6.895361953420478e-06, "loss": 0.9331, "step": 6355 }, { "epoch": 0.379553326167443, "grad_norm": 3.567690372467041, "learning_rate": 6.894698427443435e-06, "loss": 0.8703, "step": 6356 }, { "epoch": 0.37961304192045864, "grad_norm": 1.8902491331100464, "learning_rate": 6.894034901466393e-06, "loss": 0.8642, "step": 6357 }, { "epoch": 0.37967275767347425, "grad_norm": 2.273521661758423, "learning_rate": 6.89337137548935e-06, "loss": 0.8373, "step": 6358 }, { "epoch": 0.3797324734264899, "grad_norm": 2.436140298843384, "learning_rate": 6.892707849512309e-06, "loss": 0.9081, "step": 6359 }, { "epoch": 0.3797921891795056, "grad_norm": 14.791947364807129, "learning_rate": 6.8920443235352674e-06, "loss": 0.8814, "step": 6360 }, { "epoch": 0.3798519049325212, "grad_norm": 2.5185537338256836, "learning_rate": 6.891380797558225e-06, "loss": 0.9027, "step": 6361 }, { "epoch": 0.37991162068553685, "grad_norm": 11.052257537841797, "learning_rate": 6.890717271581183e-06, "loss": 0.8707, "step": 6362 }, { "epoch": 0.37997133643855246, "grad_norm": 2.1369221210479736, "learning_rate": 6.890053745604141e-06, "loss": 0.8463, "step": 6363 }, { "epoch": 0.38003105219156813, "grad_norm": 3.791055679321289, "learning_rate": 6.889390219627099e-06, "loss": 0.8934, "step": 6364 }, { "epoch": 0.3800907679445838, "grad_norm": 2.842979907989502, "learning_rate": 6.888726693650057e-06, "loss": 0.8992, "step": 6365 }, { "epoch": 0.3801504836975994, "grad_norm": 1.9903843402862549, "learning_rate": 6.888063167673014e-06, "loss": 0.9107, "step": 6366 }, { "epoch": 0.38021019945061507, "grad_norm": 3.3819684982299805, "learning_rate": 6.887399641695973e-06, "loss": 0.8904, "step": 6367 }, { "epoch": 0.38026991520363074, "grad_norm": 2.9513752460479736, "learning_rate": 6.8867361157189305e-06, "loss": 0.9068, "step": 6368 }, { "epoch": 0.38032963095664635, "grad_norm": 1.9611238241195679, "learning_rate": 6.886072589741889e-06, "loss": 0.8752, "step": 6369 }, { "epoch": 0.380389346709662, "grad_norm": 1.9978771209716797, "learning_rate": 6.885409063764848e-06, "loss": 0.9017, "step": 6370 }, { "epoch": 0.3804490624626777, "grad_norm": 4.088076114654541, "learning_rate": 6.884745537787805e-06, "loss": 0.8979, "step": 6371 }, { "epoch": 0.3805087782156933, "grad_norm": 2.0226287841796875, "learning_rate": 6.884082011810763e-06, "loss": 0.9005, "step": 6372 }, { "epoch": 0.38056849396870895, "grad_norm": 4.08914041519165, "learning_rate": 6.88341848583372e-06, "loss": 0.8878, "step": 6373 }, { "epoch": 0.38062820972172456, "grad_norm": 2.1516778469085693, "learning_rate": 6.882754959856679e-06, "loss": 0.8984, "step": 6374 }, { "epoch": 0.38068792547474023, "grad_norm": 3.622105836868286, "learning_rate": 6.882091433879637e-06, "loss": 0.8671, "step": 6375 }, { "epoch": 0.3807476412277559, "grad_norm": 2.3311781883239746, "learning_rate": 6.8814279079025945e-06, "loss": 0.922, "step": 6376 }, { "epoch": 0.3808073569807715, "grad_norm": 2.3873767852783203, "learning_rate": 6.8807643819255534e-06, "loss": 0.8949, "step": 6377 }, { "epoch": 0.38086707273378717, "grad_norm": 2.4364511966705322, "learning_rate": 6.880100855948511e-06, "loss": 0.8843, "step": 6378 }, { "epoch": 0.38092678848680284, "grad_norm": 2.3073747158050537, "learning_rate": 6.879437329971469e-06, "loss": 0.9311, "step": 6379 }, { "epoch": 0.38098650423981845, "grad_norm": 1.7103321552276611, "learning_rate": 6.878773803994428e-06, "loss": 0.8992, "step": 6380 }, { "epoch": 0.3810462199928341, "grad_norm": 2.213559865951538, "learning_rate": 6.878110278017385e-06, "loss": 0.8796, "step": 6381 }, { "epoch": 0.3811059357458498, "grad_norm": 2.6006460189819336, "learning_rate": 6.877446752040343e-06, "loss": 0.9096, "step": 6382 }, { "epoch": 0.3811656514988654, "grad_norm": 1.8244749307632446, "learning_rate": 6.8767832260633e-06, "loss": 0.9006, "step": 6383 }, { "epoch": 0.38122536725188105, "grad_norm": 2.065676212310791, "learning_rate": 6.876119700086259e-06, "loss": 0.8945, "step": 6384 }, { "epoch": 0.3812850830048967, "grad_norm": 2.41892147064209, "learning_rate": 6.875456174109217e-06, "loss": 0.8602, "step": 6385 }, { "epoch": 0.38134479875791233, "grad_norm": 2.0329864025115967, "learning_rate": 6.874792648132175e-06, "loss": 0.8458, "step": 6386 }, { "epoch": 0.381404514510928, "grad_norm": 1.9649242162704468, "learning_rate": 6.874129122155133e-06, "loss": 0.8568, "step": 6387 }, { "epoch": 0.3814642302639436, "grad_norm": 2.2319161891937256, "learning_rate": 6.873465596178091e-06, "loss": 0.8557, "step": 6388 }, { "epoch": 0.38152394601695927, "grad_norm": 1.8278143405914307, "learning_rate": 6.872802070201049e-06, "loss": 0.8753, "step": 6389 }, { "epoch": 0.38158366176997494, "grad_norm": 2.500715494155884, "learning_rate": 6.872138544224007e-06, "loss": 0.9485, "step": 6390 }, { "epoch": 0.38164337752299055, "grad_norm": 3.8153529167175293, "learning_rate": 6.871475018246964e-06, "loss": 0.8682, "step": 6391 }, { "epoch": 0.3817030932760062, "grad_norm": 1.7351065874099731, "learning_rate": 6.870811492269923e-06, "loss": 0.8745, "step": 6392 }, { "epoch": 0.3817628090290219, "grad_norm": 2.45263934135437, "learning_rate": 6.8701479662928805e-06, "loss": 0.8902, "step": 6393 }, { "epoch": 0.3818225247820375, "grad_norm": 1.9389245510101318, "learning_rate": 6.869484440315839e-06, "loss": 0.883, "step": 6394 }, { "epoch": 0.38188224053505315, "grad_norm": 1.7106645107269287, "learning_rate": 6.8688209143387976e-06, "loss": 0.8683, "step": 6395 }, { "epoch": 0.3819419562880688, "grad_norm": 1.906416893005371, "learning_rate": 6.868157388361755e-06, "loss": 0.8891, "step": 6396 }, { "epoch": 0.38200167204108443, "grad_norm": 1.8813682794570923, "learning_rate": 6.867493862384713e-06, "loss": 0.9018, "step": 6397 }, { "epoch": 0.3820613877941001, "grad_norm": 2.975039482116699, "learning_rate": 6.86683033640767e-06, "loss": 0.8806, "step": 6398 }, { "epoch": 0.3821211035471157, "grad_norm": 1.9168181419372559, "learning_rate": 6.866166810430629e-06, "loss": 0.8692, "step": 6399 }, { "epoch": 0.38218081930013137, "grad_norm": 1.8354909420013428, "learning_rate": 6.865503284453587e-06, "loss": 0.8756, "step": 6400 }, { "epoch": 0.38218081930013137, "eval_text_loss": 0.9286981821060181, "eval_text_runtime": 15.1824, "eval_text_samples_per_second": 263.462, "eval_text_steps_per_second": 0.527, "step": 6400 }, { "epoch": 0.38218081930013137, "eval_image_loss": 0.6458794474601746, "eval_image_runtime": 4.9951, "eval_image_samples_per_second": 800.792, "eval_image_steps_per_second": 1.602, "step": 6400 }, { "epoch": 0.38218081930013137, "eval_video_loss": 1.1003074645996094, "eval_video_runtime": 76.6023, "eval_video_samples_per_second": 52.218, "eval_video_steps_per_second": 0.104, "step": 6400 }, { "epoch": 0.38224053505314703, "grad_norm": 3.375809669494629, "learning_rate": 6.8648397584765445e-06, "loss": 0.8861, "step": 6401 }, { "epoch": 0.38230025080616264, "grad_norm": 3.012084484100342, "learning_rate": 6.864176232499503e-06, "loss": 0.8935, "step": 6402 }, { "epoch": 0.3823599665591783, "grad_norm": 1.6467067003250122, "learning_rate": 6.863512706522461e-06, "loss": 0.894, "step": 6403 }, { "epoch": 0.382419682312194, "grad_norm": 2.6333401203155518, "learning_rate": 6.862849180545419e-06, "loss": 0.859, "step": 6404 }, { "epoch": 0.3824793980652096, "grad_norm": 1.751983404159546, "learning_rate": 6.862185654568378e-06, "loss": 0.889, "step": 6405 }, { "epoch": 0.38253911381822525, "grad_norm": 2.2304461002349854, "learning_rate": 6.861522128591335e-06, "loss": 0.8812, "step": 6406 }, { "epoch": 0.3825988295712409, "grad_norm": 2.13411021232605, "learning_rate": 6.860858602614293e-06, "loss": 0.8862, "step": 6407 }, { "epoch": 0.3826585453242565, "grad_norm": 1.6679283380508423, "learning_rate": 6.86019507663725e-06, "loss": 0.8413, "step": 6408 }, { "epoch": 0.3827182610772722, "grad_norm": 2.768800735473633, "learning_rate": 6.859531550660209e-06, "loss": 0.8946, "step": 6409 }, { "epoch": 0.3827779768302878, "grad_norm": 3.2838075160980225, "learning_rate": 6.858868024683167e-06, "loss": 0.856, "step": 6410 }, { "epoch": 0.38283769258330347, "grad_norm": 2.5346007347106934, "learning_rate": 6.858204498706125e-06, "loss": 0.8883, "step": 6411 }, { "epoch": 0.38289740833631913, "grad_norm": 2.381298065185547, "learning_rate": 6.857540972729083e-06, "loss": 0.8756, "step": 6412 }, { "epoch": 0.38295712408933474, "grad_norm": 2.04274845123291, "learning_rate": 6.856877446752041e-06, "loss": 0.8982, "step": 6413 }, { "epoch": 0.3830168398423504, "grad_norm": 3.616360664367676, "learning_rate": 6.856213920774999e-06, "loss": 0.8945, "step": 6414 }, { "epoch": 0.3830765555953661, "grad_norm": 2.0810437202453613, "learning_rate": 6.855550394797957e-06, "loss": 0.8858, "step": 6415 }, { "epoch": 0.3831362713483817, "grad_norm": 2.5801663398742676, "learning_rate": 6.854886868820914e-06, "loss": 0.8375, "step": 6416 }, { "epoch": 0.38319598710139735, "grad_norm": 1.7505841255187988, "learning_rate": 6.854223342843873e-06, "loss": 0.8793, "step": 6417 }, { "epoch": 0.383255702854413, "grad_norm": 1.8396666049957275, "learning_rate": 6.8535598168668305e-06, "loss": 0.8408, "step": 6418 }, { "epoch": 0.3833154186074286, "grad_norm": 2.135378360748291, "learning_rate": 6.852896290889789e-06, "loss": 0.8738, "step": 6419 }, { "epoch": 0.3833751343604443, "grad_norm": 2.4778544902801514, "learning_rate": 6.8522327649127475e-06, "loss": 0.9017, "step": 6420 }, { "epoch": 0.38343485011345996, "grad_norm": 5.416745662689209, "learning_rate": 6.851569238935705e-06, "loss": 0.879, "step": 6421 }, { "epoch": 0.38349456586647557, "grad_norm": 2.182096242904663, "learning_rate": 6.850905712958663e-06, "loss": 0.8954, "step": 6422 }, { "epoch": 0.38355428161949123, "grad_norm": 2.266666889190674, "learning_rate": 6.85024218698162e-06, "loss": 0.9045, "step": 6423 }, { "epoch": 0.38361399737250684, "grad_norm": 2.0856876373291016, "learning_rate": 6.849578661004579e-06, "loss": 0.8648, "step": 6424 }, { "epoch": 0.3836737131255225, "grad_norm": 1.7943363189697266, "learning_rate": 6.848915135027537e-06, "loss": 0.887, "step": 6425 }, { "epoch": 0.3837334288785382, "grad_norm": 2.370530843734741, "learning_rate": 6.8482516090504944e-06, "loss": 0.8983, "step": 6426 }, { "epoch": 0.3837931446315538, "grad_norm": 2.0793840885162354, "learning_rate": 6.847588083073453e-06, "loss": 0.8806, "step": 6427 }, { "epoch": 0.38385286038456945, "grad_norm": 6.859910488128662, "learning_rate": 6.846924557096411e-06, "loss": 0.8555, "step": 6428 }, { "epoch": 0.3839125761375851, "grad_norm": 2.0992748737335205, "learning_rate": 6.846261031119369e-06, "loss": 0.8907, "step": 6429 }, { "epoch": 0.3839722918906007, "grad_norm": 1.8427222967147827, "learning_rate": 6.845597505142328e-06, "loss": 0.8819, "step": 6430 }, { "epoch": 0.3840320076436164, "grad_norm": 4.069450378417969, "learning_rate": 6.844933979165285e-06, "loss": 0.8796, "step": 6431 }, { "epoch": 0.38409172339663206, "grad_norm": 2.2788124084472656, "learning_rate": 6.844270453188243e-06, "loss": 0.8798, "step": 6432 }, { "epoch": 0.38415143914964767, "grad_norm": 2.099128007888794, "learning_rate": 6.8436069272112e-06, "loss": 0.8593, "step": 6433 }, { "epoch": 0.38421115490266333, "grad_norm": 3.0674517154693604, "learning_rate": 6.842943401234159e-06, "loss": 0.8627, "step": 6434 }, { "epoch": 0.38427087065567894, "grad_norm": 2.246946334838867, "learning_rate": 6.842279875257117e-06, "loss": 0.8805, "step": 6435 }, { "epoch": 0.3843305864086946, "grad_norm": 2.775482177734375, "learning_rate": 6.841616349280075e-06, "loss": 0.9105, "step": 6436 }, { "epoch": 0.3843903021617103, "grad_norm": 2.1276752948760986, "learning_rate": 6.8409528233030335e-06, "loss": 0.8649, "step": 6437 }, { "epoch": 0.3844500179147259, "grad_norm": 2.2878713607788086, "learning_rate": 6.840289297325991e-06, "loss": 0.8356, "step": 6438 }, { "epoch": 0.38450973366774155, "grad_norm": 2.1063897609710693, "learning_rate": 6.839625771348949e-06, "loss": 0.9115, "step": 6439 }, { "epoch": 0.3845694494207572, "grad_norm": 2.0891599655151367, "learning_rate": 6.838962245371907e-06, "loss": 0.8825, "step": 6440 }, { "epoch": 0.3846291651737728, "grad_norm": 8.175517082214355, "learning_rate": 6.838298719394864e-06, "loss": 0.8492, "step": 6441 }, { "epoch": 0.3846888809267885, "grad_norm": 3.0771660804748535, "learning_rate": 6.837635193417823e-06, "loss": 0.8937, "step": 6442 }, { "epoch": 0.38474859667980416, "grad_norm": 2.0960819721221924, "learning_rate": 6.8369716674407804e-06, "loss": 0.8985, "step": 6443 }, { "epoch": 0.38480831243281977, "grad_norm": 1.7536910772323608, "learning_rate": 6.8363081414637385e-06, "loss": 0.8899, "step": 6444 }, { "epoch": 0.38486802818583543, "grad_norm": 2.7259621620178223, "learning_rate": 6.8356446154866975e-06, "loss": 0.9163, "step": 6445 }, { "epoch": 0.38492774393885104, "grad_norm": 2.4394307136535645, "learning_rate": 6.834981089509655e-06, "loss": 0.9231, "step": 6446 }, { "epoch": 0.3849874596918667, "grad_norm": 1.734592318534851, "learning_rate": 6.834317563532613e-06, "loss": 0.877, "step": 6447 }, { "epoch": 0.3850471754448824, "grad_norm": 6.623769283294678, "learning_rate": 6.83365403755557e-06, "loss": 0.8991, "step": 6448 }, { "epoch": 0.385106891197898, "grad_norm": 1.7926323413848877, "learning_rate": 6.832990511578529e-06, "loss": 0.8654, "step": 6449 }, { "epoch": 0.38516660695091365, "grad_norm": 3.490565061569214, "learning_rate": 6.832326985601487e-06, "loss": 0.9234, "step": 6450 }, { "epoch": 0.3852263227039293, "grad_norm": 4.115938186645508, "learning_rate": 6.831663459624444e-06, "loss": 0.8731, "step": 6451 }, { "epoch": 0.3852860384569449, "grad_norm": 2.271320104598999, "learning_rate": 6.830999933647403e-06, "loss": 0.9077, "step": 6452 }, { "epoch": 0.3853457542099606, "grad_norm": 4.776697158813477, "learning_rate": 6.830336407670361e-06, "loss": 0.902, "step": 6453 }, { "epoch": 0.38540546996297625, "grad_norm": 3.445465087890625, "learning_rate": 6.829672881693319e-06, "loss": 0.8837, "step": 6454 }, { "epoch": 0.38546518571599186, "grad_norm": 2.107487678527832, "learning_rate": 6.829009355716278e-06, "loss": 0.841, "step": 6455 }, { "epoch": 0.38552490146900753, "grad_norm": 2.379237413406372, "learning_rate": 6.828345829739235e-06, "loss": 0.8681, "step": 6456 }, { "epoch": 0.3855846172220232, "grad_norm": 2.01438045501709, "learning_rate": 6.827682303762193e-06, "loss": 0.8576, "step": 6457 }, { "epoch": 0.3856443329750388, "grad_norm": 1.876818060874939, "learning_rate": 6.82701877778515e-06, "loss": 0.8602, "step": 6458 }, { "epoch": 0.38570404872805447, "grad_norm": 2.574167490005493, "learning_rate": 6.826355251808109e-06, "loss": 0.876, "step": 6459 }, { "epoch": 0.3857637644810701, "grad_norm": 3.6698710918426514, "learning_rate": 6.825691725831067e-06, "loss": 0.9052, "step": 6460 }, { "epoch": 0.38582348023408575, "grad_norm": 3.0083537101745605, "learning_rate": 6.8250281998540246e-06, "loss": 0.8826, "step": 6461 }, { "epoch": 0.3858831959871014, "grad_norm": 2.7062735557556152, "learning_rate": 6.8243646738769835e-06, "loss": 0.8706, "step": 6462 }, { "epoch": 0.385942911740117, "grad_norm": 2.2504312992095947, "learning_rate": 6.823701147899941e-06, "loss": 0.8973, "step": 6463 }, { "epoch": 0.3860026274931327, "grad_norm": 1.8039720058441162, "learning_rate": 6.823037621922899e-06, "loss": 0.873, "step": 6464 }, { "epoch": 0.38606234324614835, "grad_norm": 3.116227388381958, "learning_rate": 6.822374095945857e-06, "loss": 0.9096, "step": 6465 }, { "epoch": 0.38612205899916396, "grad_norm": 1.9974465370178223, "learning_rate": 6.821710569968815e-06, "loss": 0.8665, "step": 6466 }, { "epoch": 0.38618177475217963, "grad_norm": 2.7249903678894043, "learning_rate": 6.821047043991773e-06, "loss": 0.9126, "step": 6467 }, { "epoch": 0.3862414905051953, "grad_norm": 2.2793867588043213, "learning_rate": 6.82038351801473e-06, "loss": 0.8545, "step": 6468 }, { "epoch": 0.3863012062582109, "grad_norm": 2.4782397747039795, "learning_rate": 6.8197199920376885e-06, "loss": 0.8913, "step": 6469 }, { "epoch": 0.38636092201122657, "grad_norm": 1.9727839231491089, "learning_rate": 6.8190564660606475e-06, "loss": 0.8612, "step": 6470 }, { "epoch": 0.3864206377642422, "grad_norm": 2.1072280406951904, "learning_rate": 6.818392940083605e-06, "loss": 0.8628, "step": 6471 }, { "epoch": 0.38648035351725785, "grad_norm": 1.8479604721069336, "learning_rate": 6.817729414106563e-06, "loss": 0.8526, "step": 6472 }, { "epoch": 0.3865400692702735, "grad_norm": 2.125711441040039, "learning_rate": 6.81706588812952e-06, "loss": 0.9225, "step": 6473 }, { "epoch": 0.3865997850232891, "grad_norm": 2.6779513359069824, "learning_rate": 6.816402362152479e-06, "loss": 0.9045, "step": 6474 }, { "epoch": 0.3866595007763048, "grad_norm": 2.0529911518096924, "learning_rate": 6.815738836175437e-06, "loss": 0.8508, "step": 6475 }, { "epoch": 0.38671921652932045, "grad_norm": 2.4790685176849365, "learning_rate": 6.815075310198394e-06, "loss": 0.9059, "step": 6476 }, { "epoch": 0.38677893228233606, "grad_norm": 1.6945726871490479, "learning_rate": 6.814411784221353e-06, "loss": 0.9199, "step": 6477 }, { "epoch": 0.38683864803535173, "grad_norm": 5.7129058837890625, "learning_rate": 6.813748258244311e-06, "loss": 0.8522, "step": 6478 }, { "epoch": 0.3868983637883674, "grad_norm": 3.2514662742614746, "learning_rate": 6.813084732267269e-06, "loss": 0.9207, "step": 6479 }, { "epoch": 0.386958079541383, "grad_norm": 2.116773843765259, "learning_rate": 6.812421206290228e-06, "loss": 0.8699, "step": 6480 }, { "epoch": 0.38701779529439867, "grad_norm": 2.160984754562378, "learning_rate": 6.811757680313185e-06, "loss": 0.9068, "step": 6481 }, { "epoch": 0.38707751104741434, "grad_norm": 2.2719738483428955, "learning_rate": 6.811094154336143e-06, "loss": 0.9218, "step": 6482 }, { "epoch": 0.38713722680042995, "grad_norm": 1.824218511581421, "learning_rate": 6.8104306283591e-06, "loss": 0.8693, "step": 6483 }, { "epoch": 0.3871969425534456, "grad_norm": 2.2725980281829834, "learning_rate": 6.809767102382059e-06, "loss": 0.8729, "step": 6484 }, { "epoch": 0.3872566583064612, "grad_norm": 2.6258742809295654, "learning_rate": 6.809103576405017e-06, "loss": 0.8953, "step": 6485 }, { "epoch": 0.3873163740594769, "grad_norm": 1.7829937934875488, "learning_rate": 6.8084400504279745e-06, "loss": 0.8648, "step": 6486 }, { "epoch": 0.38737608981249255, "grad_norm": 1.9175337553024292, "learning_rate": 6.8077765244509335e-06, "loss": 0.912, "step": 6487 }, { "epoch": 0.38743580556550816, "grad_norm": 2.346859931945801, "learning_rate": 6.807112998473891e-06, "loss": 0.8557, "step": 6488 }, { "epoch": 0.38749552131852383, "grad_norm": 3.229146957397461, "learning_rate": 6.806449472496849e-06, "loss": 0.8993, "step": 6489 }, { "epoch": 0.3875552370715395, "grad_norm": 1.711990475654602, "learning_rate": 6.805785946519807e-06, "loss": 0.844, "step": 6490 }, { "epoch": 0.3876149528245551, "grad_norm": 2.904963970184326, "learning_rate": 6.805122420542765e-06, "loss": 0.9011, "step": 6491 }, { "epoch": 0.38767466857757077, "grad_norm": 2.0119800567626953, "learning_rate": 6.804458894565723e-06, "loss": 0.8776, "step": 6492 }, { "epoch": 0.38773438433058643, "grad_norm": 2.536022186279297, "learning_rate": 6.80379536858868e-06, "loss": 0.8565, "step": 6493 }, { "epoch": 0.38779410008360204, "grad_norm": 1.9838218688964844, "learning_rate": 6.8031318426116385e-06, "loss": 0.9045, "step": 6494 }, { "epoch": 0.3878538158366177, "grad_norm": 3.244739294052124, "learning_rate": 6.8024683166345974e-06, "loss": 0.8586, "step": 6495 }, { "epoch": 0.3879135315896333, "grad_norm": 1.8896687030792236, "learning_rate": 6.801804790657555e-06, "loss": 0.8602, "step": 6496 }, { "epoch": 0.387973247342649, "grad_norm": 1.9795023202896118, "learning_rate": 6.801141264680513e-06, "loss": 0.8351, "step": 6497 }, { "epoch": 0.38803296309566465, "grad_norm": 2.098459243774414, "learning_rate": 6.80047773870347e-06, "loss": 0.8375, "step": 6498 }, { "epoch": 0.38809267884868026, "grad_norm": 3.046372413635254, "learning_rate": 6.799814212726429e-06, "loss": 0.8794, "step": 6499 }, { "epoch": 0.3881523946016959, "grad_norm": 1.7834573984146118, "learning_rate": 6.799150686749387e-06, "loss": 0.878, "step": 6500 }, { "epoch": 0.3882121103547116, "grad_norm": 2.367913246154785, "learning_rate": 6.798487160772344e-06, "loss": 0.8502, "step": 6501 }, { "epoch": 0.3882718261077272, "grad_norm": 2.4554226398468018, "learning_rate": 6.797823634795303e-06, "loss": 0.8998, "step": 6502 }, { "epoch": 0.38833154186074287, "grad_norm": 1.9040238857269287, "learning_rate": 6.7971601088182605e-06, "loss": 0.8739, "step": 6503 }, { "epoch": 0.38839125761375853, "grad_norm": 2.3726396560668945, "learning_rate": 6.796496582841219e-06, "loss": 0.9032, "step": 6504 }, { "epoch": 0.38845097336677414, "grad_norm": 1.7798418998718262, "learning_rate": 6.795833056864178e-06, "loss": 0.8682, "step": 6505 }, { "epoch": 0.3885106891197898, "grad_norm": 2.2216124534606934, "learning_rate": 6.795169530887135e-06, "loss": 0.899, "step": 6506 }, { "epoch": 0.3885704048728054, "grad_norm": 1.8155359029769897, "learning_rate": 6.794506004910093e-06, "loss": 0.8704, "step": 6507 }, { "epoch": 0.3886301206258211, "grad_norm": 2.2024309635162354, "learning_rate": 6.79384247893305e-06, "loss": 0.8726, "step": 6508 }, { "epoch": 0.38868983637883675, "grad_norm": 2.1596567630767822, "learning_rate": 6.793178952956009e-06, "loss": 0.8943, "step": 6509 }, { "epoch": 0.38874955213185236, "grad_norm": 2.8510751724243164, "learning_rate": 6.792515426978967e-06, "loss": 0.891, "step": 6510 }, { "epoch": 0.388809267884868, "grad_norm": 1.8313474655151367, "learning_rate": 6.7918519010019245e-06, "loss": 0.9004, "step": 6511 }, { "epoch": 0.3888689836378837, "grad_norm": 2.3037304878234863, "learning_rate": 6.7911883750248835e-06, "loss": 0.8931, "step": 6512 }, { "epoch": 0.3889286993908993, "grad_norm": 5.929737567901611, "learning_rate": 6.790524849047841e-06, "loss": 0.861, "step": 6513 }, { "epoch": 0.38898841514391497, "grad_norm": 2.395587682723999, "learning_rate": 6.789861323070799e-06, "loss": 0.9032, "step": 6514 }, { "epoch": 0.38904813089693063, "grad_norm": 2.2770485877990723, "learning_rate": 6.789197797093757e-06, "loss": 0.9103, "step": 6515 }, { "epoch": 0.38910784664994624, "grad_norm": 2.398344039916992, "learning_rate": 6.788534271116715e-06, "loss": 0.8531, "step": 6516 }, { "epoch": 0.3891675624029619, "grad_norm": 5.898573875427246, "learning_rate": 6.787870745139673e-06, "loss": 0.868, "step": 6517 }, { "epoch": 0.3892272781559776, "grad_norm": 2.170264720916748, "learning_rate": 6.78720721916263e-06, "loss": 0.8897, "step": 6518 }, { "epoch": 0.3892869939089932, "grad_norm": 1.8071174621582031, "learning_rate": 6.7865436931855885e-06, "loss": 0.8421, "step": 6519 }, { "epoch": 0.38934670966200885, "grad_norm": 3.2306549549102783, "learning_rate": 6.785880167208547e-06, "loss": 0.8548, "step": 6520 }, { "epoch": 0.38940642541502446, "grad_norm": 2.050840139389038, "learning_rate": 6.785216641231505e-06, "loss": 0.907, "step": 6521 }, { "epoch": 0.3894661411680401, "grad_norm": 3.578521728515625, "learning_rate": 6.784553115254463e-06, "loss": 0.8814, "step": 6522 }, { "epoch": 0.3895258569210558, "grad_norm": 3.519204616546631, "learning_rate": 6.78388958927742e-06, "loss": 0.8969, "step": 6523 }, { "epoch": 0.3895855726740714, "grad_norm": 4.1292924880981445, "learning_rate": 6.783226063300379e-06, "loss": 0.8975, "step": 6524 }, { "epoch": 0.38964528842708707, "grad_norm": 1.959147334098816, "learning_rate": 6.782562537323337e-06, "loss": 0.8453, "step": 6525 }, { "epoch": 0.38970500418010273, "grad_norm": 2.625277042388916, "learning_rate": 6.781899011346294e-06, "loss": 0.9297, "step": 6526 }, { "epoch": 0.38976471993311834, "grad_norm": 2.5197219848632812, "learning_rate": 6.781235485369253e-06, "loss": 0.8853, "step": 6527 }, { "epoch": 0.389824435686134, "grad_norm": 3.143073558807373, "learning_rate": 6.7805719593922105e-06, "loss": 0.9117, "step": 6528 }, { "epoch": 0.3898841514391497, "grad_norm": 2.0651497840881348, "learning_rate": 6.779908433415169e-06, "loss": 0.8624, "step": 6529 }, { "epoch": 0.3899438671921653, "grad_norm": 2.109707832336426, "learning_rate": 6.7792449074381276e-06, "loss": 0.889, "step": 6530 }, { "epoch": 0.39000358294518095, "grad_norm": 1.796608567237854, "learning_rate": 6.778581381461085e-06, "loss": 0.8679, "step": 6531 }, { "epoch": 0.39006329869819656, "grad_norm": 2.8068220615386963, "learning_rate": 6.777917855484043e-06, "loss": 0.8318, "step": 6532 }, { "epoch": 0.3901230144512122, "grad_norm": 2.1194140911102295, "learning_rate": 6.777254329507e-06, "loss": 0.8493, "step": 6533 }, { "epoch": 0.3901827302042279, "grad_norm": 1.7050087451934814, "learning_rate": 6.776590803529959e-06, "loss": 0.8438, "step": 6534 }, { "epoch": 0.3902424459572435, "grad_norm": 2.015913248062134, "learning_rate": 6.775927277552917e-06, "loss": 0.8855, "step": 6535 }, { "epoch": 0.39030216171025917, "grad_norm": 2.761453628540039, "learning_rate": 6.7752637515758745e-06, "loss": 0.8793, "step": 6536 }, { "epoch": 0.39036187746327483, "grad_norm": 2.4264543056488037, "learning_rate": 6.7746002255988334e-06, "loss": 0.8387, "step": 6537 }, { "epoch": 0.39042159321629044, "grad_norm": 2.526898145675659, "learning_rate": 6.773936699621791e-06, "loss": 0.8676, "step": 6538 }, { "epoch": 0.3904813089693061, "grad_norm": 2.057291269302368, "learning_rate": 6.773273173644749e-06, "loss": 0.8791, "step": 6539 }, { "epoch": 0.3905410247223218, "grad_norm": 1.9139093160629272, "learning_rate": 6.772609647667707e-06, "loss": 0.8527, "step": 6540 }, { "epoch": 0.3906007404753374, "grad_norm": 1.6288819313049316, "learning_rate": 6.771946121690665e-06, "loss": 0.8885, "step": 6541 }, { "epoch": 0.39066045622835305, "grad_norm": 2.2460241317749023, "learning_rate": 6.771282595713623e-06, "loss": 0.8857, "step": 6542 }, { "epoch": 0.39072017198136866, "grad_norm": 2.2144715785980225, "learning_rate": 6.77061906973658e-06, "loss": 0.8649, "step": 6543 }, { "epoch": 0.3907798877343843, "grad_norm": 3.254967212677002, "learning_rate": 6.7699555437595384e-06, "loss": 0.8914, "step": 6544 }, { "epoch": 0.3908396034874, "grad_norm": 2.8894593715667725, "learning_rate": 6.769292017782497e-06, "loss": 0.859, "step": 6545 }, { "epoch": 0.3908993192404156, "grad_norm": 3.7125227451324463, "learning_rate": 6.768628491805455e-06, "loss": 0.8996, "step": 6546 }, { "epoch": 0.39095903499343126, "grad_norm": 2.119964361190796, "learning_rate": 6.767964965828413e-06, "loss": 0.8946, "step": 6547 }, { "epoch": 0.39101875074644693, "grad_norm": 2.362452983856201, "learning_rate": 6.76730143985137e-06, "loss": 0.93, "step": 6548 }, { "epoch": 0.39107846649946254, "grad_norm": 5.843267917633057, "learning_rate": 6.766637913874329e-06, "loss": 0.8574, "step": 6549 }, { "epoch": 0.3911381822524782, "grad_norm": 2.0243515968322754, "learning_rate": 6.765974387897287e-06, "loss": 0.8434, "step": 6550 }, { "epoch": 0.39119789800549387, "grad_norm": 1.6822994947433472, "learning_rate": 6.765310861920244e-06, "loss": 0.8588, "step": 6551 }, { "epoch": 0.3912576137585095, "grad_norm": 2.5408148765563965, "learning_rate": 6.764647335943203e-06, "loss": 0.9098, "step": 6552 }, { "epoch": 0.39131732951152515, "grad_norm": 3.1586809158325195, "learning_rate": 6.7639838099661605e-06, "loss": 0.8715, "step": 6553 }, { "epoch": 0.3913770452645408, "grad_norm": 2.264939069747925, "learning_rate": 6.763320283989119e-06, "loss": 0.873, "step": 6554 }, { "epoch": 0.3914367610175564, "grad_norm": 2.158384323120117, "learning_rate": 6.7626567580120775e-06, "loss": 0.8643, "step": 6555 }, { "epoch": 0.3914964767705721, "grad_norm": 1.6646759510040283, "learning_rate": 6.761993232035035e-06, "loss": 0.8685, "step": 6556 }, { "epoch": 0.3915561925235877, "grad_norm": 2.4189131259918213, "learning_rate": 6.761329706057993e-06, "loss": 0.9015, "step": 6557 }, { "epoch": 0.39161590827660336, "grad_norm": 2.246692419052124, "learning_rate": 6.76066618008095e-06, "loss": 0.889, "step": 6558 }, { "epoch": 0.39167562402961903, "grad_norm": 2.6740221977233887, "learning_rate": 6.760002654103909e-06, "loss": 0.8921, "step": 6559 }, { "epoch": 0.39173533978263464, "grad_norm": 2.6056647300720215, "learning_rate": 6.759339128126867e-06, "loss": 0.8806, "step": 6560 }, { "epoch": 0.3917950555356503, "grad_norm": 4.3973846435546875, "learning_rate": 6.7586756021498244e-06, "loss": 0.8788, "step": 6561 }, { "epoch": 0.39185477128866597, "grad_norm": 2.8324079513549805, "learning_rate": 6.758012076172783e-06, "loss": 0.9163, "step": 6562 }, { "epoch": 0.3919144870416816, "grad_norm": 1.9687659740447998, "learning_rate": 6.757348550195741e-06, "loss": 0.8735, "step": 6563 }, { "epoch": 0.39197420279469725, "grad_norm": 2.62650990486145, "learning_rate": 6.756685024218699e-06, "loss": 0.9074, "step": 6564 }, { "epoch": 0.3920339185477129, "grad_norm": 3.6055502891540527, "learning_rate": 6.756021498241657e-06, "loss": 0.9029, "step": 6565 }, { "epoch": 0.3920936343007285, "grad_norm": 2.2417492866516113, "learning_rate": 6.755357972264615e-06, "loss": 0.9233, "step": 6566 }, { "epoch": 0.3921533500537442, "grad_norm": 3.898029327392578, "learning_rate": 6.754694446287573e-06, "loss": 0.8673, "step": 6567 }, { "epoch": 0.3922130658067598, "grad_norm": 2.503978729248047, "learning_rate": 6.75403092031053e-06, "loss": 0.9362, "step": 6568 }, { "epoch": 0.39227278155977546, "grad_norm": 2.72698712348938, "learning_rate": 6.753367394333488e-06, "loss": 0.8782, "step": 6569 }, { "epoch": 0.39233249731279113, "grad_norm": 4.082156658172607, "learning_rate": 6.752703868356447e-06, "loss": 0.894, "step": 6570 }, { "epoch": 0.39239221306580674, "grad_norm": 1.829784631729126, "learning_rate": 6.752040342379405e-06, "loss": 0.8742, "step": 6571 }, { "epoch": 0.3924519288188224, "grad_norm": 2.1889896392822266, "learning_rate": 6.751376816402363e-06, "loss": 0.8696, "step": 6572 }, { "epoch": 0.39251164457183807, "grad_norm": 1.796073317527771, "learning_rate": 6.75071329042532e-06, "loss": 0.8775, "step": 6573 }, { "epoch": 0.3925713603248537, "grad_norm": 2.2175354957580566, "learning_rate": 6.750049764448279e-06, "loss": 0.8576, "step": 6574 }, { "epoch": 0.39263107607786935, "grad_norm": 2.087674379348755, "learning_rate": 6.749386238471237e-06, "loss": 0.8647, "step": 6575 }, { "epoch": 0.392690791830885, "grad_norm": 2.890190839767456, "learning_rate": 6.748722712494194e-06, "loss": 0.9227, "step": 6576 }, { "epoch": 0.3927505075839006, "grad_norm": 1.7991105318069458, "learning_rate": 6.748059186517153e-06, "loss": 0.8733, "step": 6577 }, { "epoch": 0.3928102233369163, "grad_norm": 2.094738721847534, "learning_rate": 6.7473956605401105e-06, "loss": 0.8654, "step": 6578 }, { "epoch": 0.3928699390899319, "grad_norm": 1.7860616445541382, "learning_rate": 6.7467321345630686e-06, "loss": 0.8895, "step": 6579 }, { "epoch": 0.39292965484294756, "grad_norm": 2.2428269386291504, "learning_rate": 6.7460686085860275e-06, "loss": 0.8562, "step": 6580 }, { "epoch": 0.3929893705959632, "grad_norm": 2.121995687484741, "learning_rate": 6.745405082608985e-06, "loss": 0.9004, "step": 6581 }, { "epoch": 0.39304908634897884, "grad_norm": 2.9659621715545654, "learning_rate": 6.744741556631943e-06, "loss": 0.8897, "step": 6582 }, { "epoch": 0.3931088021019945, "grad_norm": 2.171647787094116, "learning_rate": 6.7440780306549e-06, "loss": 0.866, "step": 6583 }, { "epoch": 0.39316851785501017, "grad_norm": 2.743389129638672, "learning_rate": 6.743414504677859e-06, "loss": 0.9003, "step": 6584 }, { "epoch": 0.3932282336080258, "grad_norm": 7.08245849609375, "learning_rate": 6.742750978700817e-06, "loss": 0.8976, "step": 6585 }, { "epoch": 0.39328794936104144, "grad_norm": 2.6814396381378174, "learning_rate": 6.742087452723774e-06, "loss": 0.8645, "step": 6586 }, { "epoch": 0.3933476651140571, "grad_norm": 1.7864822149276733, "learning_rate": 6.741423926746733e-06, "loss": 0.8914, "step": 6587 }, { "epoch": 0.3934073808670727, "grad_norm": 2.735062599182129, "learning_rate": 6.740760400769691e-06, "loss": 0.8464, "step": 6588 }, { "epoch": 0.3934670966200884, "grad_norm": 2.549180746078491, "learning_rate": 6.740096874792649e-06, "loss": 0.8391, "step": 6589 }, { "epoch": 0.39352681237310405, "grad_norm": 1.890433669090271, "learning_rate": 6.739433348815607e-06, "loss": 0.8852, "step": 6590 }, { "epoch": 0.39358652812611966, "grad_norm": 2.266606092453003, "learning_rate": 6.738769822838565e-06, "loss": 0.8847, "step": 6591 }, { "epoch": 0.3936462438791353, "grad_norm": 2.0098695755004883, "learning_rate": 6.738106296861523e-06, "loss": 0.8879, "step": 6592 }, { "epoch": 0.39370595963215094, "grad_norm": 2.1616618633270264, "learning_rate": 6.73744277088448e-06, "loss": 0.8854, "step": 6593 }, { "epoch": 0.3937656753851666, "grad_norm": 2.567171812057495, "learning_rate": 6.736779244907438e-06, "loss": 0.8743, "step": 6594 }, { "epoch": 0.39382539113818227, "grad_norm": 4.072733402252197, "learning_rate": 6.736115718930397e-06, "loss": 0.8764, "step": 6595 }, { "epoch": 0.3938851068911979, "grad_norm": 5.199814796447754, "learning_rate": 6.7354521929533546e-06, "loss": 0.8432, "step": 6596 }, { "epoch": 0.39394482264421354, "grad_norm": 2.6373047828674316, "learning_rate": 6.734788666976313e-06, "loss": 0.8952, "step": 6597 }, { "epoch": 0.3940045383972292, "grad_norm": 2.909991502761841, "learning_rate": 6.73412514099927e-06, "loss": 0.9159, "step": 6598 }, { "epoch": 0.3940642541502448, "grad_norm": 2.4654085636138916, "learning_rate": 6.733461615022229e-06, "loss": 0.891, "step": 6599 }, { "epoch": 0.3941239699032605, "grad_norm": 2.1720964908599854, "learning_rate": 6.732798089045187e-06, "loss": 0.856, "step": 6600 }, { "epoch": 0.3941239699032605, "eval_text_loss": 0.9283416271209717, "eval_text_runtime": 15.2218, "eval_text_samples_per_second": 262.781, "eval_text_steps_per_second": 0.526, "step": 6600 }, { "epoch": 0.3941239699032605, "eval_image_loss": 0.6431838274002075, "eval_image_runtime": 5.0012, "eval_image_samples_per_second": 799.812, "eval_image_steps_per_second": 1.6, "step": 6600 }, { "epoch": 0.3941239699032605, "eval_video_loss": 1.0993351936340332, "eval_video_runtime": 77.0256, "eval_video_samples_per_second": 51.931, "eval_video_steps_per_second": 0.104, "step": 6600 }, { "epoch": 0.39418368565627615, "grad_norm": 3.1201884746551514, "learning_rate": 6.732134563068144e-06, "loss": 0.836, "step": 6601 }, { "epoch": 0.39424340140929176, "grad_norm": 2.9506428241729736, "learning_rate": 6.731471037091103e-06, "loss": 0.8906, "step": 6602 }, { "epoch": 0.3943031171623074, "grad_norm": 2.303170919418335, "learning_rate": 6.7308075111140604e-06, "loss": 0.8776, "step": 6603 }, { "epoch": 0.39436283291532304, "grad_norm": 2.294081687927246, "learning_rate": 6.7301439851370185e-06, "loss": 0.8785, "step": 6604 }, { "epoch": 0.3944225486683387, "grad_norm": 2.1246557235717773, "learning_rate": 6.7294804591599775e-06, "loss": 0.8802, "step": 6605 }, { "epoch": 0.39448226442135437, "grad_norm": 2.7212231159210205, "learning_rate": 6.728816933182935e-06, "loss": 0.9117, "step": 6606 }, { "epoch": 0.39454198017437, "grad_norm": 3.53991961479187, "learning_rate": 6.728153407205893e-06, "loss": 0.8702, "step": 6607 }, { "epoch": 0.39460169592738564, "grad_norm": 2.0155606269836426, "learning_rate": 6.72748988122885e-06, "loss": 0.9002, "step": 6608 }, { "epoch": 0.3946614116804013, "grad_norm": 3.0827879905700684, "learning_rate": 6.726826355251809e-06, "loss": 0.8832, "step": 6609 }, { "epoch": 0.3947211274334169, "grad_norm": 3.120345115661621, "learning_rate": 6.726162829274767e-06, "loss": 0.8824, "step": 6610 }, { "epoch": 0.3947808431864326, "grad_norm": 3.090867042541504, "learning_rate": 6.725499303297724e-06, "loss": 0.834, "step": 6611 }, { "epoch": 0.39484055893944825, "grad_norm": 2.730983018875122, "learning_rate": 6.724835777320683e-06, "loss": 0.8634, "step": 6612 }, { "epoch": 0.39490027469246386, "grad_norm": 3.1454451084136963, "learning_rate": 6.724172251343641e-06, "loss": 0.8475, "step": 6613 }, { "epoch": 0.3949599904454795, "grad_norm": 2.0830845832824707, "learning_rate": 6.723508725366599e-06, "loss": 0.8787, "step": 6614 }, { "epoch": 0.3950197061984952, "grad_norm": 11.232992172241211, "learning_rate": 6.722845199389557e-06, "loss": 0.8627, "step": 6615 }, { "epoch": 0.3950794219515108, "grad_norm": 2.383951187133789, "learning_rate": 6.722181673412515e-06, "loss": 0.897, "step": 6616 }, { "epoch": 0.39513913770452647, "grad_norm": 2.9278981685638428, "learning_rate": 6.721518147435473e-06, "loss": 0.8753, "step": 6617 }, { "epoch": 0.3951988534575421, "grad_norm": 1.8899257183074951, "learning_rate": 6.72085462145843e-06, "loss": 0.861, "step": 6618 }, { "epoch": 0.39525856921055774, "grad_norm": 1.9772753715515137, "learning_rate": 6.720191095481388e-06, "loss": 0.846, "step": 6619 }, { "epoch": 0.3953182849635734, "grad_norm": 2.4487152099609375, "learning_rate": 6.719527569504347e-06, "loss": 0.8873, "step": 6620 }, { "epoch": 0.395378000716589, "grad_norm": 2.523115873336792, "learning_rate": 6.7188640435273045e-06, "loss": 0.8561, "step": 6621 }, { "epoch": 0.3954377164696047, "grad_norm": 3.6708550453186035, "learning_rate": 6.718200517550263e-06, "loss": 0.8623, "step": 6622 }, { "epoch": 0.39549743222262035, "grad_norm": 3.1744508743286133, "learning_rate": 6.71753699157322e-06, "loss": 0.916, "step": 6623 }, { "epoch": 0.39555714797563596, "grad_norm": 4.107338905334473, "learning_rate": 6.716873465596179e-06, "loss": 0.8897, "step": 6624 }, { "epoch": 0.3956168637286516, "grad_norm": 2.541801691055298, "learning_rate": 6.716209939619137e-06, "loss": 0.8575, "step": 6625 }, { "epoch": 0.3956765794816673, "grad_norm": 1.9274438619613647, "learning_rate": 6.715546413642094e-06, "loss": 0.8857, "step": 6626 }, { "epoch": 0.3957362952346829, "grad_norm": 2.1114604473114014, "learning_rate": 6.714882887665053e-06, "loss": 0.8762, "step": 6627 }, { "epoch": 0.39579601098769857, "grad_norm": 1.7592631578445435, "learning_rate": 6.71421936168801e-06, "loss": 0.8636, "step": 6628 }, { "epoch": 0.3958557267407142, "grad_norm": 1.9887722730636597, "learning_rate": 6.7135558357109685e-06, "loss": 0.8896, "step": 6629 }, { "epoch": 0.39591544249372984, "grad_norm": 2.3700168132781982, "learning_rate": 6.7128923097339275e-06, "loss": 0.9115, "step": 6630 }, { "epoch": 0.3959751582467455, "grad_norm": 5.333016395568848, "learning_rate": 6.712228783756885e-06, "loss": 0.864, "step": 6631 }, { "epoch": 0.3960348739997611, "grad_norm": 3.4403908252716064, "learning_rate": 6.711565257779843e-06, "loss": 0.8994, "step": 6632 }, { "epoch": 0.3960945897527768, "grad_norm": 2.788013458251953, "learning_rate": 6.7109017318028e-06, "loss": 0.8622, "step": 6633 }, { "epoch": 0.39615430550579245, "grad_norm": 3.2735252380371094, "learning_rate": 6.710238205825759e-06, "loss": 0.8721, "step": 6634 }, { "epoch": 0.39621402125880806, "grad_norm": 2.124884605407715, "learning_rate": 6.709574679848717e-06, "loss": 0.8768, "step": 6635 }, { "epoch": 0.3962737370118237, "grad_norm": 2.110480546951294, "learning_rate": 6.708911153871674e-06, "loss": 0.8585, "step": 6636 }, { "epoch": 0.3963334527648394, "grad_norm": 1.9374085664749146, "learning_rate": 6.708247627894633e-06, "loss": 0.8637, "step": 6637 }, { "epoch": 0.396393168517855, "grad_norm": 2.1007227897644043, "learning_rate": 6.7075841019175906e-06, "loss": 0.9075, "step": 6638 }, { "epoch": 0.39645288427087066, "grad_norm": 2.024829387664795, "learning_rate": 6.706920575940549e-06, "loss": 0.8689, "step": 6639 }, { "epoch": 0.3965126000238863, "grad_norm": 3.139845371246338, "learning_rate": 6.706257049963507e-06, "loss": 0.8731, "step": 6640 }, { "epoch": 0.39657231577690194, "grad_norm": 2.1809098720550537, "learning_rate": 6.705593523986465e-06, "loss": 0.929, "step": 6641 }, { "epoch": 0.3966320315299176, "grad_norm": 2.2443814277648926, "learning_rate": 6.704929998009423e-06, "loss": 0.8821, "step": 6642 }, { "epoch": 0.3966917472829332, "grad_norm": 2.3594491481781006, "learning_rate": 6.70426647203238e-06, "loss": 0.8463, "step": 6643 }, { "epoch": 0.3967514630359489, "grad_norm": 1.967399001121521, "learning_rate": 6.703602946055338e-06, "loss": 0.8712, "step": 6644 }, { "epoch": 0.39681117878896455, "grad_norm": 4.659850597381592, "learning_rate": 6.702939420078297e-06, "loss": 0.8757, "step": 6645 }, { "epoch": 0.39687089454198016, "grad_norm": 2.966848373413086, "learning_rate": 6.7022758941012545e-06, "loss": 0.8511, "step": 6646 }, { "epoch": 0.3969306102949958, "grad_norm": 2.5033717155456543, "learning_rate": 6.701612368124213e-06, "loss": 0.867, "step": 6647 }, { "epoch": 0.3969903260480115, "grad_norm": 1.9461815357208252, "learning_rate": 6.70094884214717e-06, "loss": 0.88, "step": 6648 }, { "epoch": 0.3970500418010271, "grad_norm": 2.1613693237304688, "learning_rate": 6.700285316170129e-06, "loss": 0.9005, "step": 6649 }, { "epoch": 0.39710975755404276, "grad_norm": 2.4986112117767334, "learning_rate": 6.699621790193087e-06, "loss": 0.8993, "step": 6650 }, { "epoch": 0.39716947330705843, "grad_norm": 3.5574891567230225, "learning_rate": 6.698958264216044e-06, "loss": 0.8694, "step": 6651 }, { "epoch": 0.39722918906007404, "grad_norm": 2.414639472961426, "learning_rate": 6.698294738239003e-06, "loss": 0.8661, "step": 6652 }, { "epoch": 0.3972889048130897, "grad_norm": 2.304715633392334, "learning_rate": 6.69763121226196e-06, "loss": 0.9108, "step": 6653 }, { "epoch": 0.3973486205661053, "grad_norm": 1.4904959201812744, "learning_rate": 6.6969676862849185e-06, "loss": 0.8652, "step": 6654 }, { "epoch": 0.397408336319121, "grad_norm": 2.4541921615600586, "learning_rate": 6.696304160307877e-06, "loss": 0.914, "step": 6655 }, { "epoch": 0.39746805207213665, "grad_norm": 2.9802048206329346, "learning_rate": 6.695640634330835e-06, "loss": 0.8684, "step": 6656 }, { "epoch": 0.39752776782515226, "grad_norm": 1.9075491428375244, "learning_rate": 6.694977108353793e-06, "loss": 0.8799, "step": 6657 }, { "epoch": 0.3975874835781679, "grad_norm": 3.0285227298736572, "learning_rate": 6.69431358237675e-06, "loss": 0.8938, "step": 6658 }, { "epoch": 0.3976471993311836, "grad_norm": 2.0477471351623535, "learning_rate": 6.693650056399709e-06, "loss": 0.8885, "step": 6659 }, { "epoch": 0.3977069150841992, "grad_norm": 2.9214158058166504, "learning_rate": 6.692986530422667e-06, "loss": 0.8869, "step": 6660 }, { "epoch": 0.39776663083721486, "grad_norm": 2.2670094966888428, "learning_rate": 6.692323004445624e-06, "loss": 0.886, "step": 6661 }, { "epoch": 0.39782634659023053, "grad_norm": 2.917182683944702, "learning_rate": 6.691659478468583e-06, "loss": 0.8958, "step": 6662 }, { "epoch": 0.39788606234324614, "grad_norm": 1.8363150358200073, "learning_rate": 6.6909959524915405e-06, "loss": 0.8867, "step": 6663 }, { "epoch": 0.3979457780962618, "grad_norm": 1.8212764263153076, "learning_rate": 6.690332426514499e-06, "loss": 0.882, "step": 6664 }, { "epoch": 0.3980054938492774, "grad_norm": 2.7201411724090576, "learning_rate": 6.689668900537457e-06, "loss": 0.8965, "step": 6665 }, { "epoch": 0.3980652096022931, "grad_norm": 1.6125625371932983, "learning_rate": 6.689005374560415e-06, "loss": 0.9158, "step": 6666 }, { "epoch": 0.39812492535530875, "grad_norm": 3.4144515991210938, "learning_rate": 6.688341848583373e-06, "loss": 0.8574, "step": 6667 }, { "epoch": 0.39818464110832436, "grad_norm": 2.017518997192383, "learning_rate": 6.68767832260633e-06, "loss": 0.8626, "step": 6668 }, { "epoch": 0.39824435686134, "grad_norm": 2.06957745552063, "learning_rate": 6.687014796629288e-06, "loss": 0.8658, "step": 6669 }, { "epoch": 0.3983040726143557, "grad_norm": 2.939716339111328, "learning_rate": 6.686351270652247e-06, "loss": 0.8665, "step": 6670 }, { "epoch": 0.3983637883673713, "grad_norm": 3.500593900680542, "learning_rate": 6.6856877446752045e-06, "loss": 0.8817, "step": 6671 }, { "epoch": 0.39842350412038696, "grad_norm": 2.350618600845337, "learning_rate": 6.685024218698163e-06, "loss": 0.8525, "step": 6672 }, { "epoch": 0.3984832198734026, "grad_norm": 2.6823086738586426, "learning_rate": 6.68436069272112e-06, "loss": 0.8365, "step": 6673 }, { "epoch": 0.39854293562641824, "grad_norm": 2.9248270988464355, "learning_rate": 6.683697166744079e-06, "loss": 0.8995, "step": 6674 }, { "epoch": 0.3986026513794339, "grad_norm": 1.476708173751831, "learning_rate": 6.683033640767037e-06, "loss": 0.8701, "step": 6675 }, { "epoch": 0.3986623671324495, "grad_norm": 2.374662399291992, "learning_rate": 6.682370114789994e-06, "loss": 0.8665, "step": 6676 }, { "epoch": 0.3987220828854652, "grad_norm": 1.9541329145431519, "learning_rate": 6.681706588812953e-06, "loss": 0.902, "step": 6677 }, { "epoch": 0.39878179863848084, "grad_norm": 2.134258985519409, "learning_rate": 6.68104306283591e-06, "loss": 0.8587, "step": 6678 }, { "epoch": 0.39884151439149645, "grad_norm": 4.886813640594482, "learning_rate": 6.6803795368588684e-06, "loss": 0.8579, "step": 6679 }, { "epoch": 0.3989012301445121, "grad_norm": 1.986965298652649, "learning_rate": 6.679716010881827e-06, "loss": 0.8917, "step": 6680 }, { "epoch": 0.3989609458975278, "grad_norm": 2.966050624847412, "learning_rate": 6.679052484904785e-06, "loss": 0.8778, "step": 6681 }, { "epoch": 0.3990206616505434, "grad_norm": 2.0883922576904297, "learning_rate": 6.678388958927743e-06, "loss": 0.8964, "step": 6682 }, { "epoch": 0.39908037740355906, "grad_norm": 2.1842615604400635, "learning_rate": 6.6777254329507e-06, "loss": 0.9133, "step": 6683 }, { "epoch": 0.3991400931565747, "grad_norm": 1.9512121677398682, "learning_rate": 6.677061906973659e-06, "loss": 0.8986, "step": 6684 }, { "epoch": 0.39919980890959034, "grad_norm": 3.3677589893341064, "learning_rate": 6.676398380996617e-06, "loss": 0.8688, "step": 6685 }, { "epoch": 0.399259524662606, "grad_norm": 2.420583724975586, "learning_rate": 6.675734855019574e-06, "loss": 0.89, "step": 6686 }, { "epoch": 0.39931924041562167, "grad_norm": 2.284823417663574, "learning_rate": 6.675071329042533e-06, "loss": 0.919, "step": 6687 }, { "epoch": 0.3993789561686373, "grad_norm": 2.72190260887146, "learning_rate": 6.6744078030654905e-06, "loss": 0.8911, "step": 6688 }, { "epoch": 0.39943867192165294, "grad_norm": 2.9896178245544434, "learning_rate": 6.673744277088449e-06, "loss": 0.8795, "step": 6689 }, { "epoch": 0.39949838767466855, "grad_norm": 2.398115873336792, "learning_rate": 6.673080751111407e-06, "loss": 0.9164, "step": 6690 }, { "epoch": 0.3995581034276842, "grad_norm": 2.765676736831665, "learning_rate": 6.672417225134365e-06, "loss": 0.8976, "step": 6691 }, { "epoch": 0.3996178191806999, "grad_norm": 1.9765502214431763, "learning_rate": 6.671753699157323e-06, "loss": 0.8752, "step": 6692 }, { "epoch": 0.3996775349337155, "grad_norm": 2.2788808345794678, "learning_rate": 6.67109017318028e-06, "loss": 0.8654, "step": 6693 }, { "epoch": 0.39973725068673116, "grad_norm": 1.5952147245407104, "learning_rate": 6.670426647203238e-06, "loss": 0.8465, "step": 6694 }, { "epoch": 0.3997969664397468, "grad_norm": 1.9198156595230103, "learning_rate": 6.669763121226197e-06, "loss": 0.8898, "step": 6695 }, { "epoch": 0.39985668219276244, "grad_norm": 2.166938543319702, "learning_rate": 6.6690995952491545e-06, "loss": 0.9019, "step": 6696 }, { "epoch": 0.3999163979457781, "grad_norm": 1.9909676313400269, "learning_rate": 6.6684360692721126e-06, "loss": 0.8589, "step": 6697 }, { "epoch": 0.39997611369879377, "grad_norm": 1.926845908164978, "learning_rate": 6.66777254329507e-06, "loss": 0.871, "step": 6698 }, { "epoch": 0.4000358294518094, "grad_norm": 1.892323613166809, "learning_rate": 6.667109017318029e-06, "loss": 0.8693, "step": 6699 }, { "epoch": 0.40009554520482504, "grad_norm": 2.6442904472351074, "learning_rate": 6.666445491340987e-06, "loss": 0.9158, "step": 6700 }, { "epoch": 0.40015526095784065, "grad_norm": 2.0021238327026367, "learning_rate": 6.665781965363944e-06, "loss": 0.8656, "step": 6701 }, { "epoch": 0.4002149767108563, "grad_norm": 2.2551703453063965, "learning_rate": 6.665118439386903e-06, "loss": 0.8759, "step": 6702 }, { "epoch": 0.400274692463872, "grad_norm": 2.420194387435913, "learning_rate": 6.66445491340986e-06, "loss": 0.8952, "step": 6703 }, { "epoch": 0.4003344082168876, "grad_norm": 1.8671889305114746, "learning_rate": 6.663791387432818e-06, "loss": 0.8657, "step": 6704 }, { "epoch": 0.40039412396990326, "grad_norm": 2.1626980304718018, "learning_rate": 6.663127861455777e-06, "loss": 0.8544, "step": 6705 }, { "epoch": 0.4004538397229189, "grad_norm": 3.0162134170532227, "learning_rate": 6.662464335478735e-06, "loss": 0.8565, "step": 6706 }, { "epoch": 0.40051355547593454, "grad_norm": 2.7334492206573486, "learning_rate": 6.661800809501693e-06, "loss": 0.8974, "step": 6707 }, { "epoch": 0.4005732712289502, "grad_norm": 1.7627015113830566, "learning_rate": 6.66113728352465e-06, "loss": 0.8672, "step": 6708 }, { "epoch": 0.40063298698196587, "grad_norm": 2.0935556888580322, "learning_rate": 6.660473757547609e-06, "loss": 0.8573, "step": 6709 }, { "epoch": 0.4006927027349815, "grad_norm": 1.839620590209961, "learning_rate": 6.659810231570567e-06, "loss": 0.8571, "step": 6710 }, { "epoch": 0.40075241848799714, "grad_norm": 2.5000643730163574, "learning_rate": 6.659146705593524e-06, "loss": 0.8988, "step": 6711 }, { "epoch": 0.40081213424101275, "grad_norm": 3.673668622970581, "learning_rate": 6.658483179616483e-06, "loss": 0.8639, "step": 6712 }, { "epoch": 0.4008718499940284, "grad_norm": 4.0679521560668945, "learning_rate": 6.6578196536394405e-06, "loss": 0.8895, "step": 6713 }, { "epoch": 0.4009315657470441, "grad_norm": 2.510406017303467, "learning_rate": 6.6571561276623986e-06, "loss": 0.8745, "step": 6714 }, { "epoch": 0.4009912815000597, "grad_norm": 2.0250437259674072, "learning_rate": 6.656492601685357e-06, "loss": 0.8953, "step": 6715 }, { "epoch": 0.40105099725307536, "grad_norm": 2.137800693511963, "learning_rate": 6.655829075708315e-06, "loss": 0.8701, "step": 6716 }, { "epoch": 0.401110713006091, "grad_norm": 1.8271033763885498, "learning_rate": 6.655165549731273e-06, "loss": 0.872, "step": 6717 }, { "epoch": 0.40117042875910663, "grad_norm": 1.9582608938217163, "learning_rate": 6.65450202375423e-06, "loss": 0.8745, "step": 6718 }, { "epoch": 0.4012301445121223, "grad_norm": 4.924047470092773, "learning_rate": 6.653838497777188e-06, "loss": 0.8609, "step": 6719 }, { "epoch": 0.40128986026513797, "grad_norm": 2.1718924045562744, "learning_rate": 6.653174971800147e-06, "loss": 0.8505, "step": 6720 }, { "epoch": 0.4013495760181536, "grad_norm": 3.9999165534973145, "learning_rate": 6.6525114458231044e-06, "loss": 0.9503, "step": 6721 }, { "epoch": 0.40140929177116924, "grad_norm": 2.336266279220581, "learning_rate": 6.6518479198460625e-06, "loss": 0.9363, "step": 6722 }, { "epoch": 0.4014690075241849, "grad_norm": 3.876643657684326, "learning_rate": 6.65118439386902e-06, "loss": 0.9069, "step": 6723 }, { "epoch": 0.4015287232772005, "grad_norm": 3.5338823795318604, "learning_rate": 6.650520867891979e-06, "loss": 0.8899, "step": 6724 }, { "epoch": 0.4015884390302162, "grad_norm": 2.284059524536133, "learning_rate": 6.649857341914937e-06, "loss": 0.8827, "step": 6725 }, { "epoch": 0.4016481547832318, "grad_norm": 2.7905666828155518, "learning_rate": 6.649193815937894e-06, "loss": 0.8691, "step": 6726 }, { "epoch": 0.40170787053624746, "grad_norm": 3.0873520374298096, "learning_rate": 6.648530289960853e-06, "loss": 0.9047, "step": 6727 }, { "epoch": 0.4017675862892631, "grad_norm": 3.0198116302490234, "learning_rate": 6.64786676398381e-06, "loss": 0.8411, "step": 6728 }, { "epoch": 0.40182730204227873, "grad_norm": 3.0580005645751953, "learning_rate": 6.647203238006768e-06, "loss": 0.8622, "step": 6729 }, { "epoch": 0.4018870177952944, "grad_norm": 2.674694061279297, "learning_rate": 6.646539712029727e-06, "loss": 0.8827, "step": 6730 }, { "epoch": 0.40194673354831006, "grad_norm": 2.1451196670532227, "learning_rate": 6.645876186052685e-06, "loss": 0.8853, "step": 6731 }, { "epoch": 0.4020064493013257, "grad_norm": 2.1633224487304688, "learning_rate": 6.645212660075643e-06, "loss": 0.9297, "step": 6732 }, { "epoch": 0.40206616505434134, "grad_norm": 2.0935590267181396, "learning_rate": 6.6445491340986e-06, "loss": 0.9006, "step": 6733 }, { "epoch": 0.402125880807357, "grad_norm": 1.7139019966125488, "learning_rate": 6.643885608121559e-06, "loss": 0.856, "step": 6734 }, { "epoch": 0.4021855965603726, "grad_norm": 2.011904001235962, "learning_rate": 6.643222082144517e-06, "loss": 0.8738, "step": 6735 }, { "epoch": 0.4022453123133883, "grad_norm": 1.895944356918335, "learning_rate": 6.642558556167474e-06, "loss": 0.9013, "step": 6736 }, { "epoch": 0.4023050280664039, "grad_norm": 4.49920654296875, "learning_rate": 6.641895030190433e-06, "loss": 0.8625, "step": 6737 }, { "epoch": 0.40236474381941956, "grad_norm": 2.385129690170288, "learning_rate": 6.6412315042133904e-06, "loss": 0.9013, "step": 6738 }, { "epoch": 0.4024244595724352, "grad_norm": 1.8757305145263672, "learning_rate": 6.6405679782363485e-06, "loss": 0.898, "step": 6739 }, { "epoch": 0.40248417532545083, "grad_norm": 2.134878158569336, "learning_rate": 6.639904452259307e-06, "loss": 0.8622, "step": 6740 }, { "epoch": 0.4025438910784665, "grad_norm": 2.8613102436065674, "learning_rate": 6.639240926282265e-06, "loss": 0.8632, "step": 6741 }, { "epoch": 0.40260360683148216, "grad_norm": 2.44439697265625, "learning_rate": 6.638577400305223e-06, "loss": 0.8595, "step": 6742 }, { "epoch": 0.4026633225844978, "grad_norm": 2.4149134159088135, "learning_rate": 6.63791387432818e-06, "loss": 0.8773, "step": 6743 }, { "epoch": 0.40272303833751344, "grad_norm": 2.801413059234619, "learning_rate": 6.637250348351138e-06, "loss": 0.8636, "step": 6744 }, { "epoch": 0.4027827540905291, "grad_norm": 6.736935615539551, "learning_rate": 6.636586822374097e-06, "loss": 0.8692, "step": 6745 }, { "epoch": 0.4028424698435447, "grad_norm": 2.632645606994629, "learning_rate": 6.635923296397054e-06, "loss": 0.8769, "step": 6746 }, { "epoch": 0.4029021855965604, "grad_norm": 2.3213937282562256, "learning_rate": 6.6352597704200125e-06, "loss": 0.9001, "step": 6747 }, { "epoch": 0.402961901349576, "grad_norm": 2.2790629863739014, "learning_rate": 6.63459624444297e-06, "loss": 0.8738, "step": 6748 }, { "epoch": 0.40302161710259166, "grad_norm": 2.355470895767212, "learning_rate": 6.633932718465929e-06, "loss": 0.868, "step": 6749 }, { "epoch": 0.4030813328556073, "grad_norm": 2.283398151397705, "learning_rate": 6.633269192488887e-06, "loss": 0.8773, "step": 6750 }, { "epoch": 0.40314104860862293, "grad_norm": 2.493265151977539, "learning_rate": 6.632605666511844e-06, "loss": 0.887, "step": 6751 }, { "epoch": 0.4032007643616386, "grad_norm": 1.6359050273895264, "learning_rate": 6.631942140534803e-06, "loss": 0.8911, "step": 6752 }, { "epoch": 0.40326048011465426, "grad_norm": 2.266939163208008, "learning_rate": 6.63127861455776e-06, "loss": 0.8482, "step": 6753 }, { "epoch": 0.4033201958676699, "grad_norm": 2.2784509658813477, "learning_rate": 6.630615088580718e-06, "loss": 0.8462, "step": 6754 }, { "epoch": 0.40337991162068554, "grad_norm": 3.0336902141571045, "learning_rate": 6.629951562603677e-06, "loss": 0.8927, "step": 6755 }, { "epoch": 0.4034396273737012, "grad_norm": 1.9846878051757812, "learning_rate": 6.6292880366266346e-06, "loss": 0.8855, "step": 6756 }, { "epoch": 0.4034993431267168, "grad_norm": 2.356369733810425, "learning_rate": 6.628624510649593e-06, "loss": 0.8711, "step": 6757 }, { "epoch": 0.4035590588797325, "grad_norm": 1.6687043905258179, "learning_rate": 6.62796098467255e-06, "loss": 0.8431, "step": 6758 }, { "epoch": 0.40361877463274815, "grad_norm": 2.846790075302124, "learning_rate": 6.627297458695509e-06, "loss": 0.8846, "step": 6759 }, { "epoch": 0.40367849038576376, "grad_norm": 2.972482919692993, "learning_rate": 6.626633932718467e-06, "loss": 0.8545, "step": 6760 }, { "epoch": 0.4037382061387794, "grad_norm": 1.8955292701721191, "learning_rate": 6.625970406741424e-06, "loss": 0.8724, "step": 6761 }, { "epoch": 0.40379792189179503, "grad_norm": 2.1649248600006104, "learning_rate": 6.625306880764383e-06, "loss": 0.8711, "step": 6762 }, { "epoch": 0.4038576376448107, "grad_norm": 2.1751859188079834, "learning_rate": 6.62464335478734e-06, "loss": 0.8988, "step": 6763 }, { "epoch": 0.40391735339782636, "grad_norm": 3.0723748207092285, "learning_rate": 6.6239798288102985e-06, "loss": 0.8657, "step": 6764 }, { "epoch": 0.40397706915084197, "grad_norm": 2.366929292678833, "learning_rate": 6.623316302833257e-06, "loss": 0.8751, "step": 6765 }, { "epoch": 0.40403678490385764, "grad_norm": 3.0049216747283936, "learning_rate": 6.622652776856215e-06, "loss": 0.8685, "step": 6766 }, { "epoch": 0.4040965006568733, "grad_norm": 2.6882505416870117, "learning_rate": 6.621989250879173e-06, "loss": 0.8881, "step": 6767 }, { "epoch": 0.4041562164098889, "grad_norm": 2.7170751094818115, "learning_rate": 6.62132572490213e-06, "loss": 0.8842, "step": 6768 }, { "epoch": 0.4042159321629046, "grad_norm": 2.548091411590576, "learning_rate": 6.620662198925088e-06, "loss": 0.864, "step": 6769 }, { "epoch": 0.40427564791592024, "grad_norm": 2.5660760402679443, "learning_rate": 6.619998672948047e-06, "loss": 0.8384, "step": 6770 }, { "epoch": 0.40433536366893585, "grad_norm": 3.5907511711120605, "learning_rate": 6.619335146971004e-06, "loss": 0.8503, "step": 6771 }, { "epoch": 0.4043950794219515, "grad_norm": 2.0157856941223145, "learning_rate": 6.6186716209939625e-06, "loss": 0.8498, "step": 6772 }, { "epoch": 0.40445479517496713, "grad_norm": 2.089533567428589, "learning_rate": 6.61800809501692e-06, "loss": 0.8648, "step": 6773 }, { "epoch": 0.4045145109279828, "grad_norm": 2.5708563327789307, "learning_rate": 6.617344569039879e-06, "loss": 0.9249, "step": 6774 }, { "epoch": 0.40457422668099846, "grad_norm": 3.6256346702575684, "learning_rate": 6.616681043062837e-06, "loss": 0.8933, "step": 6775 }, { "epoch": 0.40463394243401407, "grad_norm": 3.0704081058502197, "learning_rate": 6.616017517085794e-06, "loss": 0.8543, "step": 6776 }, { "epoch": 0.40469365818702974, "grad_norm": 2.070389986038208, "learning_rate": 6.615353991108753e-06, "loss": 0.883, "step": 6777 }, { "epoch": 0.4047533739400454, "grad_norm": 2.1359500885009766, "learning_rate": 6.61469046513171e-06, "loss": 0.8958, "step": 6778 }, { "epoch": 0.404813089693061, "grad_norm": 3.121241331100464, "learning_rate": 6.614026939154668e-06, "loss": 0.8676, "step": 6779 }, { "epoch": 0.4048728054460767, "grad_norm": 2.0108296871185303, "learning_rate": 6.613363413177627e-06, "loss": 0.8623, "step": 6780 }, { "epoch": 0.40493252119909234, "grad_norm": 4.10522985458374, "learning_rate": 6.6126998872005845e-06, "loss": 0.8756, "step": 6781 }, { "epoch": 0.40499223695210795, "grad_norm": 2.2052314281463623, "learning_rate": 6.612036361223543e-06, "loss": 0.8537, "step": 6782 }, { "epoch": 0.4050519527051236, "grad_norm": 2.7314248085021973, "learning_rate": 6.6113728352465e-06, "loss": 0.8869, "step": 6783 }, { "epoch": 0.4051116684581393, "grad_norm": 3.7478840351104736, "learning_rate": 6.610709309269459e-06, "loss": 0.8626, "step": 6784 }, { "epoch": 0.4051713842111549, "grad_norm": 2.119969606399536, "learning_rate": 6.610045783292417e-06, "loss": 0.8821, "step": 6785 }, { "epoch": 0.40523109996417056, "grad_norm": 3.1420202255249023, "learning_rate": 6.609382257315374e-06, "loss": 0.8879, "step": 6786 }, { "epoch": 0.40529081571718617, "grad_norm": 2.697938919067383, "learning_rate": 6.608718731338333e-06, "loss": 0.8477, "step": 6787 }, { "epoch": 0.40535053147020184, "grad_norm": 1.689928650856018, "learning_rate": 6.60805520536129e-06, "loss": 0.8813, "step": 6788 }, { "epoch": 0.4054102472232175, "grad_norm": 2.5734829902648926, "learning_rate": 6.6073916793842485e-06, "loss": 0.8698, "step": 6789 }, { "epoch": 0.4054699629762331, "grad_norm": 2.3611690998077393, "learning_rate": 6.606728153407207e-06, "loss": 0.8901, "step": 6790 }, { "epoch": 0.4055296787292488, "grad_norm": 5.471388816833496, "learning_rate": 6.606064627430165e-06, "loss": 0.8757, "step": 6791 }, { "epoch": 0.40558939448226444, "grad_norm": 1.753731369972229, "learning_rate": 6.605401101453123e-06, "loss": 0.8397, "step": 6792 }, { "epoch": 0.40564911023528005, "grad_norm": 2.324831008911133, "learning_rate": 6.60473757547608e-06, "loss": 0.879, "step": 6793 }, { "epoch": 0.4057088259882957, "grad_norm": 2.8018128871917725, "learning_rate": 6.604074049499038e-06, "loss": 0.8722, "step": 6794 }, { "epoch": 0.4057685417413114, "grad_norm": 2.8362371921539307, "learning_rate": 6.603410523521997e-06, "loss": 0.8851, "step": 6795 }, { "epoch": 0.405828257494327, "grad_norm": 2.811875820159912, "learning_rate": 6.602746997544954e-06, "loss": 0.876, "step": 6796 }, { "epoch": 0.40588797324734266, "grad_norm": 1.893989086151123, "learning_rate": 6.6020834715679124e-06, "loss": 0.8596, "step": 6797 }, { "epoch": 0.40594768900035827, "grad_norm": 2.495771646499634, "learning_rate": 6.60141994559087e-06, "loss": 0.8631, "step": 6798 }, { "epoch": 0.40600740475337393, "grad_norm": 2.1007730960845947, "learning_rate": 6.600756419613829e-06, "loss": 0.8658, "step": 6799 }, { "epoch": 0.4060671205063896, "grad_norm": 2.434086799621582, "learning_rate": 6.600092893636787e-06, "loss": 0.8693, "step": 6800 }, { "epoch": 0.4060671205063896, "eval_text_loss": 0.9261260032653809, "eval_text_runtime": 15.1866, "eval_text_samples_per_second": 263.391, "eval_text_steps_per_second": 0.527, "step": 6800 }, { "epoch": 0.4060671205063896, "eval_image_loss": 0.6420998573303223, "eval_image_runtime": 5.0146, "eval_image_samples_per_second": 797.67, "eval_image_steps_per_second": 1.595, "step": 6800 }, { "epoch": 0.4060671205063896, "eval_video_loss": 1.0936486721038818, "eval_video_runtime": 76.3584, "eval_video_samples_per_second": 52.385, "eval_video_steps_per_second": 0.105, "step": 6800 }, { "epoch": 0.4061268362594052, "grad_norm": 4.098711013793945, "learning_rate": 6.599429367659744e-06, "loss": 0.894, "step": 6801 }, { "epoch": 0.4061865520124209, "grad_norm": 3.5744924545288086, "learning_rate": 6.598765841682703e-06, "loss": 0.8962, "step": 6802 }, { "epoch": 0.40624626776543654, "grad_norm": 2.1665337085723877, "learning_rate": 6.59810231570566e-06, "loss": 0.895, "step": 6803 }, { "epoch": 0.40630598351845215, "grad_norm": 3.131352663040161, "learning_rate": 6.597438789728618e-06, "loss": 0.9283, "step": 6804 }, { "epoch": 0.4063656992714678, "grad_norm": 2.3037209510803223, "learning_rate": 6.596775263751577e-06, "loss": 0.8619, "step": 6805 }, { "epoch": 0.4064254150244835, "grad_norm": 1.6570874452590942, "learning_rate": 6.5961117377745345e-06, "loss": 0.863, "step": 6806 }, { "epoch": 0.4064851307774991, "grad_norm": 3.7047340869903564, "learning_rate": 6.595448211797493e-06, "loss": 0.8939, "step": 6807 }, { "epoch": 0.40654484653051476, "grad_norm": 9.294571876525879, "learning_rate": 6.59478468582045e-06, "loss": 0.8653, "step": 6808 }, { "epoch": 0.40660456228353037, "grad_norm": 1.906225323677063, "learning_rate": 6.594121159843409e-06, "loss": 0.8717, "step": 6809 }, { "epoch": 0.40666427803654603, "grad_norm": 2.140892744064331, "learning_rate": 6.593457633866367e-06, "loss": 0.8867, "step": 6810 }, { "epoch": 0.4067239937895617, "grad_norm": 2.006138324737549, "learning_rate": 6.592794107889324e-06, "loss": 0.8768, "step": 6811 }, { "epoch": 0.4067837095425773, "grad_norm": 2.6168017387390137, "learning_rate": 6.592130581912283e-06, "loss": 0.9102, "step": 6812 }, { "epoch": 0.406843425295593, "grad_norm": 1.9684064388275146, "learning_rate": 6.59146705593524e-06, "loss": 0.8761, "step": 6813 }, { "epoch": 0.40690314104860864, "grad_norm": 3.27136492729187, "learning_rate": 6.5908035299581985e-06, "loss": 0.8464, "step": 6814 }, { "epoch": 0.40696285680162425, "grad_norm": 2.0855236053466797, "learning_rate": 6.5901400039811566e-06, "loss": 0.9209, "step": 6815 }, { "epoch": 0.4070225725546399, "grad_norm": 3.0234932899475098, "learning_rate": 6.589476478004115e-06, "loss": 0.8435, "step": 6816 }, { "epoch": 0.4070822883076556, "grad_norm": 1.7284419536590576, "learning_rate": 6.588812952027073e-06, "loss": 0.8299, "step": 6817 }, { "epoch": 0.4071420040606712, "grad_norm": 2.6360433101654053, "learning_rate": 6.58814942605003e-06, "loss": 0.8391, "step": 6818 }, { "epoch": 0.40720171981368686, "grad_norm": 2.9118964672088623, "learning_rate": 6.587485900072988e-06, "loss": 0.8873, "step": 6819 }, { "epoch": 0.4072614355667025, "grad_norm": 2.947641372680664, "learning_rate": 6.586822374095947e-06, "loss": 0.8412, "step": 6820 }, { "epoch": 0.40732115131971813, "grad_norm": 1.6818411350250244, "learning_rate": 6.586158848118904e-06, "loss": 0.8586, "step": 6821 }, { "epoch": 0.4073808670727338, "grad_norm": 3.582401990890503, "learning_rate": 6.585495322141862e-06, "loss": 0.8946, "step": 6822 }, { "epoch": 0.4074405828257494, "grad_norm": 1.9441875219345093, "learning_rate": 6.58483179616482e-06, "loss": 0.8659, "step": 6823 }, { "epoch": 0.4075002985787651, "grad_norm": 7.124149799346924, "learning_rate": 6.584168270187779e-06, "loss": 0.8749, "step": 6824 }, { "epoch": 0.40756001433178074, "grad_norm": 1.9971585273742676, "learning_rate": 6.583504744210737e-06, "loss": 0.8907, "step": 6825 }, { "epoch": 0.40761973008479635, "grad_norm": 1.8781160116195679, "learning_rate": 6.582841218233694e-06, "loss": 0.8647, "step": 6826 }, { "epoch": 0.407679445837812, "grad_norm": 2.39922833442688, "learning_rate": 6.582177692256653e-06, "loss": 0.8974, "step": 6827 }, { "epoch": 0.4077391615908277, "grad_norm": 2.055643320083618, "learning_rate": 6.58151416627961e-06, "loss": 0.8892, "step": 6828 }, { "epoch": 0.4077988773438433, "grad_norm": 1.7537752389907837, "learning_rate": 6.580850640302568e-06, "loss": 0.8681, "step": 6829 }, { "epoch": 0.40785859309685896, "grad_norm": 1.8089827299118042, "learning_rate": 6.580187114325527e-06, "loss": 0.8824, "step": 6830 }, { "epoch": 0.4079183088498746, "grad_norm": 1.776713252067566, "learning_rate": 6.5795235883484845e-06, "loss": 0.8717, "step": 6831 }, { "epoch": 0.40797802460289023, "grad_norm": 2.646174430847168, "learning_rate": 6.5788600623714426e-06, "loss": 0.8941, "step": 6832 }, { "epoch": 0.4080377403559059, "grad_norm": 1.9125370979309082, "learning_rate": 6.5781965363944e-06, "loss": 0.8692, "step": 6833 }, { "epoch": 0.4080974561089215, "grad_norm": 2.3160369396209717, "learning_rate": 6.577533010417359e-06, "loss": 0.8784, "step": 6834 }, { "epoch": 0.4081571718619372, "grad_norm": 2.6942310333251953, "learning_rate": 6.576869484440317e-06, "loss": 0.8412, "step": 6835 }, { "epoch": 0.40821688761495284, "grad_norm": 3.2880547046661377, "learning_rate": 6.576205958463274e-06, "loss": 0.9046, "step": 6836 }, { "epoch": 0.40827660336796845, "grad_norm": 2.0305488109588623, "learning_rate": 6.575542432486233e-06, "loss": 0.8657, "step": 6837 }, { "epoch": 0.4083363191209841, "grad_norm": 1.8473562002182007, "learning_rate": 6.57487890650919e-06, "loss": 0.8657, "step": 6838 }, { "epoch": 0.4083960348739998, "grad_norm": 1.9892053604125977, "learning_rate": 6.5742153805321484e-06, "loss": 0.8904, "step": 6839 }, { "epoch": 0.4084557506270154, "grad_norm": 2.219964027404785, "learning_rate": 6.5735518545551065e-06, "loss": 0.8862, "step": 6840 }, { "epoch": 0.40851546638003106, "grad_norm": 2.3677291870117188, "learning_rate": 6.572888328578065e-06, "loss": 0.8964, "step": 6841 }, { "epoch": 0.4085751821330467, "grad_norm": 2.0958571434020996, "learning_rate": 6.572224802601023e-06, "loss": 0.9149, "step": 6842 }, { "epoch": 0.40863489788606233, "grad_norm": 2.9072489738464355, "learning_rate": 6.57156127662398e-06, "loss": 0.8792, "step": 6843 }, { "epoch": 0.408694613639078, "grad_norm": 2.2887473106384277, "learning_rate": 6.570897750646938e-06, "loss": 0.894, "step": 6844 }, { "epoch": 0.4087543293920936, "grad_norm": 3.9749367237091064, "learning_rate": 6.570234224669897e-06, "loss": 0.8508, "step": 6845 }, { "epoch": 0.4088140451451093, "grad_norm": 1.9668502807617188, "learning_rate": 6.569570698692854e-06, "loss": 0.8731, "step": 6846 }, { "epoch": 0.40887376089812494, "grad_norm": 2.0070347785949707, "learning_rate": 6.568907172715812e-06, "loss": 0.8718, "step": 6847 }, { "epoch": 0.40893347665114055, "grad_norm": 2.341963768005371, "learning_rate": 6.56824364673877e-06, "loss": 0.8609, "step": 6848 }, { "epoch": 0.4089931924041562, "grad_norm": 5.8655781745910645, "learning_rate": 6.567580120761729e-06, "loss": 0.9292, "step": 6849 }, { "epoch": 0.4090529081571719, "grad_norm": 1.709628701210022, "learning_rate": 6.566916594784687e-06, "loss": 0.8644, "step": 6850 }, { "epoch": 0.4091126239101875, "grad_norm": 2.712670087814331, "learning_rate": 6.566253068807644e-06, "loss": 0.8555, "step": 6851 }, { "epoch": 0.40917233966320316, "grad_norm": 1.747230887413025, "learning_rate": 6.565589542830603e-06, "loss": 0.8476, "step": 6852 }, { "epoch": 0.4092320554162188, "grad_norm": 2.2417798042297363, "learning_rate": 6.56492601685356e-06, "loss": 0.844, "step": 6853 }, { "epoch": 0.40929177116923443, "grad_norm": 2.4543726444244385, "learning_rate": 6.564262490876518e-06, "loss": 0.899, "step": 6854 }, { "epoch": 0.4093514869222501, "grad_norm": 2.187910318374634, "learning_rate": 6.563598964899477e-06, "loss": 0.8856, "step": 6855 }, { "epoch": 0.40941120267526576, "grad_norm": 2.2537593841552734, "learning_rate": 6.5629354389224344e-06, "loss": 0.8544, "step": 6856 }, { "epoch": 0.40947091842828137, "grad_norm": 2.2510316371917725, "learning_rate": 6.5622719129453925e-06, "loss": 0.8636, "step": 6857 }, { "epoch": 0.40953063418129704, "grad_norm": 1.9298218488693237, "learning_rate": 6.56160838696835e-06, "loss": 0.8815, "step": 6858 }, { "epoch": 0.40959034993431265, "grad_norm": 2.1773109436035156, "learning_rate": 6.560944860991309e-06, "loss": 0.8807, "step": 6859 }, { "epoch": 0.4096500656873283, "grad_norm": 2.5962414741516113, "learning_rate": 6.560281335014267e-06, "loss": 0.8896, "step": 6860 }, { "epoch": 0.409709781440344, "grad_norm": 2.049799919128418, "learning_rate": 6.559617809037224e-06, "loss": 0.8516, "step": 6861 }, { "epoch": 0.4097694971933596, "grad_norm": 3.0671236515045166, "learning_rate": 6.558954283060183e-06, "loss": 0.8802, "step": 6862 }, { "epoch": 0.40982921294637525, "grad_norm": 2.101440906524658, "learning_rate": 6.55829075708314e-06, "loss": 0.8608, "step": 6863 }, { "epoch": 0.4098889286993909, "grad_norm": 3.495427370071411, "learning_rate": 6.557627231106098e-06, "loss": 0.851, "step": 6864 }, { "epoch": 0.40994864445240653, "grad_norm": 2.5703237056732178, "learning_rate": 6.5569637051290565e-06, "loss": 0.8754, "step": 6865 }, { "epoch": 0.4100083602054222, "grad_norm": 2.5808794498443604, "learning_rate": 6.556300179152015e-06, "loss": 0.8548, "step": 6866 }, { "epoch": 0.41006807595843786, "grad_norm": 2.218860149383545, "learning_rate": 6.555636653174973e-06, "loss": 0.8634, "step": 6867 }, { "epoch": 0.41012779171145347, "grad_norm": 2.7839725017547607, "learning_rate": 6.55497312719793e-06, "loss": 0.8581, "step": 6868 }, { "epoch": 0.41018750746446914, "grad_norm": 2.605623483657837, "learning_rate": 6.554309601220888e-06, "loss": 0.9161, "step": 6869 }, { "epoch": 0.41024722321748475, "grad_norm": 2.4820425510406494, "learning_rate": 6.553646075243847e-06, "loss": 0.8844, "step": 6870 }, { "epoch": 0.4103069389705004, "grad_norm": 2.216411590576172, "learning_rate": 6.552982549266804e-06, "loss": 0.8724, "step": 6871 }, { "epoch": 0.4103666547235161, "grad_norm": 3.744504928588867, "learning_rate": 6.552319023289762e-06, "loss": 0.8682, "step": 6872 }, { "epoch": 0.4104263704765317, "grad_norm": 2.5535356998443604, "learning_rate": 6.55165549731272e-06, "loss": 0.8901, "step": 6873 }, { "epoch": 0.41048608622954735, "grad_norm": 2.3202733993530273, "learning_rate": 6.5509919713356786e-06, "loss": 0.8689, "step": 6874 }, { "epoch": 0.410545801982563, "grad_norm": 1.646767258644104, "learning_rate": 6.550328445358637e-06, "loss": 0.8888, "step": 6875 }, { "epoch": 0.41060551773557863, "grad_norm": 1.3642507791519165, "learning_rate": 6.549664919381594e-06, "loss": 0.8445, "step": 6876 }, { "epoch": 0.4106652334885943, "grad_norm": 2.58856201171875, "learning_rate": 6.549001393404553e-06, "loss": 0.8722, "step": 6877 }, { "epoch": 0.41072494924160996, "grad_norm": 3.107757806777954, "learning_rate": 6.54833786742751e-06, "loss": 0.8888, "step": 6878 }, { "epoch": 0.41078466499462557, "grad_norm": 4.210586071014404, "learning_rate": 6.547674341450468e-06, "loss": 0.8758, "step": 6879 }, { "epoch": 0.41084438074764124, "grad_norm": 2.760160446166992, "learning_rate": 6.547010815473427e-06, "loss": 0.8825, "step": 6880 }, { "epoch": 0.41090409650065685, "grad_norm": 1.6680071353912354, "learning_rate": 6.546347289496384e-06, "loss": 0.842, "step": 6881 }, { "epoch": 0.4109638122536725, "grad_norm": 2.7935609817504883, "learning_rate": 6.5456837635193425e-06, "loss": 0.8794, "step": 6882 }, { "epoch": 0.4110235280066882, "grad_norm": 2.528258800506592, "learning_rate": 6.5450202375423e-06, "loss": 0.866, "step": 6883 }, { "epoch": 0.4110832437597038, "grad_norm": 2.163841485977173, "learning_rate": 6.544356711565259e-06, "loss": 0.8505, "step": 6884 }, { "epoch": 0.41114295951271945, "grad_norm": 2.0696239471435547, "learning_rate": 6.543693185588217e-06, "loss": 0.865, "step": 6885 }, { "epoch": 0.4112026752657351, "grad_norm": 2.6685950756073, "learning_rate": 6.543029659611174e-06, "loss": 0.8795, "step": 6886 }, { "epoch": 0.41126239101875073, "grad_norm": 2.0339484214782715, "learning_rate": 6.542366133634133e-06, "loss": 0.8721, "step": 6887 }, { "epoch": 0.4113221067717664, "grad_norm": 1.6041901111602783, "learning_rate": 6.54170260765709e-06, "loss": 0.8655, "step": 6888 }, { "epoch": 0.41138182252478206, "grad_norm": 1.6102653741836548, "learning_rate": 6.541039081680048e-06, "loss": 0.8769, "step": 6889 }, { "epoch": 0.41144153827779767, "grad_norm": 2.562136650085449, "learning_rate": 6.5403755557030065e-06, "loss": 0.866, "step": 6890 }, { "epoch": 0.41150125403081333, "grad_norm": 1.867933988571167, "learning_rate": 6.5397120297259646e-06, "loss": 0.862, "step": 6891 }, { "epoch": 0.411560969783829, "grad_norm": 2.2460076808929443, "learning_rate": 6.539048503748923e-06, "loss": 0.8769, "step": 6892 }, { "epoch": 0.4116206855368446, "grad_norm": 2.046699047088623, "learning_rate": 6.53838497777188e-06, "loss": 0.8891, "step": 6893 }, { "epoch": 0.4116804012898603, "grad_norm": 2.0928075313568115, "learning_rate": 6.537721451794838e-06, "loss": 0.8836, "step": 6894 }, { "epoch": 0.4117401170428759, "grad_norm": 2.040579080581665, "learning_rate": 6.537057925817796e-06, "loss": 0.8786, "step": 6895 }, { "epoch": 0.41179983279589155, "grad_norm": 1.8795067071914673, "learning_rate": 6.536394399840754e-06, "loss": 0.8391, "step": 6896 }, { "epoch": 0.4118595485489072, "grad_norm": 2.3705172538757324, "learning_rate": 6.535730873863712e-06, "loss": 0.887, "step": 6897 }, { "epoch": 0.4119192643019228, "grad_norm": 2.0455777645111084, "learning_rate": 6.53506734788667e-06, "loss": 0.8515, "step": 6898 }, { "epoch": 0.4119789800549385, "grad_norm": 1.642256736755371, "learning_rate": 6.5344038219096285e-06, "loss": 0.8896, "step": 6899 }, { "epoch": 0.41203869580795416, "grad_norm": 2.5021378993988037, "learning_rate": 6.533740295932586e-06, "loss": 0.8717, "step": 6900 }, { "epoch": 0.41209841156096977, "grad_norm": 2.072936773300171, "learning_rate": 6.533076769955544e-06, "loss": 0.8762, "step": 6901 }, { "epoch": 0.41215812731398543, "grad_norm": 2.1481659412384033, "learning_rate": 6.532413243978503e-06, "loss": 0.8703, "step": 6902 }, { "epoch": 0.4122178430670011, "grad_norm": 2.4616987705230713, "learning_rate": 6.53174971800146e-06, "loss": 0.8977, "step": 6903 }, { "epoch": 0.4122775588200167, "grad_norm": 2.527066469192505, "learning_rate": 6.531086192024418e-06, "loss": 0.8622, "step": 6904 }, { "epoch": 0.4123372745730324, "grad_norm": 1.7184101343154907, "learning_rate": 6.5304226660473754e-06, "loss": 0.8393, "step": 6905 }, { "epoch": 0.412396990326048, "grad_norm": 1.8477150201797485, "learning_rate": 6.529759140070334e-06, "loss": 0.8886, "step": 6906 }, { "epoch": 0.41245670607906365, "grad_norm": 2.040700912475586, "learning_rate": 6.5290956140932925e-06, "loss": 0.8574, "step": 6907 }, { "epoch": 0.4125164218320793, "grad_norm": 2.2599434852600098, "learning_rate": 6.52843208811625e-06, "loss": 0.8785, "step": 6908 }, { "epoch": 0.4125761375850949, "grad_norm": 4.502411365509033, "learning_rate": 6.527768562139209e-06, "loss": 0.8732, "step": 6909 }, { "epoch": 0.4126358533381106, "grad_norm": 2.1439120769500732, "learning_rate": 6.527105036162166e-06, "loss": 0.8891, "step": 6910 }, { "epoch": 0.41269556909112626, "grad_norm": 1.925967812538147, "learning_rate": 6.526441510185124e-06, "loss": 0.8734, "step": 6911 }, { "epoch": 0.41275528484414187, "grad_norm": 2.43715500831604, "learning_rate": 6.525777984208083e-06, "loss": 0.8831, "step": 6912 }, { "epoch": 0.41281500059715753, "grad_norm": 2.291609048843384, "learning_rate": 6.52511445823104e-06, "loss": 0.8774, "step": 6913 }, { "epoch": 0.4128747163501732, "grad_norm": 1.6071386337280273, "learning_rate": 6.524450932253998e-06, "loss": 0.8527, "step": 6914 }, { "epoch": 0.4129344321031888, "grad_norm": 1.6253302097320557, "learning_rate": 6.523787406276956e-06, "loss": 0.9006, "step": 6915 }, { "epoch": 0.4129941478562045, "grad_norm": 3.6817970275878906, "learning_rate": 6.5231238802999145e-06, "loss": 0.89, "step": 6916 }, { "epoch": 0.41305386360922014, "grad_norm": 2.547032594680786, "learning_rate": 6.522460354322873e-06, "loss": 0.906, "step": 6917 }, { "epoch": 0.41311357936223575, "grad_norm": 2.011115074157715, "learning_rate": 6.52179682834583e-06, "loss": 0.8705, "step": 6918 }, { "epoch": 0.4131732951152514, "grad_norm": 2.5216543674468994, "learning_rate": 6.521133302368788e-06, "loss": 0.8953, "step": 6919 }, { "epoch": 0.413233010868267, "grad_norm": 2.572173595428467, "learning_rate": 6.520469776391746e-06, "loss": 0.86, "step": 6920 }, { "epoch": 0.4132927266212827, "grad_norm": 1.8842333555221558, "learning_rate": 6.519806250414704e-06, "loss": 0.8768, "step": 6921 }, { "epoch": 0.41335244237429836, "grad_norm": 2.0570356845855713, "learning_rate": 6.519142724437662e-06, "loss": 0.8936, "step": 6922 }, { "epoch": 0.41341215812731397, "grad_norm": 1.9431055784225464, "learning_rate": 6.5184791984606196e-06, "loss": 0.8548, "step": 6923 }, { "epoch": 0.41347187388032963, "grad_norm": 2.2902071475982666, "learning_rate": 6.5178156724835785e-06, "loss": 0.8946, "step": 6924 }, { "epoch": 0.4135315896333453, "grad_norm": 2.023188829421997, "learning_rate": 6.517152146506536e-06, "loss": 0.8711, "step": 6925 }, { "epoch": 0.4135913053863609, "grad_norm": 2.8344717025756836, "learning_rate": 6.516488620529494e-06, "loss": 0.8519, "step": 6926 }, { "epoch": 0.4136510211393766, "grad_norm": 2.41436505317688, "learning_rate": 6.515825094552453e-06, "loss": 0.8881, "step": 6927 }, { "epoch": 0.41371073689239224, "grad_norm": 2.148587226867676, "learning_rate": 6.51516156857541e-06, "loss": 0.8996, "step": 6928 }, { "epoch": 0.41377045264540785, "grad_norm": 3.0610148906707764, "learning_rate": 6.514498042598368e-06, "loss": 0.8647, "step": 6929 }, { "epoch": 0.4138301683984235, "grad_norm": 1.9720486402511597, "learning_rate": 6.513834516621325e-06, "loss": 0.8995, "step": 6930 }, { "epoch": 0.4138898841514391, "grad_norm": 3.0960075855255127, "learning_rate": 6.513170990644284e-06, "loss": 0.8789, "step": 6931 }, { "epoch": 0.4139495999044548, "grad_norm": 2.9669463634490967, "learning_rate": 6.5125074646672425e-06, "loss": 0.8922, "step": 6932 }, { "epoch": 0.41400931565747046, "grad_norm": 2.052691698074341, "learning_rate": 6.5118439386902e-06, "loss": 0.8988, "step": 6933 }, { "epoch": 0.41406903141048607, "grad_norm": 3.1136364936828613, "learning_rate": 6.511180412713159e-06, "loss": 0.9058, "step": 6934 }, { "epoch": 0.41412874716350173, "grad_norm": 2.447448492050171, "learning_rate": 6.510516886736116e-06, "loss": 0.8438, "step": 6935 }, { "epoch": 0.4141884629165174, "grad_norm": 1.8169159889221191, "learning_rate": 6.509853360759074e-06, "loss": 0.8502, "step": 6936 }, { "epoch": 0.414248178669533, "grad_norm": 2.1105785369873047, "learning_rate": 6.509189834782033e-06, "loss": 0.8553, "step": 6937 }, { "epoch": 0.4143078944225487, "grad_norm": 2.508070230484009, "learning_rate": 6.50852630880499e-06, "loss": 0.8792, "step": 6938 }, { "epoch": 0.41436761017556434, "grad_norm": 1.8216432332992554, "learning_rate": 6.507862782827948e-06, "loss": 0.8894, "step": 6939 }, { "epoch": 0.41442732592857995, "grad_norm": 1.8541995286941528, "learning_rate": 6.5071992568509056e-06, "loss": 0.848, "step": 6940 }, { "epoch": 0.4144870416815956, "grad_norm": 1.868198037147522, "learning_rate": 6.5065357308738645e-06, "loss": 0.8431, "step": 6941 }, { "epoch": 0.4145467574346112, "grad_norm": 2.2117996215820312, "learning_rate": 6.505872204896823e-06, "loss": 0.8569, "step": 6942 }, { "epoch": 0.4146064731876269, "grad_norm": 2.16149640083313, "learning_rate": 6.50520867891978e-06, "loss": 0.856, "step": 6943 }, { "epoch": 0.41466618894064255, "grad_norm": 1.7643872499465942, "learning_rate": 6.504545152942738e-06, "loss": 0.861, "step": 6944 }, { "epoch": 0.41472590469365817, "grad_norm": 2.061199188232422, "learning_rate": 6.503881626965696e-06, "loss": 0.8467, "step": 6945 }, { "epoch": 0.41478562044667383, "grad_norm": 1.8110820055007935, "learning_rate": 6.503218100988654e-06, "loss": 0.8503, "step": 6946 }, { "epoch": 0.4148453361996895, "grad_norm": 1.9222750663757324, "learning_rate": 6.502554575011612e-06, "loss": 0.8425, "step": 6947 }, { "epoch": 0.4149050519527051, "grad_norm": 1.8872098922729492, "learning_rate": 6.5018910490345695e-06, "loss": 0.8842, "step": 6948 }, { "epoch": 0.41496476770572077, "grad_norm": 2.310821294784546, "learning_rate": 6.5012275230575285e-06, "loss": 0.8896, "step": 6949 }, { "epoch": 0.41502448345873644, "grad_norm": 2.876157522201538, "learning_rate": 6.500563997080486e-06, "loss": 0.898, "step": 6950 }, { "epoch": 0.41508419921175205, "grad_norm": 2.26702880859375, "learning_rate": 6.499900471103444e-06, "loss": 0.8618, "step": 6951 }, { "epoch": 0.4151439149647677, "grad_norm": 2.4476072788238525, "learning_rate": 6.499236945126403e-06, "loss": 0.9259, "step": 6952 }, { "epoch": 0.4152036307177834, "grad_norm": 2.4429171085357666, "learning_rate": 6.49857341914936e-06, "loss": 0.878, "step": 6953 }, { "epoch": 0.415263346470799, "grad_norm": 1.9417246580123901, "learning_rate": 6.497909893172318e-06, "loss": 0.8652, "step": 6954 }, { "epoch": 0.41532306222381465, "grad_norm": 2.641879081726074, "learning_rate": 6.497246367195275e-06, "loss": 0.8919, "step": 6955 }, { "epoch": 0.41538277797683026, "grad_norm": 2.2716269493103027, "learning_rate": 6.496582841218234e-06, "loss": 0.8748, "step": 6956 }, { "epoch": 0.41544249372984593, "grad_norm": 4.0073041915893555, "learning_rate": 6.4959193152411924e-06, "loss": 0.9088, "step": 6957 }, { "epoch": 0.4155022094828616, "grad_norm": 1.9117120504379272, "learning_rate": 6.49525578926415e-06, "loss": 0.8697, "step": 6958 }, { "epoch": 0.4155619252358772, "grad_norm": 2.4091949462890625, "learning_rate": 6.494592263287109e-06, "loss": 0.9051, "step": 6959 }, { "epoch": 0.41562164098889287, "grad_norm": 2.677388906478882, "learning_rate": 6.493928737310066e-06, "loss": 0.9008, "step": 6960 }, { "epoch": 0.41568135674190854, "grad_norm": 2.350295066833496, "learning_rate": 6.493265211333024e-06, "loss": 0.8666, "step": 6961 }, { "epoch": 0.41574107249492415, "grad_norm": 2.9076130390167236, "learning_rate": 6.492601685355983e-06, "loss": 0.9043, "step": 6962 }, { "epoch": 0.4158007882479398, "grad_norm": 1.828706979751587, "learning_rate": 6.49193815937894e-06, "loss": 0.9001, "step": 6963 }, { "epoch": 0.4158605040009555, "grad_norm": 1.7332489490509033, "learning_rate": 6.491274633401898e-06, "loss": 0.8787, "step": 6964 }, { "epoch": 0.4159202197539711, "grad_norm": 2.0613105297088623, "learning_rate": 6.4906111074248555e-06, "loss": 0.8627, "step": 6965 }, { "epoch": 0.41597993550698675, "grad_norm": 1.8987805843353271, "learning_rate": 6.4899475814478145e-06, "loss": 0.858, "step": 6966 }, { "epoch": 0.41603965126000236, "grad_norm": 2.499401330947876, "learning_rate": 6.489284055470773e-06, "loss": 0.8846, "step": 6967 }, { "epoch": 0.41609936701301803, "grad_norm": 2.98185396194458, "learning_rate": 6.48862052949373e-06, "loss": 0.8816, "step": 6968 }, { "epoch": 0.4161590827660337, "grad_norm": 3.1476073265075684, "learning_rate": 6.487957003516688e-06, "loss": 0.8518, "step": 6969 }, { "epoch": 0.4162187985190493, "grad_norm": 2.794877290725708, "learning_rate": 6.487293477539646e-06, "loss": 0.8747, "step": 6970 }, { "epoch": 0.41627851427206497, "grad_norm": 1.8088222742080688, "learning_rate": 6.486629951562604e-06, "loss": 0.87, "step": 6971 }, { "epoch": 0.41633823002508064, "grad_norm": 1.822749137878418, "learning_rate": 6.485966425585562e-06, "loss": 0.885, "step": 6972 }, { "epoch": 0.41639794577809625, "grad_norm": 1.876495361328125, "learning_rate": 6.4853028996085195e-06, "loss": 0.8336, "step": 6973 }, { "epoch": 0.4164576615311119, "grad_norm": 3.1788337230682373, "learning_rate": 6.4846393736314784e-06, "loss": 0.8554, "step": 6974 }, { "epoch": 0.4165173772841276, "grad_norm": 2.4610936641693115, "learning_rate": 6.483975847654436e-06, "loss": 0.8806, "step": 6975 }, { "epoch": 0.4165770930371432, "grad_norm": 2.201826810836792, "learning_rate": 6.483312321677394e-06, "loss": 0.8525, "step": 6976 }, { "epoch": 0.41663680879015885, "grad_norm": 2.288947820663452, "learning_rate": 6.482648795700353e-06, "loss": 0.8692, "step": 6977 }, { "epoch": 0.41669652454317446, "grad_norm": 2.285283327102661, "learning_rate": 6.48198526972331e-06, "loss": 0.8581, "step": 6978 }, { "epoch": 0.41675624029619013, "grad_norm": 1.7152364253997803, "learning_rate": 6.481321743746268e-06, "loss": 0.8986, "step": 6979 }, { "epoch": 0.4168159560492058, "grad_norm": 2.6256768703460693, "learning_rate": 6.480658217769225e-06, "loss": 0.9035, "step": 6980 }, { "epoch": 0.4168756718022214, "grad_norm": 2.7961418628692627, "learning_rate": 6.479994691792184e-06, "loss": 0.8741, "step": 6981 }, { "epoch": 0.41693538755523707, "grad_norm": 2.118438243865967, "learning_rate": 6.479331165815142e-06, "loss": 0.9094, "step": 6982 }, { "epoch": 0.41699510330825273, "grad_norm": 4.665754795074463, "learning_rate": 6.4786676398381e-06, "loss": 0.8374, "step": 6983 }, { "epoch": 0.41705481906126834, "grad_norm": 1.9710320234298706, "learning_rate": 6.478004113861059e-06, "loss": 0.8559, "step": 6984 }, { "epoch": 0.417114534814284, "grad_norm": 2.015071392059326, "learning_rate": 6.477340587884016e-06, "loss": 0.8519, "step": 6985 }, { "epoch": 0.4171742505672997, "grad_norm": 2.744572639465332, "learning_rate": 6.476677061906974e-06, "loss": 0.8838, "step": 6986 }, { "epoch": 0.4172339663203153, "grad_norm": 2.209467887878418, "learning_rate": 6.476013535929933e-06, "loss": 0.8816, "step": 6987 }, { "epoch": 0.41729368207333095, "grad_norm": 2.6514525413513184, "learning_rate": 6.47535000995289e-06, "loss": 0.8371, "step": 6988 }, { "epoch": 0.4173533978263466, "grad_norm": 2.5168039798736572, "learning_rate": 6.474686483975848e-06, "loss": 0.8944, "step": 6989 }, { "epoch": 0.4174131135793622, "grad_norm": 2.3979785442352295, "learning_rate": 6.4740229579988055e-06, "loss": 0.8634, "step": 6990 }, { "epoch": 0.4174728293323779, "grad_norm": 2.1512155532836914, "learning_rate": 6.4733594320217645e-06, "loss": 0.8899, "step": 6991 }, { "epoch": 0.4175325450853935, "grad_norm": 1.871096134185791, "learning_rate": 6.4726959060447226e-06, "loss": 0.8965, "step": 6992 }, { "epoch": 0.41759226083840917, "grad_norm": 1.7051745653152466, "learning_rate": 6.47203238006768e-06, "loss": 0.8562, "step": 6993 }, { "epoch": 0.41765197659142483, "grad_norm": 2.793504238128662, "learning_rate": 6.471368854090638e-06, "loss": 0.8481, "step": 6994 }, { "epoch": 0.41771169234444044, "grad_norm": 2.0421812534332275, "learning_rate": 6.470705328113596e-06, "loss": 0.897, "step": 6995 }, { "epoch": 0.4177714080974561, "grad_norm": 2.018131971359253, "learning_rate": 6.470041802136554e-06, "loss": 0.8907, "step": 6996 }, { "epoch": 0.4178311238504718, "grad_norm": 1.670957326889038, "learning_rate": 6.469378276159512e-06, "loss": 0.8764, "step": 6997 }, { "epoch": 0.4178908396034874, "grad_norm": 2.1855063438415527, "learning_rate": 6.4687147501824695e-06, "loss": 0.9026, "step": 6998 }, { "epoch": 0.41795055535650305, "grad_norm": 3.9527249336242676, "learning_rate": 6.468051224205428e-06, "loss": 0.8785, "step": 6999 }, { "epoch": 0.4180102711095187, "grad_norm": 1.8794724941253662, "learning_rate": 6.467387698228386e-06, "loss": 0.9276, "step": 7000 }, { "epoch": 0.4180102711095187, "eval_text_loss": 0.9245206713676453, "eval_text_runtime": 15.1523, "eval_text_samples_per_second": 263.986, "eval_text_steps_per_second": 0.528, "step": 7000 }, { "epoch": 0.4180102711095187, "eval_image_loss": 0.6411994695663452, "eval_image_runtime": 5.0206, "eval_image_samples_per_second": 796.72, "eval_image_steps_per_second": 1.593, "step": 7000 }, { "epoch": 0.4180102711095187, "eval_video_loss": 1.0919487476348877, "eval_video_runtime": 76.6055, "eval_video_samples_per_second": 52.216, "eval_video_steps_per_second": 0.104, "step": 7000 }, { "epoch": 0.4180699868625343, "grad_norm": 2.2751033306121826, "learning_rate": 6.466724172251344e-06, "loss": 0.8969, "step": 7001 }, { "epoch": 0.41812970261555, "grad_norm": 1.691577672958374, "learning_rate": 6.466060646274303e-06, "loss": 0.8815, "step": 7002 }, { "epoch": 0.4181894183685656, "grad_norm": 5.818009376525879, "learning_rate": 6.46539712029726e-06, "loss": 0.8777, "step": 7003 }, { "epoch": 0.41824913412158127, "grad_norm": 5.12659215927124, "learning_rate": 6.464733594320218e-06, "loss": 0.8805, "step": 7004 }, { "epoch": 0.41830884987459693, "grad_norm": 2.408926248550415, "learning_rate": 6.464070068343175e-06, "loss": 0.8731, "step": 7005 }, { "epoch": 0.41836856562761254, "grad_norm": 2.450157642364502, "learning_rate": 6.463406542366134e-06, "loss": 0.8901, "step": 7006 }, { "epoch": 0.4184282813806282, "grad_norm": 3.10829496383667, "learning_rate": 6.462743016389092e-06, "loss": 0.8669, "step": 7007 }, { "epoch": 0.4184879971336439, "grad_norm": 2.0332698822021484, "learning_rate": 6.46207949041205e-06, "loss": 0.8939, "step": 7008 }, { "epoch": 0.4185477128866595, "grad_norm": 1.9329913854599, "learning_rate": 6.4614159644350086e-06, "loss": 0.8672, "step": 7009 }, { "epoch": 0.41860742863967515, "grad_norm": 1.727146863937378, "learning_rate": 6.460752438457966e-06, "loss": 0.8602, "step": 7010 }, { "epoch": 0.4186671443926908, "grad_norm": 2.3879854679107666, "learning_rate": 6.460088912480924e-06, "loss": 0.8602, "step": 7011 }, { "epoch": 0.4187268601457064, "grad_norm": 2.62519907951355, "learning_rate": 6.459425386503883e-06, "loss": 0.8964, "step": 7012 }, { "epoch": 0.4187865758987221, "grad_norm": 2.0615041255950928, "learning_rate": 6.45876186052684e-06, "loss": 0.8892, "step": 7013 }, { "epoch": 0.4188462916517377, "grad_norm": 2.16108775138855, "learning_rate": 6.458098334549798e-06, "loss": 0.8787, "step": 7014 }, { "epoch": 0.41890600740475337, "grad_norm": 2.3523426055908203, "learning_rate": 6.4574348085727555e-06, "loss": 0.9212, "step": 7015 }, { "epoch": 0.41896572315776903, "grad_norm": 2.1061785221099854, "learning_rate": 6.4567712825957144e-06, "loss": 0.8362, "step": 7016 }, { "epoch": 0.41902543891078464, "grad_norm": 2.344522714614868, "learning_rate": 6.4561077566186725e-06, "loss": 0.8555, "step": 7017 }, { "epoch": 0.4190851546638003, "grad_norm": 1.984138011932373, "learning_rate": 6.45544423064163e-06, "loss": 0.903, "step": 7018 }, { "epoch": 0.419144870416816, "grad_norm": 3.322908639907837, "learning_rate": 6.454780704664588e-06, "loss": 0.8586, "step": 7019 }, { "epoch": 0.4192045861698316, "grad_norm": 2.7103641033172607, "learning_rate": 6.454117178687546e-06, "loss": 0.8635, "step": 7020 }, { "epoch": 0.41926430192284725, "grad_norm": 1.9621082544326782, "learning_rate": 6.453453652710504e-06, "loss": 0.8745, "step": 7021 }, { "epoch": 0.4193240176758629, "grad_norm": 2.9005846977233887, "learning_rate": 6.452790126733462e-06, "loss": 0.8981, "step": 7022 }, { "epoch": 0.4193837334288785, "grad_norm": 2.2699193954467773, "learning_rate": 6.4521266007564194e-06, "loss": 0.8948, "step": 7023 }, { "epoch": 0.4194434491818942, "grad_norm": 2.7546944618225098, "learning_rate": 6.451463074779378e-06, "loss": 0.8901, "step": 7024 }, { "epoch": 0.41950316493490986, "grad_norm": 2.9698898792266846, "learning_rate": 6.450799548802336e-06, "loss": 0.888, "step": 7025 }, { "epoch": 0.41956288068792547, "grad_norm": 2.02488374710083, "learning_rate": 6.450136022825294e-06, "loss": 0.8962, "step": 7026 }, { "epoch": 0.41962259644094113, "grad_norm": 1.6528741121292114, "learning_rate": 6.449472496848253e-06, "loss": 0.8691, "step": 7027 }, { "epoch": 0.41968231219395674, "grad_norm": 1.6284716129302979, "learning_rate": 6.44880897087121e-06, "loss": 0.8412, "step": 7028 }, { "epoch": 0.4197420279469724, "grad_norm": 3.059391975402832, "learning_rate": 6.448145444894168e-06, "loss": 0.8702, "step": 7029 }, { "epoch": 0.4198017436999881, "grad_norm": 2.155073881149292, "learning_rate": 6.447481918917125e-06, "loss": 0.8872, "step": 7030 }, { "epoch": 0.4198614594530037, "grad_norm": 3.1715939044952393, "learning_rate": 6.446818392940084e-06, "loss": 0.8821, "step": 7031 }, { "epoch": 0.41992117520601935, "grad_norm": 2.689711093902588, "learning_rate": 6.446154866963042e-06, "loss": 0.8519, "step": 7032 }, { "epoch": 0.419980890959035, "grad_norm": 1.8997746706008911, "learning_rate": 6.445491340986e-06, "loss": 0.8605, "step": 7033 }, { "epoch": 0.4200406067120506, "grad_norm": 1.963667631149292, "learning_rate": 6.4448278150089585e-06, "loss": 0.8865, "step": 7034 }, { "epoch": 0.4201003224650663, "grad_norm": 3.528547525405884, "learning_rate": 6.444164289031916e-06, "loss": 0.8773, "step": 7035 }, { "epoch": 0.42016003821808195, "grad_norm": 2.6930410861968994, "learning_rate": 6.443500763054874e-06, "loss": 0.8507, "step": 7036 }, { "epoch": 0.42021975397109756, "grad_norm": 1.8141858577728271, "learning_rate": 6.442837237077833e-06, "loss": 0.8647, "step": 7037 }, { "epoch": 0.42027946972411323, "grad_norm": 2.377454996109009, "learning_rate": 6.44217371110079e-06, "loss": 0.8954, "step": 7038 }, { "epoch": 0.42033918547712884, "grad_norm": 2.2512803077697754, "learning_rate": 6.441510185123748e-06, "loss": 0.8954, "step": 7039 }, { "epoch": 0.4203989012301445, "grad_norm": 1.7923272848129272, "learning_rate": 6.4408466591467054e-06, "loss": 0.8949, "step": 7040 }, { "epoch": 0.42045861698316017, "grad_norm": 1.6235020160675049, "learning_rate": 6.440183133169664e-06, "loss": 0.8889, "step": 7041 }, { "epoch": 0.4205183327361758, "grad_norm": 3.17722749710083, "learning_rate": 6.4395196071926225e-06, "loss": 0.8798, "step": 7042 }, { "epoch": 0.42057804848919145, "grad_norm": 2.134216785430908, "learning_rate": 6.43885608121558e-06, "loss": 0.8791, "step": 7043 }, { "epoch": 0.4206377642422071, "grad_norm": 2.2655560970306396, "learning_rate": 6.438192555238538e-06, "loss": 0.8618, "step": 7044 }, { "epoch": 0.4206974799952227, "grad_norm": 2.122183322906494, "learning_rate": 6.437529029261496e-06, "loss": 0.8596, "step": 7045 }, { "epoch": 0.4207571957482384, "grad_norm": 2.2425966262817383, "learning_rate": 6.436865503284454e-06, "loss": 0.8732, "step": 7046 }, { "epoch": 0.42081691150125405, "grad_norm": 3.4851834774017334, "learning_rate": 6.436201977307412e-06, "loss": 0.9218, "step": 7047 }, { "epoch": 0.42087662725426966, "grad_norm": 1.908687949180603, "learning_rate": 6.435538451330369e-06, "loss": 0.8737, "step": 7048 }, { "epoch": 0.42093634300728533, "grad_norm": 2.1085445880889893, "learning_rate": 6.434874925353328e-06, "loss": 0.8933, "step": 7049 }, { "epoch": 0.42099605876030094, "grad_norm": 2.1404035091400146, "learning_rate": 6.434211399376286e-06, "loss": 0.8708, "step": 7050 }, { "epoch": 0.4210557745133166, "grad_norm": 2.2070152759552, "learning_rate": 6.433547873399244e-06, "loss": 0.8733, "step": 7051 }, { "epoch": 0.42111549026633227, "grad_norm": 1.6077723503112793, "learning_rate": 6.432884347422203e-06, "loss": 0.8608, "step": 7052 }, { "epoch": 0.4211752060193479, "grad_norm": 1.8791868686676025, "learning_rate": 6.43222082144516e-06, "loss": 0.8841, "step": 7053 }, { "epoch": 0.42123492177236355, "grad_norm": 1.6125555038452148, "learning_rate": 6.431557295468118e-06, "loss": 0.9117, "step": 7054 }, { "epoch": 0.4212946375253792, "grad_norm": 2.3086981773376465, "learning_rate": 6.430893769491075e-06, "loss": 0.918, "step": 7055 }, { "epoch": 0.4213543532783948, "grad_norm": 1.994255781173706, "learning_rate": 6.430230243514034e-06, "loss": 0.8888, "step": 7056 }, { "epoch": 0.4214140690314105, "grad_norm": 2.161355972290039, "learning_rate": 6.429566717536992e-06, "loss": 0.8753, "step": 7057 }, { "epoch": 0.42147378478442615, "grad_norm": 2.0825467109680176, "learning_rate": 6.4289031915599496e-06, "loss": 0.8547, "step": 7058 }, { "epoch": 0.42153350053744176, "grad_norm": 2.8645195960998535, "learning_rate": 6.4282396655829085e-06, "loss": 0.9054, "step": 7059 }, { "epoch": 0.42159321629045743, "grad_norm": 2.156548500061035, "learning_rate": 6.427576139605866e-06, "loss": 0.8677, "step": 7060 }, { "epoch": 0.4216529320434731, "grad_norm": 1.9017525911331177, "learning_rate": 6.426912613628824e-06, "loss": 0.8896, "step": 7061 }, { "epoch": 0.4217126477964887, "grad_norm": 3.103541851043701, "learning_rate": 6.426249087651783e-06, "loss": 0.9076, "step": 7062 }, { "epoch": 0.42177236354950437, "grad_norm": 2.778024673461914, "learning_rate": 6.42558556167474e-06, "loss": 0.8709, "step": 7063 }, { "epoch": 0.42183207930252, "grad_norm": 1.7548538446426392, "learning_rate": 6.424922035697698e-06, "loss": 0.8331, "step": 7064 }, { "epoch": 0.42189179505553565, "grad_norm": 2.8835556507110596, "learning_rate": 6.424258509720655e-06, "loss": 0.8811, "step": 7065 }, { "epoch": 0.4219515108085513, "grad_norm": 2.344315528869629, "learning_rate": 6.423594983743614e-06, "loss": 0.9076, "step": 7066 }, { "epoch": 0.4220112265615669, "grad_norm": 2.8436660766601562, "learning_rate": 6.4229314577665725e-06, "loss": 0.8693, "step": 7067 }, { "epoch": 0.4220709423145826, "grad_norm": 2.6150076389312744, "learning_rate": 6.42226793178953e-06, "loss": 0.8772, "step": 7068 }, { "epoch": 0.42213065806759825, "grad_norm": 3.43192720413208, "learning_rate": 6.421604405812488e-06, "loss": 0.8321, "step": 7069 }, { "epoch": 0.42219037382061386, "grad_norm": 1.698279857635498, "learning_rate": 6.420940879835446e-06, "loss": 0.8569, "step": 7070 }, { "epoch": 0.42225008957362953, "grad_norm": 2.553441286087036, "learning_rate": 6.420277353858404e-06, "loss": 0.8824, "step": 7071 }, { "epoch": 0.4223098053266452, "grad_norm": 2.086641788482666, "learning_rate": 6.419613827881362e-06, "loss": 0.8741, "step": 7072 }, { "epoch": 0.4223695210796608, "grad_norm": 2.0890307426452637, "learning_rate": 6.418950301904319e-06, "loss": 0.8786, "step": 7073 }, { "epoch": 0.42242923683267647, "grad_norm": 1.6402058601379395, "learning_rate": 6.418286775927278e-06, "loss": 0.89, "step": 7074 }, { "epoch": 0.4224889525856921, "grad_norm": 1.7959452867507935, "learning_rate": 6.417623249950236e-06, "loss": 0.8767, "step": 7075 }, { "epoch": 0.42254866833870774, "grad_norm": 2.068727970123291, "learning_rate": 6.416959723973194e-06, "loss": 0.8973, "step": 7076 }, { "epoch": 0.4226083840917234, "grad_norm": 3.5570976734161377, "learning_rate": 6.416296197996153e-06, "loss": 0.8938, "step": 7077 }, { "epoch": 0.422668099844739, "grad_norm": 3.127406597137451, "learning_rate": 6.41563267201911e-06, "loss": 0.9059, "step": 7078 }, { "epoch": 0.4227278155977547, "grad_norm": 2.797508716583252, "learning_rate": 6.414969146042068e-06, "loss": 0.8948, "step": 7079 }, { "epoch": 0.42278753135077035, "grad_norm": 1.822841763496399, "learning_rate": 6.414305620065025e-06, "loss": 0.8532, "step": 7080 }, { "epoch": 0.42284724710378596, "grad_norm": 2.1575074195861816, "learning_rate": 6.413642094087984e-06, "loss": 0.8844, "step": 7081 }, { "epoch": 0.4229069628568016, "grad_norm": 2.783235549926758, "learning_rate": 6.412978568110942e-06, "loss": 0.8495, "step": 7082 }, { "epoch": 0.4229666786098173, "grad_norm": 2.143998146057129, "learning_rate": 6.4123150421338995e-06, "loss": 0.8607, "step": 7083 }, { "epoch": 0.4230263943628329, "grad_norm": 1.8174598217010498, "learning_rate": 6.4116515161568585e-06, "loss": 0.8741, "step": 7084 }, { "epoch": 0.42308611011584857, "grad_norm": 1.6732885837554932, "learning_rate": 6.410987990179816e-06, "loss": 0.8625, "step": 7085 }, { "epoch": 0.42314582586886423, "grad_norm": 1.9475184679031372, "learning_rate": 6.410324464202774e-06, "loss": 0.9018, "step": 7086 }, { "epoch": 0.42320554162187984, "grad_norm": 2.655714750289917, "learning_rate": 6.409660938225733e-06, "loss": 0.8497, "step": 7087 }, { "epoch": 0.4232652573748955, "grad_norm": 3.035839080810547, "learning_rate": 6.40899741224869e-06, "loss": 0.8621, "step": 7088 }, { "epoch": 0.4233249731279111, "grad_norm": 1.8002707958221436, "learning_rate": 6.408333886271648e-06, "loss": 0.8452, "step": 7089 }, { "epoch": 0.4233846888809268, "grad_norm": 2.0536258220672607, "learning_rate": 6.407670360294605e-06, "loss": 0.8718, "step": 7090 }, { "epoch": 0.42344440463394245, "grad_norm": 1.6580573320388794, "learning_rate": 6.407006834317564e-06, "loss": 0.9012, "step": 7091 }, { "epoch": 0.42350412038695806, "grad_norm": 4.910435199737549, "learning_rate": 6.4063433083405224e-06, "loss": 0.8899, "step": 7092 }, { "epoch": 0.4235638361399737, "grad_norm": 4.889121055603027, "learning_rate": 6.40567978236348e-06, "loss": 0.8764, "step": 7093 }, { "epoch": 0.4236235518929894, "grad_norm": 1.9008934497833252, "learning_rate": 6.405016256386438e-06, "loss": 0.8523, "step": 7094 }, { "epoch": 0.423683267646005, "grad_norm": 1.896384358406067, "learning_rate": 6.404352730409396e-06, "loss": 0.8679, "step": 7095 }, { "epoch": 0.42374298339902067, "grad_norm": 7.657535552978516, "learning_rate": 6.403689204432354e-06, "loss": 0.8813, "step": 7096 }, { "epoch": 0.42380269915203633, "grad_norm": 1.7083462476730347, "learning_rate": 6.403025678455312e-06, "loss": 0.8948, "step": 7097 }, { "epoch": 0.42386241490505194, "grad_norm": 2.153529405593872, "learning_rate": 6.402362152478269e-06, "loss": 0.846, "step": 7098 }, { "epoch": 0.4239221306580676, "grad_norm": 2.13879656791687, "learning_rate": 6.401698626501228e-06, "loss": 0.8581, "step": 7099 }, { "epoch": 0.4239818464110832, "grad_norm": 2.285079002380371, "learning_rate": 6.4010351005241855e-06, "loss": 0.8423, "step": 7100 }, { "epoch": 0.4240415621640989, "grad_norm": 1.962857723236084, "learning_rate": 6.400371574547144e-06, "loss": 0.8677, "step": 7101 }, { "epoch": 0.42410127791711455, "grad_norm": 2.2054994106292725, "learning_rate": 6.399708048570103e-06, "loss": 0.881, "step": 7102 }, { "epoch": 0.42416099367013016, "grad_norm": 3.760129690170288, "learning_rate": 6.39904452259306e-06, "loss": 0.9026, "step": 7103 }, { "epoch": 0.4242207094231458, "grad_norm": 2.0534374713897705, "learning_rate": 6.398380996616018e-06, "loss": 0.8691, "step": 7104 }, { "epoch": 0.4242804251761615, "grad_norm": 2.1622183322906494, "learning_rate": 6.397717470638975e-06, "loss": 0.8918, "step": 7105 }, { "epoch": 0.4243401409291771, "grad_norm": 2.0623724460601807, "learning_rate": 6.397053944661934e-06, "loss": 0.8519, "step": 7106 }, { "epoch": 0.42439985668219277, "grad_norm": 2.497783660888672, "learning_rate": 6.396390418684892e-06, "loss": 0.8531, "step": 7107 }, { "epoch": 0.42445957243520843, "grad_norm": 2.1425633430480957, "learning_rate": 6.3957268927078495e-06, "loss": 0.8635, "step": 7108 }, { "epoch": 0.42451928818822404, "grad_norm": 2.3354742527008057, "learning_rate": 6.3950633667308085e-06, "loss": 0.8747, "step": 7109 }, { "epoch": 0.4245790039412397, "grad_norm": 2.0690956115722656, "learning_rate": 6.394399840753766e-06, "loss": 0.906, "step": 7110 }, { "epoch": 0.4246387196942553, "grad_norm": 3.5237772464752197, "learning_rate": 6.393736314776724e-06, "loss": 0.8409, "step": 7111 }, { "epoch": 0.424698435447271, "grad_norm": 2.640989303588867, "learning_rate": 6.393072788799683e-06, "loss": 0.9268, "step": 7112 }, { "epoch": 0.42475815120028665, "grad_norm": 1.5333235263824463, "learning_rate": 6.39240926282264e-06, "loss": 0.8464, "step": 7113 }, { "epoch": 0.42481786695330226, "grad_norm": 2.520430564880371, "learning_rate": 6.391745736845598e-06, "loss": 0.8846, "step": 7114 }, { "epoch": 0.4248775827063179, "grad_norm": 2.690667152404785, "learning_rate": 6.391082210868555e-06, "loss": 0.8564, "step": 7115 }, { "epoch": 0.4249372984593336, "grad_norm": 1.8118592500686646, "learning_rate": 6.390418684891514e-06, "loss": 0.905, "step": 7116 }, { "epoch": 0.4249970142123492, "grad_norm": 2.0720572471618652, "learning_rate": 6.389755158914472e-06, "loss": 0.8668, "step": 7117 }, { "epoch": 0.42505672996536487, "grad_norm": 2.0084426403045654, "learning_rate": 6.38909163293743e-06, "loss": 0.8854, "step": 7118 }, { "epoch": 0.42511644571838053, "grad_norm": 1.7424052953720093, "learning_rate": 6.388428106960388e-06, "loss": 0.9237, "step": 7119 }, { "epoch": 0.42517616147139614, "grad_norm": 1.893908143043518, "learning_rate": 6.387764580983346e-06, "loss": 0.8942, "step": 7120 }, { "epoch": 0.4252358772244118, "grad_norm": 1.96617591381073, "learning_rate": 6.387101055006304e-06, "loss": 0.8513, "step": 7121 }, { "epoch": 0.4252955929774275, "grad_norm": 2.901327133178711, "learning_rate": 6.386437529029262e-06, "loss": 0.8703, "step": 7122 }, { "epoch": 0.4253553087304431, "grad_norm": 1.6093639135360718, "learning_rate": 6.385774003052219e-06, "loss": 0.8613, "step": 7123 }, { "epoch": 0.42541502448345875, "grad_norm": 2.5125436782836914, "learning_rate": 6.385110477075178e-06, "loss": 0.8821, "step": 7124 }, { "epoch": 0.42547474023647436, "grad_norm": 2.732816457748413, "learning_rate": 6.3844469510981355e-06, "loss": 0.8804, "step": 7125 }, { "epoch": 0.42553445598949, "grad_norm": 3.2040815353393555, "learning_rate": 6.383783425121094e-06, "loss": 0.8482, "step": 7126 }, { "epoch": 0.4255941717425057, "grad_norm": 2.4595751762390137, "learning_rate": 6.3831198991440526e-06, "loss": 0.8895, "step": 7127 }, { "epoch": 0.4256538874955213, "grad_norm": 3.2186856269836426, "learning_rate": 6.38245637316701e-06, "loss": 0.8298, "step": 7128 }, { "epoch": 0.42571360324853696, "grad_norm": 2.619814872741699, "learning_rate": 6.381792847189968e-06, "loss": 0.8774, "step": 7129 }, { "epoch": 0.42577331900155263, "grad_norm": 4.7266364097595215, "learning_rate": 6.381129321212925e-06, "loss": 0.8911, "step": 7130 }, { "epoch": 0.42583303475456824, "grad_norm": 2.0353610515594482, "learning_rate": 6.380465795235884e-06, "loss": 0.8437, "step": 7131 }, { "epoch": 0.4258927505075839, "grad_norm": 2.880692958831787, "learning_rate": 6.379802269258842e-06, "loss": 0.8604, "step": 7132 }, { "epoch": 0.42595246626059957, "grad_norm": 1.660792350769043, "learning_rate": 6.3791387432817995e-06, "loss": 0.8596, "step": 7133 }, { "epoch": 0.4260121820136152, "grad_norm": 2.676267385482788, "learning_rate": 6.378475217304758e-06, "loss": 0.8798, "step": 7134 }, { "epoch": 0.42607189776663085, "grad_norm": 2.701883554458618, "learning_rate": 6.377811691327716e-06, "loss": 0.8316, "step": 7135 }, { "epoch": 0.42613161351964646, "grad_norm": 2.419020175933838, "learning_rate": 6.377148165350674e-06, "loss": 0.8881, "step": 7136 }, { "epoch": 0.4261913292726621, "grad_norm": 2.498821496963501, "learning_rate": 6.376484639373633e-06, "loss": 0.9028, "step": 7137 }, { "epoch": 0.4262510450256778, "grad_norm": 3.055264711380005, "learning_rate": 6.37582111339659e-06, "loss": 0.8637, "step": 7138 }, { "epoch": 0.4263107607786934, "grad_norm": 2.0407068729400635, "learning_rate": 6.375157587419548e-06, "loss": 0.8763, "step": 7139 }, { "epoch": 0.42637047653170906, "grad_norm": 5.745910167694092, "learning_rate": 6.374494061442505e-06, "loss": 0.883, "step": 7140 }, { "epoch": 0.42643019228472473, "grad_norm": 16.072206497192383, "learning_rate": 6.373830535465464e-06, "loss": 0.864, "step": 7141 }, { "epoch": 0.42648990803774034, "grad_norm": 2.935025215148926, "learning_rate": 6.373167009488422e-06, "loss": 0.8573, "step": 7142 }, { "epoch": 0.426549623790756, "grad_norm": 2.0422399044036865, "learning_rate": 6.37250348351138e-06, "loss": 0.8341, "step": 7143 }, { "epoch": 0.42660933954377167, "grad_norm": 2.2142748832702637, "learning_rate": 6.371839957534338e-06, "loss": 0.8544, "step": 7144 }, { "epoch": 0.4266690552967873, "grad_norm": 3.2784104347229004, "learning_rate": 6.371176431557296e-06, "loss": 0.8926, "step": 7145 }, { "epoch": 0.42672877104980295, "grad_norm": 2.231971502304077, "learning_rate": 6.370512905580254e-06, "loss": 0.8372, "step": 7146 }, { "epoch": 0.42678848680281856, "grad_norm": 3.906108856201172, "learning_rate": 6.369849379603212e-06, "loss": 0.9166, "step": 7147 }, { "epoch": 0.4268482025558342, "grad_norm": 4.113705158233643, "learning_rate": 6.369185853626169e-06, "loss": 0.8788, "step": 7148 }, { "epoch": 0.4269079183088499, "grad_norm": 4.883082389831543, "learning_rate": 6.368522327649128e-06, "loss": 0.9007, "step": 7149 }, { "epoch": 0.4269676340618655, "grad_norm": 2.2609758377075195, "learning_rate": 6.3678588016720855e-06, "loss": 0.8716, "step": 7150 }, { "epoch": 0.42702734981488116, "grad_norm": 2.4146242141723633, "learning_rate": 6.367195275695044e-06, "loss": 0.8677, "step": 7151 }, { "epoch": 0.42708706556789683, "grad_norm": 2.872418165206909, "learning_rate": 6.3665317497180025e-06, "loss": 0.8411, "step": 7152 }, { "epoch": 0.42714678132091244, "grad_norm": 2.343352794647217, "learning_rate": 6.36586822374096e-06, "loss": 0.882, "step": 7153 }, { "epoch": 0.4272064970739281, "grad_norm": 2.0218899250030518, "learning_rate": 6.365204697763918e-06, "loss": 0.8661, "step": 7154 }, { "epoch": 0.42726621282694377, "grad_norm": 2.2759130001068115, "learning_rate": 6.364541171786875e-06, "loss": 0.9028, "step": 7155 }, { "epoch": 0.4273259285799594, "grad_norm": 2.357985019683838, "learning_rate": 6.363877645809834e-06, "loss": 0.8829, "step": 7156 }, { "epoch": 0.42738564433297505, "grad_norm": 1.7415802478790283, "learning_rate": 6.363214119832792e-06, "loss": 0.893, "step": 7157 }, { "epoch": 0.4274453600859907, "grad_norm": 2.2245090007781982, "learning_rate": 6.3625505938557494e-06, "loss": 0.9046, "step": 7158 }, { "epoch": 0.4275050758390063, "grad_norm": 2.0168519020080566, "learning_rate": 6.361887067878708e-06, "loss": 0.8554, "step": 7159 }, { "epoch": 0.427564791592022, "grad_norm": 2.999405860900879, "learning_rate": 6.361223541901666e-06, "loss": 0.8619, "step": 7160 }, { "epoch": 0.4276245073450376, "grad_norm": 1.805241346359253, "learning_rate": 6.360560015924624e-06, "loss": 0.8673, "step": 7161 }, { "epoch": 0.42768422309805326, "grad_norm": 2.755265235900879, "learning_rate": 6.359896489947583e-06, "loss": 0.8699, "step": 7162 }, { "epoch": 0.42774393885106893, "grad_norm": 1.9740568399429321, "learning_rate": 6.35923296397054e-06, "loss": 0.9086, "step": 7163 }, { "epoch": 0.42780365460408454, "grad_norm": 2.3625924587249756, "learning_rate": 6.358569437993498e-06, "loss": 0.8584, "step": 7164 }, { "epoch": 0.4278633703571002, "grad_norm": 3.9215917587280273, "learning_rate": 6.357905912016455e-06, "loss": 0.8684, "step": 7165 }, { "epoch": 0.42792308611011587, "grad_norm": 2.2678956985473633, "learning_rate": 6.357242386039414e-06, "loss": 0.8678, "step": 7166 }, { "epoch": 0.4279828018631315, "grad_norm": 1.9697072505950928, "learning_rate": 6.356578860062372e-06, "loss": 0.8864, "step": 7167 }, { "epoch": 0.42804251761614714, "grad_norm": 2.4863810539245605, "learning_rate": 6.35591533408533e-06, "loss": 0.9434, "step": 7168 }, { "epoch": 0.4281022333691628, "grad_norm": 1.6931955814361572, "learning_rate": 6.355251808108288e-06, "loss": 0.8734, "step": 7169 }, { "epoch": 0.4281619491221784, "grad_norm": 1.8539706468582153, "learning_rate": 6.354588282131246e-06, "loss": 0.847, "step": 7170 }, { "epoch": 0.4282216648751941, "grad_norm": 2.497999429702759, "learning_rate": 6.353924756154204e-06, "loss": 0.8974, "step": 7171 }, { "epoch": 0.4282813806282097, "grad_norm": 1.5877822637557983, "learning_rate": 6.353261230177162e-06, "loss": 0.8586, "step": 7172 }, { "epoch": 0.42834109638122536, "grad_norm": 1.6943323612213135, "learning_rate": 6.352597704200119e-06, "loss": 0.8589, "step": 7173 }, { "epoch": 0.428400812134241, "grad_norm": 2.372551441192627, "learning_rate": 6.351934178223078e-06, "loss": 0.8839, "step": 7174 }, { "epoch": 0.42846052788725664, "grad_norm": 3.849395513534546, "learning_rate": 6.3512706522460355e-06, "loss": 0.8593, "step": 7175 }, { "epoch": 0.4285202436402723, "grad_norm": 1.841326117515564, "learning_rate": 6.3506071262689936e-06, "loss": 0.8744, "step": 7176 }, { "epoch": 0.42857995939328797, "grad_norm": 1.7526757717132568, "learning_rate": 6.3499436002919525e-06, "loss": 0.8626, "step": 7177 }, { "epoch": 0.4286396751463036, "grad_norm": 2.7066521644592285, "learning_rate": 6.34928007431491e-06, "loss": 0.8465, "step": 7178 }, { "epoch": 0.42869939089931924, "grad_norm": 3.240865707397461, "learning_rate": 6.348616548337868e-06, "loss": 0.8788, "step": 7179 }, { "epoch": 0.4287591066523349, "grad_norm": 2.3002312183380127, "learning_rate": 6.347953022360825e-06, "loss": 0.8742, "step": 7180 }, { "epoch": 0.4288188224053505, "grad_norm": 2.1904969215393066, "learning_rate": 6.347289496383784e-06, "loss": 0.8663, "step": 7181 }, { "epoch": 0.4288785381583662, "grad_norm": 1.5467761754989624, "learning_rate": 6.346625970406742e-06, "loss": 0.8929, "step": 7182 }, { "epoch": 0.4289382539113818, "grad_norm": 1.717368483543396, "learning_rate": 6.345962444429699e-06, "loss": 0.8499, "step": 7183 }, { "epoch": 0.42899796966439746, "grad_norm": 2.522444725036621, "learning_rate": 6.345298918452658e-06, "loss": 0.8385, "step": 7184 }, { "epoch": 0.4290576854174131, "grad_norm": 1.7422422170639038, "learning_rate": 6.344635392475616e-06, "loss": 0.9032, "step": 7185 }, { "epoch": 0.42911740117042874, "grad_norm": 2.066133499145508, "learning_rate": 6.343971866498574e-06, "loss": 0.8871, "step": 7186 }, { "epoch": 0.4291771169234444, "grad_norm": 2.4521842002868652, "learning_rate": 6.343308340521533e-06, "loss": 0.8885, "step": 7187 }, { "epoch": 0.42923683267646007, "grad_norm": 3.099989175796509, "learning_rate": 6.34264481454449e-06, "loss": 0.8819, "step": 7188 }, { "epoch": 0.4292965484294757, "grad_norm": 2.0825412273406982, "learning_rate": 6.341981288567448e-06, "loss": 0.895, "step": 7189 }, { "epoch": 0.42935626418249134, "grad_norm": 2.0308732986450195, "learning_rate": 6.341317762590405e-06, "loss": 0.8536, "step": 7190 }, { "epoch": 0.429415979935507, "grad_norm": 1.9267926216125488, "learning_rate": 6.340654236613364e-06, "loss": 0.8892, "step": 7191 }, { "epoch": 0.4294756956885226, "grad_norm": 1.5581600666046143, "learning_rate": 6.339990710636322e-06, "loss": 0.8426, "step": 7192 }, { "epoch": 0.4295354114415383, "grad_norm": 2.3763644695281982, "learning_rate": 6.3393271846592796e-06, "loss": 0.8544, "step": 7193 }, { "epoch": 0.42959512719455395, "grad_norm": 2.2589526176452637, "learning_rate": 6.338663658682238e-06, "loss": 0.8765, "step": 7194 }, { "epoch": 0.42965484294756956, "grad_norm": 1.7997486591339111, "learning_rate": 6.338000132705196e-06, "loss": 0.883, "step": 7195 }, { "epoch": 0.4297145587005852, "grad_norm": 1.6705584526062012, "learning_rate": 6.337336606728154e-06, "loss": 0.8517, "step": 7196 }, { "epoch": 0.42977427445360084, "grad_norm": 2.6471121311187744, "learning_rate": 6.336673080751112e-06, "loss": 0.836, "step": 7197 }, { "epoch": 0.4298339902066165, "grad_norm": 1.7748910188674927, "learning_rate": 6.336009554774069e-06, "loss": 0.8615, "step": 7198 }, { "epoch": 0.42989370595963217, "grad_norm": 2.153202533721924, "learning_rate": 6.335346028797028e-06, "loss": 0.8934, "step": 7199 }, { "epoch": 0.4299534217126478, "grad_norm": 1.6749343872070312, "learning_rate": 6.3346825028199854e-06, "loss": 0.8604, "step": 7200 }, { "epoch": 0.4299534217126478, "eval_text_loss": 0.9244801998138428, "eval_text_runtime": 15.1876, "eval_text_samples_per_second": 263.373, "eval_text_steps_per_second": 0.527, "step": 7200 }, { "epoch": 0.4299534217126478, "eval_image_loss": 0.6404801607131958, "eval_image_runtime": 5.0128, "eval_image_samples_per_second": 797.963, "eval_image_steps_per_second": 1.596, "step": 7200 }, { "epoch": 0.4299534217126478, "eval_video_loss": 1.089338779449463, "eval_video_runtime": 77.1371, "eval_video_samples_per_second": 51.856, "eval_video_steps_per_second": 0.104, "step": 7200 }, { "epoch": 0.43001313746566344, "grad_norm": 2.3804852962493896, "learning_rate": 6.3340189768429435e-06, "loss": 0.8685, "step": 7201 }, { "epoch": 0.4300728532186791, "grad_norm": 2.0020205974578857, "learning_rate": 6.3333554508659025e-06, "loss": 0.8888, "step": 7202 }, { "epoch": 0.4301325689716947, "grad_norm": 1.919268250465393, "learning_rate": 6.33269192488886e-06, "loss": 0.8371, "step": 7203 }, { "epoch": 0.4301922847247104, "grad_norm": 1.9256595373153687, "learning_rate": 6.332028398911818e-06, "loss": 0.8516, "step": 7204 }, { "epoch": 0.43025200047772605, "grad_norm": 2.0951929092407227, "learning_rate": 6.331364872934775e-06, "loss": 0.874, "step": 7205 }, { "epoch": 0.43031171623074166, "grad_norm": 1.6008838415145874, "learning_rate": 6.330701346957734e-06, "loss": 0.855, "step": 7206 }, { "epoch": 0.4303714319837573, "grad_norm": 4.753270626068115, "learning_rate": 6.330037820980692e-06, "loss": 0.8538, "step": 7207 }, { "epoch": 0.43043114773677293, "grad_norm": 2.7218217849731445, "learning_rate": 6.329374295003649e-06, "loss": 0.8781, "step": 7208 }, { "epoch": 0.4304908634897886, "grad_norm": 1.988218069076538, "learning_rate": 6.328710769026608e-06, "loss": 0.8706, "step": 7209 }, { "epoch": 0.43055057924280427, "grad_norm": 3.5870378017425537, "learning_rate": 6.328047243049566e-06, "loss": 0.8434, "step": 7210 }, { "epoch": 0.4306102949958199, "grad_norm": 2.1102359294891357, "learning_rate": 6.327383717072524e-06, "loss": 0.8604, "step": 7211 }, { "epoch": 0.43067001074883554, "grad_norm": 3.335672378540039, "learning_rate": 6.326720191095483e-06, "loss": 0.8764, "step": 7212 }, { "epoch": 0.4307297265018512, "grad_norm": 2.2279133796691895, "learning_rate": 6.32605666511844e-06, "loss": 0.8522, "step": 7213 }, { "epoch": 0.4307894422548668, "grad_norm": 1.7363125085830688, "learning_rate": 6.325393139141398e-06, "loss": 0.85, "step": 7214 }, { "epoch": 0.4308491580078825, "grad_norm": 2.6234097480773926, "learning_rate": 6.324729613164355e-06, "loss": 0.8426, "step": 7215 }, { "epoch": 0.43090887376089815, "grad_norm": 1.8741222620010376, "learning_rate": 6.324066087187314e-06, "loss": 0.8522, "step": 7216 }, { "epoch": 0.43096858951391376, "grad_norm": 2.0297231674194336, "learning_rate": 6.323402561210272e-06, "loss": 0.8739, "step": 7217 }, { "epoch": 0.4310283052669294, "grad_norm": 2.1276161670684814, "learning_rate": 6.3227390352332295e-06, "loss": 0.8684, "step": 7218 }, { "epoch": 0.4310880210199451, "grad_norm": 1.9270521402359009, "learning_rate": 6.3220755092561885e-06, "loss": 0.8823, "step": 7219 }, { "epoch": 0.4311477367729607, "grad_norm": 2.6587271690368652, "learning_rate": 6.321411983279146e-06, "loss": 0.909, "step": 7220 }, { "epoch": 0.43120745252597636, "grad_norm": 2.0602071285247803, "learning_rate": 6.320748457302104e-06, "loss": 0.8864, "step": 7221 }, { "epoch": 0.431267168278992, "grad_norm": 3.712783098220825, "learning_rate": 6.320084931325062e-06, "loss": 0.9007, "step": 7222 }, { "epoch": 0.43132688403200764, "grad_norm": 2.869175910949707, "learning_rate": 6.319421405348019e-06, "loss": 0.8737, "step": 7223 }, { "epoch": 0.4313865997850233, "grad_norm": 2.8773903846740723, "learning_rate": 6.318757879370978e-06, "loss": 0.85, "step": 7224 }, { "epoch": 0.4314463155380389, "grad_norm": 2.297489643096924, "learning_rate": 6.318094353393935e-06, "loss": 0.8503, "step": 7225 }, { "epoch": 0.4315060312910546, "grad_norm": 11.130605697631836, "learning_rate": 6.3174308274168935e-06, "loss": 0.8425, "step": 7226 }, { "epoch": 0.43156574704407025, "grad_norm": 4.349084377288818, "learning_rate": 6.3167673014398525e-06, "loss": 0.8364, "step": 7227 }, { "epoch": 0.43162546279708586, "grad_norm": 2.271172285079956, "learning_rate": 6.31610377546281e-06, "loss": 0.8761, "step": 7228 }, { "epoch": 0.4316851785501015, "grad_norm": 1.8010491132736206, "learning_rate": 6.315440249485768e-06, "loss": 0.8595, "step": 7229 }, { "epoch": 0.4317448943031172, "grad_norm": 2.1069743633270264, "learning_rate": 6.314776723508725e-06, "loss": 0.8588, "step": 7230 }, { "epoch": 0.4318046100561328, "grad_norm": 1.9905401468276978, "learning_rate": 6.314113197531684e-06, "loss": 0.8455, "step": 7231 }, { "epoch": 0.43186432580914846, "grad_norm": 1.9822208881378174, "learning_rate": 6.313449671554642e-06, "loss": 0.8977, "step": 7232 }, { "epoch": 0.4319240415621641, "grad_norm": 2.8433151245117188, "learning_rate": 6.312786145577599e-06, "loss": 0.8647, "step": 7233 }, { "epoch": 0.43198375731517974, "grad_norm": 3.5821917057037354, "learning_rate": 6.312122619600558e-06, "loss": 0.8661, "step": 7234 }, { "epoch": 0.4320434730681954, "grad_norm": 2.533318519592285, "learning_rate": 6.3114590936235156e-06, "loss": 0.8795, "step": 7235 }, { "epoch": 0.432103188821211, "grad_norm": 1.8229081630706787, "learning_rate": 6.310795567646474e-06, "loss": 0.8811, "step": 7236 }, { "epoch": 0.4321629045742267, "grad_norm": 2.2343590259552, "learning_rate": 6.310132041669433e-06, "loss": 0.8795, "step": 7237 }, { "epoch": 0.43222262032724235, "grad_norm": 1.6583341360092163, "learning_rate": 6.30946851569239e-06, "loss": 0.8747, "step": 7238 }, { "epoch": 0.43228233608025796, "grad_norm": 2.295956611633301, "learning_rate": 6.308804989715348e-06, "loss": 0.8919, "step": 7239 }, { "epoch": 0.4323420518332736, "grad_norm": 4.017870903015137, "learning_rate": 6.308141463738305e-06, "loss": 0.8605, "step": 7240 }, { "epoch": 0.4324017675862893, "grad_norm": 5.022131443023682, "learning_rate": 6.307477937761264e-06, "loss": 0.8711, "step": 7241 }, { "epoch": 0.4324614833393049, "grad_norm": 2.2935004234313965, "learning_rate": 6.306814411784222e-06, "loss": 0.8859, "step": 7242 }, { "epoch": 0.43252119909232056, "grad_norm": 1.8343620300292969, "learning_rate": 6.3061508858071795e-06, "loss": 0.8724, "step": 7243 }, { "epoch": 0.4325809148453362, "grad_norm": 2.454479455947876, "learning_rate": 6.3054873598301385e-06, "loss": 0.8808, "step": 7244 }, { "epoch": 0.43264063059835184, "grad_norm": 1.6033170223236084, "learning_rate": 6.304823833853096e-06, "loss": 0.8285, "step": 7245 }, { "epoch": 0.4327003463513675, "grad_norm": 3.289591073989868, "learning_rate": 6.304160307876054e-06, "loss": 0.9244, "step": 7246 }, { "epoch": 0.4327600621043831, "grad_norm": 2.282518148422241, "learning_rate": 6.303496781899012e-06, "loss": 0.8787, "step": 7247 }, { "epoch": 0.4328197778573988, "grad_norm": 2.5878186225891113, "learning_rate": 6.302833255921969e-06, "loss": 0.9183, "step": 7248 }, { "epoch": 0.43287949361041445, "grad_norm": 1.8944538831710815, "learning_rate": 6.302169729944928e-06, "loss": 0.8671, "step": 7249 }, { "epoch": 0.43293920936343006, "grad_norm": 4.361506462097168, "learning_rate": 6.301506203967885e-06, "loss": 0.8432, "step": 7250 }, { "epoch": 0.4329989251164457, "grad_norm": 2.1859939098358154, "learning_rate": 6.3008426779908435e-06, "loss": 0.862, "step": 7251 }, { "epoch": 0.4330586408694614, "grad_norm": 2.692549228668213, "learning_rate": 6.300179152013802e-06, "loss": 0.8822, "step": 7252 }, { "epoch": 0.433118356622477, "grad_norm": 2.956681966781616, "learning_rate": 6.29951562603676e-06, "loss": 0.8756, "step": 7253 }, { "epoch": 0.43317807237549266, "grad_norm": 2.242394208908081, "learning_rate": 6.298852100059718e-06, "loss": 0.8733, "step": 7254 }, { "epoch": 0.4332377881285083, "grad_norm": 1.614235758781433, "learning_rate": 6.298188574082675e-06, "loss": 0.8885, "step": 7255 }, { "epoch": 0.43329750388152394, "grad_norm": 2.118375539779663, "learning_rate": 6.297525048105634e-06, "loss": 0.8738, "step": 7256 }, { "epoch": 0.4333572196345396, "grad_norm": 1.8742698431015015, "learning_rate": 6.296861522128592e-06, "loss": 0.8734, "step": 7257 }, { "epoch": 0.4334169353875552, "grad_norm": 1.920965552330017, "learning_rate": 6.296197996151549e-06, "loss": 0.8675, "step": 7258 }, { "epoch": 0.4334766511405709, "grad_norm": 1.8976279497146606, "learning_rate": 6.295534470174508e-06, "loss": 0.8625, "step": 7259 }, { "epoch": 0.43353636689358654, "grad_norm": 1.712606430053711, "learning_rate": 6.2948709441974655e-06, "loss": 0.8436, "step": 7260 }, { "epoch": 0.43359608264660215, "grad_norm": 1.908860683441162, "learning_rate": 6.294207418220424e-06, "loss": 0.8876, "step": 7261 }, { "epoch": 0.4336557983996178, "grad_norm": 3.8596532344818115, "learning_rate": 6.293543892243383e-06, "loss": 0.8978, "step": 7262 }, { "epoch": 0.4337155141526335, "grad_norm": 2.7758255004882812, "learning_rate": 6.29288036626634e-06, "loss": 0.8851, "step": 7263 }, { "epoch": 0.4337752299056491, "grad_norm": 2.162230968475342, "learning_rate": 6.292216840289298e-06, "loss": 0.8976, "step": 7264 }, { "epoch": 0.43383494565866476, "grad_norm": 2.5289108753204346, "learning_rate": 6.291553314312255e-06, "loss": 0.8928, "step": 7265 }, { "epoch": 0.4338946614116804, "grad_norm": 2.1083829402923584, "learning_rate": 6.290889788335214e-06, "loss": 0.8599, "step": 7266 }, { "epoch": 0.43395437716469604, "grad_norm": 1.9760026931762695, "learning_rate": 6.290226262358172e-06, "loss": 0.823, "step": 7267 }, { "epoch": 0.4340140929177117, "grad_norm": 2.689734935760498, "learning_rate": 6.2895627363811295e-06, "loss": 0.8588, "step": 7268 }, { "epoch": 0.4340738086707273, "grad_norm": 2.340452194213867, "learning_rate": 6.2888992104040884e-06, "loss": 0.8947, "step": 7269 }, { "epoch": 0.434133524423743, "grad_norm": 1.9808850288391113, "learning_rate": 6.288235684427046e-06, "loss": 0.8665, "step": 7270 }, { "epoch": 0.43419324017675864, "grad_norm": 3.6593923568725586, "learning_rate": 6.287572158450004e-06, "loss": 0.8477, "step": 7271 }, { "epoch": 0.43425295592977425, "grad_norm": 1.6132993698120117, "learning_rate": 6.286908632472962e-06, "loss": 0.8922, "step": 7272 }, { "epoch": 0.4343126716827899, "grad_norm": 3.2806074619293213, "learning_rate": 6.28624510649592e-06, "loss": 0.8735, "step": 7273 }, { "epoch": 0.4343723874358056, "grad_norm": 2.19791579246521, "learning_rate": 6.285581580518878e-06, "loss": 0.8659, "step": 7274 }, { "epoch": 0.4344321031888212, "grad_norm": 2.2453508377075195, "learning_rate": 6.284918054541835e-06, "loss": 0.8719, "step": 7275 }, { "epoch": 0.43449181894183686, "grad_norm": 2.6240110397338867, "learning_rate": 6.2842545285647934e-06, "loss": 0.8834, "step": 7276 }, { "epoch": 0.4345515346948525, "grad_norm": 1.6875271797180176, "learning_rate": 6.283591002587752e-06, "loss": 0.8919, "step": 7277 }, { "epoch": 0.43461125044786814, "grad_norm": 1.8074941635131836, "learning_rate": 6.28292747661071e-06, "loss": 0.8996, "step": 7278 }, { "epoch": 0.4346709662008838, "grad_norm": 3.4331698417663574, "learning_rate": 6.282263950633668e-06, "loss": 0.9157, "step": 7279 }, { "epoch": 0.4347306819538994, "grad_norm": 1.9571988582611084, "learning_rate": 6.281600424656625e-06, "loss": 0.8726, "step": 7280 }, { "epoch": 0.4347903977069151, "grad_norm": 2.095123291015625, "learning_rate": 6.280936898679584e-06, "loss": 0.8867, "step": 7281 }, { "epoch": 0.43485011345993074, "grad_norm": 2.300706148147583, "learning_rate": 6.280273372702542e-06, "loss": 0.8997, "step": 7282 }, { "epoch": 0.43490982921294635, "grad_norm": 4.9787068367004395, "learning_rate": 6.279609846725499e-06, "loss": 0.835, "step": 7283 }, { "epoch": 0.434969544965962, "grad_norm": 2.5026392936706543, "learning_rate": 6.278946320748458e-06, "loss": 0.8753, "step": 7284 }, { "epoch": 0.4350292607189777, "grad_norm": 1.9531782865524292, "learning_rate": 6.2782827947714155e-06, "loss": 0.8614, "step": 7285 }, { "epoch": 0.4350889764719933, "grad_norm": 1.662092685699463, "learning_rate": 6.277619268794374e-06, "loss": 0.8236, "step": 7286 }, { "epoch": 0.43514869222500896, "grad_norm": 2.102635622024536, "learning_rate": 6.2769557428173325e-06, "loss": 0.8623, "step": 7287 }, { "epoch": 0.4352084079780246, "grad_norm": 2.601602077484131, "learning_rate": 6.27629221684029e-06, "loss": 0.8734, "step": 7288 }, { "epoch": 0.43526812373104024, "grad_norm": 2.1163313388824463, "learning_rate": 6.275628690863248e-06, "loss": 0.8945, "step": 7289 }, { "epoch": 0.4353278394840559, "grad_norm": 2.6752617359161377, "learning_rate": 6.274965164886205e-06, "loss": 0.8917, "step": 7290 }, { "epoch": 0.43538755523707157, "grad_norm": 2.5369253158569336, "learning_rate": 6.274301638909164e-06, "loss": 0.8617, "step": 7291 }, { "epoch": 0.4354472709900872, "grad_norm": 2.321913719177246, "learning_rate": 6.273638112932122e-06, "loss": 0.8653, "step": 7292 }, { "epoch": 0.43550698674310284, "grad_norm": 1.945444107055664, "learning_rate": 6.2729745869550795e-06, "loss": 0.9011, "step": 7293 }, { "epoch": 0.43556670249611845, "grad_norm": 2.944507360458374, "learning_rate": 6.272311060978038e-06, "loss": 0.8559, "step": 7294 }, { "epoch": 0.4356264182491341, "grad_norm": 2.4012420177459717, "learning_rate": 6.271647535000996e-06, "loss": 0.8365, "step": 7295 }, { "epoch": 0.4356861340021498, "grad_norm": 2.340282678604126, "learning_rate": 6.270984009023954e-06, "loss": 0.8734, "step": 7296 }, { "epoch": 0.4357458497551654, "grad_norm": 2.0856235027313232, "learning_rate": 6.270320483046912e-06, "loss": 0.8381, "step": 7297 }, { "epoch": 0.43580556550818106, "grad_norm": 1.890572190284729, "learning_rate": 6.26965695706987e-06, "loss": 0.89, "step": 7298 }, { "epoch": 0.4358652812611967, "grad_norm": 1.9027962684631348, "learning_rate": 6.268993431092828e-06, "loss": 0.907, "step": 7299 }, { "epoch": 0.43592499701421233, "grad_norm": 2.837148427963257, "learning_rate": 6.268329905115785e-06, "loss": 0.875, "step": 7300 }, { "epoch": 0.435984712767228, "grad_norm": 3.0093929767608643, "learning_rate": 6.267666379138743e-06, "loss": 0.8747, "step": 7301 }, { "epoch": 0.43604442852024367, "grad_norm": 2.3237781524658203, "learning_rate": 6.267002853161702e-06, "loss": 0.8865, "step": 7302 }, { "epoch": 0.4361041442732593, "grad_norm": 1.642158031463623, "learning_rate": 6.26633932718466e-06, "loss": 0.8544, "step": 7303 }, { "epoch": 0.43616386002627494, "grad_norm": 1.9772933721542358, "learning_rate": 6.265675801207618e-06, "loss": 0.8678, "step": 7304 }, { "epoch": 0.43622357577929055, "grad_norm": 1.9540221691131592, "learning_rate": 6.265012275230575e-06, "loss": 0.8894, "step": 7305 }, { "epoch": 0.4362832915323062, "grad_norm": 2.748483419418335, "learning_rate": 6.264348749253534e-06, "loss": 0.8979, "step": 7306 }, { "epoch": 0.4363430072853219, "grad_norm": 2.3223495483398438, "learning_rate": 6.263685223276492e-06, "loss": 0.9088, "step": 7307 }, { "epoch": 0.4364027230383375, "grad_norm": 1.818184733390808, "learning_rate": 6.263021697299449e-06, "loss": 0.8974, "step": 7308 }, { "epoch": 0.43646243879135316, "grad_norm": 3.005125045776367, "learning_rate": 6.262358171322408e-06, "loss": 0.8952, "step": 7309 }, { "epoch": 0.4365221545443688, "grad_norm": 2.26125431060791, "learning_rate": 6.2616946453453655e-06, "loss": 0.8632, "step": 7310 }, { "epoch": 0.43658187029738443, "grad_norm": 1.9831078052520752, "learning_rate": 6.2610311193683236e-06, "loss": 0.8492, "step": 7311 }, { "epoch": 0.4366415860504001, "grad_norm": 2.5731256008148193, "learning_rate": 6.2603675933912825e-06, "loss": 0.8447, "step": 7312 }, { "epoch": 0.43670130180341576, "grad_norm": 1.9767054319381714, "learning_rate": 6.25970406741424e-06, "loss": 0.8958, "step": 7313 }, { "epoch": 0.4367610175564314, "grad_norm": 2.0977911949157715, "learning_rate": 6.259040541437198e-06, "loss": 0.8563, "step": 7314 }, { "epoch": 0.43682073330944704, "grad_norm": 2.2862250804901123, "learning_rate": 6.258377015460155e-06, "loss": 0.846, "step": 7315 }, { "epoch": 0.43688044906246265, "grad_norm": 3.799561023712158, "learning_rate": 6.257713489483114e-06, "loss": 0.8658, "step": 7316 }, { "epoch": 0.4369401648154783, "grad_norm": 2.5210297107696533, "learning_rate": 6.257049963506072e-06, "loss": 0.8748, "step": 7317 }, { "epoch": 0.436999880568494, "grad_norm": 1.9736965894699097, "learning_rate": 6.2563864375290294e-06, "loss": 0.8868, "step": 7318 }, { "epoch": 0.4370595963215096, "grad_norm": 2.7302303314208984, "learning_rate": 6.255722911551988e-06, "loss": 0.8912, "step": 7319 }, { "epoch": 0.43711931207452526, "grad_norm": 1.7633535861968994, "learning_rate": 6.255059385574946e-06, "loss": 0.8707, "step": 7320 }, { "epoch": 0.4371790278275409, "grad_norm": 2.1816163063049316, "learning_rate": 6.254395859597904e-06, "loss": 0.8829, "step": 7321 }, { "epoch": 0.43723874358055653, "grad_norm": 2.016871929168701, "learning_rate": 6.253732333620862e-06, "loss": 0.8467, "step": 7322 }, { "epoch": 0.4372984593335722, "grad_norm": 1.954235315322876, "learning_rate": 6.25306880764382e-06, "loss": 0.8737, "step": 7323 }, { "epoch": 0.43735817508658786, "grad_norm": 3.522538423538208, "learning_rate": 6.252405281666778e-06, "loss": 0.855, "step": 7324 }, { "epoch": 0.4374178908396035, "grad_norm": 1.715425729751587, "learning_rate": 6.251741755689735e-06, "loss": 0.8791, "step": 7325 }, { "epoch": 0.43747760659261914, "grad_norm": 1.8590749502182007, "learning_rate": 6.251078229712693e-06, "loss": 0.9066, "step": 7326 }, { "epoch": 0.4375373223456348, "grad_norm": 2.074317455291748, "learning_rate": 6.250414703735652e-06, "loss": 0.8029, "step": 7327 }, { "epoch": 0.4375970380986504, "grad_norm": 2.0167453289031982, "learning_rate": 6.24975117775861e-06, "loss": 0.868, "step": 7328 }, { "epoch": 0.4376567538516661, "grad_norm": 2.984030246734619, "learning_rate": 6.249087651781568e-06, "loss": 0.8526, "step": 7329 }, { "epoch": 0.4377164696046817, "grad_norm": 3.5834381580352783, "learning_rate": 6.248424125804525e-06, "loss": 0.8727, "step": 7330 }, { "epoch": 0.43777618535769736, "grad_norm": 2.3153462409973145, "learning_rate": 6.247760599827484e-06, "loss": 0.8734, "step": 7331 }, { "epoch": 0.437835901110713, "grad_norm": 3.088811159133911, "learning_rate": 6.247097073850442e-06, "loss": 0.9112, "step": 7332 }, { "epoch": 0.43789561686372863, "grad_norm": 5.425642490386963, "learning_rate": 6.246433547873399e-06, "loss": 0.8384, "step": 7333 }, { "epoch": 0.4379553326167443, "grad_norm": 1.6300420761108398, "learning_rate": 6.245770021896358e-06, "loss": 0.8413, "step": 7334 }, { "epoch": 0.43801504836975996, "grad_norm": 2.347898244857788, "learning_rate": 6.2451064959193154e-06, "loss": 0.8817, "step": 7335 }, { "epoch": 0.4380747641227756, "grad_norm": 2.3882102966308594, "learning_rate": 6.2444429699422735e-06, "loss": 0.877, "step": 7336 }, { "epoch": 0.43813447987579124, "grad_norm": 2.4666049480438232, "learning_rate": 6.2437794439652325e-06, "loss": 0.8881, "step": 7337 }, { "epoch": 0.4381941956288069, "grad_norm": 2.265481948852539, "learning_rate": 6.24311591798819e-06, "loss": 0.8613, "step": 7338 }, { "epoch": 0.4382539113818225, "grad_norm": 1.9596887826919556, "learning_rate": 6.242452392011148e-06, "loss": 0.8543, "step": 7339 }, { "epoch": 0.4383136271348382, "grad_norm": 2.5429959297180176, "learning_rate": 6.241788866034105e-06, "loss": 0.8732, "step": 7340 }, { "epoch": 0.4383733428878538, "grad_norm": 2.101534366607666, "learning_rate": 6.241125340057064e-06, "loss": 0.865, "step": 7341 }, { "epoch": 0.43843305864086946, "grad_norm": 2.0724613666534424, "learning_rate": 6.240461814080022e-06, "loss": 0.9028, "step": 7342 }, { "epoch": 0.4384927743938851, "grad_norm": 2.540856122970581, "learning_rate": 6.239798288102979e-06, "loss": 0.8723, "step": 7343 }, { "epoch": 0.43855249014690073, "grad_norm": 2.164311408996582, "learning_rate": 6.239134762125938e-06, "loss": 0.9076, "step": 7344 }, { "epoch": 0.4386122058999164, "grad_norm": 4.440585613250732, "learning_rate": 6.238471236148896e-06, "loss": 0.8841, "step": 7345 }, { "epoch": 0.43867192165293206, "grad_norm": 1.9450103044509888, "learning_rate": 6.237807710171854e-06, "loss": 0.8339, "step": 7346 }, { "epoch": 0.43873163740594767, "grad_norm": 1.781630277633667, "learning_rate": 6.237144184194812e-06, "loss": 0.8783, "step": 7347 }, { "epoch": 0.43879135315896334, "grad_norm": 1.854337215423584, "learning_rate": 6.23648065821777e-06, "loss": 0.9115, "step": 7348 }, { "epoch": 0.438851068911979, "grad_norm": 2.203354597091675, "learning_rate": 6.235817132240728e-06, "loss": 0.8988, "step": 7349 }, { "epoch": 0.4389107846649946, "grad_norm": 2.531738042831421, "learning_rate": 6.235153606263685e-06, "loss": 0.8505, "step": 7350 }, { "epoch": 0.4389705004180103, "grad_norm": 2.145955801010132, "learning_rate": 6.234490080286643e-06, "loss": 0.9113, "step": 7351 }, { "epoch": 0.43903021617102594, "grad_norm": 4.028310298919678, "learning_rate": 6.233826554309602e-06, "loss": 0.8712, "step": 7352 }, { "epoch": 0.43908993192404155, "grad_norm": 1.6985360383987427, "learning_rate": 6.2331630283325596e-06, "loss": 0.8732, "step": 7353 }, { "epoch": 0.4391496476770572, "grad_norm": 2.6450681686401367, "learning_rate": 6.232499502355518e-06, "loss": 0.8318, "step": 7354 }, { "epoch": 0.43920936343007283, "grad_norm": 1.9633946418762207, "learning_rate": 6.231835976378475e-06, "loss": 0.8949, "step": 7355 }, { "epoch": 0.4392690791830885, "grad_norm": 1.731244444847107, "learning_rate": 6.231172450401434e-06, "loss": 0.8715, "step": 7356 }, { "epoch": 0.43932879493610416, "grad_norm": 1.9620585441589355, "learning_rate": 6.230508924424392e-06, "loss": 0.8719, "step": 7357 }, { "epoch": 0.43938851068911977, "grad_norm": 1.979527235031128, "learning_rate": 6.229845398447349e-06, "loss": 0.8834, "step": 7358 }, { "epoch": 0.43944822644213544, "grad_norm": 2.6121633052825928, "learning_rate": 6.229181872470308e-06, "loss": 0.8687, "step": 7359 }, { "epoch": 0.4395079421951511, "grad_norm": 1.9586971998214722, "learning_rate": 6.228518346493265e-06, "loss": 0.8639, "step": 7360 }, { "epoch": 0.4395676579481667, "grad_norm": 1.5954560041427612, "learning_rate": 6.2278548205162235e-06, "loss": 0.8599, "step": 7361 }, { "epoch": 0.4396273737011824, "grad_norm": 2.4811697006225586, "learning_rate": 6.2271912945391825e-06, "loss": 0.8565, "step": 7362 }, { "epoch": 0.43968708945419804, "grad_norm": 1.8596173524856567, "learning_rate": 6.22652776856214e-06, "loss": 0.8951, "step": 7363 }, { "epoch": 0.43974680520721365, "grad_norm": 1.6850037574768066, "learning_rate": 6.225864242585098e-06, "loss": 0.8821, "step": 7364 }, { "epoch": 0.4398065209602293, "grad_norm": 1.871821641921997, "learning_rate": 6.225200716608055e-06, "loss": 0.8225, "step": 7365 }, { "epoch": 0.43986623671324493, "grad_norm": 1.8449705839157104, "learning_rate": 6.224537190631014e-06, "loss": 0.8929, "step": 7366 }, { "epoch": 0.4399259524662606, "grad_norm": 1.7042055130004883, "learning_rate": 6.223873664653972e-06, "loss": 0.868, "step": 7367 }, { "epoch": 0.43998566821927626, "grad_norm": 1.8582127094268799, "learning_rate": 6.223210138676929e-06, "loss": 0.885, "step": 7368 }, { "epoch": 0.44004538397229187, "grad_norm": 3.909336805343628, "learning_rate": 6.222546612699888e-06, "loss": 0.8445, "step": 7369 }, { "epoch": 0.44010509972530754, "grad_norm": 2.32188081741333, "learning_rate": 6.2218830867228456e-06, "loss": 0.8526, "step": 7370 }, { "epoch": 0.4401648154783232, "grad_norm": 1.5439774990081787, "learning_rate": 6.221219560745804e-06, "loss": 0.8164, "step": 7371 }, { "epoch": 0.4402245312313388, "grad_norm": 4.114083766937256, "learning_rate": 6.220556034768762e-06, "loss": 0.8867, "step": 7372 }, { "epoch": 0.4402842469843545, "grad_norm": 3.0818803310394287, "learning_rate": 6.21989250879172e-06, "loss": 0.8842, "step": 7373 }, { "epoch": 0.44034396273737014, "grad_norm": 1.9131335020065308, "learning_rate": 6.219228982814678e-06, "loss": 0.8374, "step": 7374 }, { "epoch": 0.44040367849038575, "grad_norm": 2.0723507404327393, "learning_rate": 6.218565456837635e-06, "loss": 0.8608, "step": 7375 }, { "epoch": 0.4404633942434014, "grad_norm": 2.4866349697113037, "learning_rate": 6.217901930860593e-06, "loss": 0.8622, "step": 7376 }, { "epoch": 0.44052310999641703, "grad_norm": 1.6187396049499512, "learning_rate": 6.217238404883552e-06, "loss": 0.853, "step": 7377 }, { "epoch": 0.4405828257494327, "grad_norm": 3.075613498687744, "learning_rate": 6.2165748789065095e-06, "loss": 0.8786, "step": 7378 }, { "epoch": 0.44064254150244836, "grad_norm": 2.6892316341400146, "learning_rate": 6.215911352929468e-06, "loss": 0.8737, "step": 7379 }, { "epoch": 0.44070225725546397, "grad_norm": 2.691781997680664, "learning_rate": 6.215247826952425e-06, "loss": 0.8626, "step": 7380 }, { "epoch": 0.44076197300847964, "grad_norm": 2.063382148742676, "learning_rate": 6.214584300975384e-06, "loss": 0.8729, "step": 7381 }, { "epoch": 0.4408216887614953, "grad_norm": 1.5082978010177612, "learning_rate": 6.213920774998342e-06, "loss": 0.8797, "step": 7382 }, { "epoch": 0.4408814045145109, "grad_norm": 1.8918180465698242, "learning_rate": 6.213257249021299e-06, "loss": 0.8709, "step": 7383 }, { "epoch": 0.4409411202675266, "grad_norm": 2.8795526027679443, "learning_rate": 6.212593723044258e-06, "loss": 0.8435, "step": 7384 }, { "epoch": 0.44100083602054224, "grad_norm": 3.2606585025787354, "learning_rate": 6.211930197067215e-06, "loss": 0.9017, "step": 7385 }, { "epoch": 0.44106055177355785, "grad_norm": 2.28719162940979, "learning_rate": 6.2112666710901735e-06, "loss": 0.8726, "step": 7386 }, { "epoch": 0.4411202675265735, "grad_norm": 1.90237557888031, "learning_rate": 6.2106031451131324e-06, "loss": 0.8641, "step": 7387 }, { "epoch": 0.4411799832795892, "grad_norm": 1.8989533185958862, "learning_rate": 6.20993961913609e-06, "loss": 0.8255, "step": 7388 }, { "epoch": 0.4412396990326048, "grad_norm": 2.3019657135009766, "learning_rate": 6.209276093159048e-06, "loss": 0.8651, "step": 7389 }, { "epoch": 0.44129941478562046, "grad_norm": 2.356813430786133, "learning_rate": 6.208612567182005e-06, "loss": 0.8761, "step": 7390 }, { "epoch": 0.44135913053863607, "grad_norm": 3.8153531551361084, "learning_rate": 6.207949041204964e-06, "loss": 0.875, "step": 7391 }, { "epoch": 0.44141884629165173, "grad_norm": 2.5536649227142334, "learning_rate": 6.207285515227922e-06, "loss": 0.8788, "step": 7392 }, { "epoch": 0.4414785620446674, "grad_norm": 2.274484157562256, "learning_rate": 6.206621989250879e-06, "loss": 0.8602, "step": 7393 }, { "epoch": 0.441538277797683, "grad_norm": 3.5427730083465576, "learning_rate": 6.205958463273838e-06, "loss": 0.8765, "step": 7394 }, { "epoch": 0.4415979935506987, "grad_norm": 2.8100202083587646, "learning_rate": 6.2052949372967955e-06, "loss": 0.8762, "step": 7395 }, { "epoch": 0.44165770930371434, "grad_norm": 5.814951419830322, "learning_rate": 6.204631411319754e-06, "loss": 0.8955, "step": 7396 }, { "epoch": 0.44171742505672995, "grad_norm": 1.8758800029754639, "learning_rate": 6.203967885342712e-06, "loss": 0.8334, "step": 7397 }, { "epoch": 0.4417771408097456, "grad_norm": 5.189061164855957, "learning_rate": 6.20330435936567e-06, "loss": 0.8975, "step": 7398 }, { "epoch": 0.4418368565627613, "grad_norm": 9.949007034301758, "learning_rate": 6.202640833388628e-06, "loss": 0.8763, "step": 7399 }, { "epoch": 0.4418965723157769, "grad_norm": 1.9861406087875366, "learning_rate": 6.201977307411585e-06, "loss": 0.8738, "step": 7400 }, { "epoch": 0.4418965723157769, "eval_text_loss": 0.922359824180603, "eval_text_runtime": 15.1902, "eval_text_samples_per_second": 263.327, "eval_text_steps_per_second": 0.527, "step": 7400 }, { "epoch": 0.4418965723157769, "eval_image_loss": 0.6397277116775513, "eval_image_runtime": 5.0256, "eval_image_samples_per_second": 795.917, "eval_image_steps_per_second": 1.592, "step": 7400 }, { "epoch": 0.4418965723157769, "eval_video_loss": 1.0858876705169678, "eval_video_runtime": 76.4278, "eval_video_samples_per_second": 52.337, "eval_video_steps_per_second": 0.105, "step": 7400 }, { "epoch": 0.44195628806879256, "grad_norm": 2.309213638305664, "learning_rate": 6.201313781434543e-06, "loss": 0.8725, "step": 7401 }, { "epoch": 0.44201600382180817, "grad_norm": 2.375865936279297, "learning_rate": 6.200650255457502e-06, "loss": 0.8593, "step": 7402 }, { "epoch": 0.44207571957482383, "grad_norm": 3.231814384460449, "learning_rate": 6.1999867294804595e-06, "loss": 0.8735, "step": 7403 }, { "epoch": 0.4421354353278395, "grad_norm": 2.9776079654693604, "learning_rate": 6.199323203503418e-06, "loss": 0.8659, "step": 7404 }, { "epoch": 0.4421951510808551, "grad_norm": 2.119438409805298, "learning_rate": 6.198659677526375e-06, "loss": 0.9059, "step": 7405 }, { "epoch": 0.4422548668338708, "grad_norm": 2.11309814453125, "learning_rate": 6.197996151549334e-06, "loss": 0.8685, "step": 7406 }, { "epoch": 0.44231458258688644, "grad_norm": 1.9137617349624634, "learning_rate": 6.197332625572292e-06, "loss": 0.8796, "step": 7407 }, { "epoch": 0.44237429833990205, "grad_norm": 1.9936531782150269, "learning_rate": 6.196669099595249e-06, "loss": 0.8834, "step": 7408 }, { "epoch": 0.4424340140929177, "grad_norm": 2.3358962535858154, "learning_rate": 6.196005573618208e-06, "loss": 0.8775, "step": 7409 }, { "epoch": 0.4424937298459334, "grad_norm": 2.050128221511841, "learning_rate": 6.195342047641165e-06, "loss": 0.8379, "step": 7410 }, { "epoch": 0.442553445598949, "grad_norm": 7.161962509155273, "learning_rate": 6.1946785216641235e-06, "loss": 0.8397, "step": 7411 }, { "epoch": 0.44261316135196466, "grad_norm": 2.7572243213653564, "learning_rate": 6.194014995687082e-06, "loss": 0.8635, "step": 7412 }, { "epoch": 0.44267287710498027, "grad_norm": 2.487877607345581, "learning_rate": 6.19335146971004e-06, "loss": 0.8888, "step": 7413 }, { "epoch": 0.44273259285799593, "grad_norm": 2.7524547576904297, "learning_rate": 6.192687943732998e-06, "loss": 0.8991, "step": 7414 }, { "epoch": 0.4427923086110116, "grad_norm": 1.887420654296875, "learning_rate": 6.192024417755955e-06, "loss": 0.8349, "step": 7415 }, { "epoch": 0.4428520243640272, "grad_norm": 2.1126577854156494, "learning_rate": 6.191360891778914e-06, "loss": 0.8823, "step": 7416 }, { "epoch": 0.4429117401170429, "grad_norm": 2.6657261848449707, "learning_rate": 6.190697365801872e-06, "loss": 0.876, "step": 7417 }, { "epoch": 0.44297145587005854, "grad_norm": 3.217233657836914, "learning_rate": 6.190033839824829e-06, "loss": 0.9204, "step": 7418 }, { "epoch": 0.44303117162307415, "grad_norm": 2.4694559574127197, "learning_rate": 6.189370313847788e-06, "loss": 0.8997, "step": 7419 }, { "epoch": 0.4430908873760898, "grad_norm": 2.910538673400879, "learning_rate": 6.1887067878707455e-06, "loss": 0.8411, "step": 7420 }, { "epoch": 0.4431506031291055, "grad_norm": 2.476982593536377, "learning_rate": 6.188043261893704e-06, "loss": 0.8824, "step": 7421 }, { "epoch": 0.4432103188821211, "grad_norm": 1.8448339700698853, "learning_rate": 6.187379735916662e-06, "loss": 0.8828, "step": 7422 }, { "epoch": 0.44327003463513676, "grad_norm": 2.1469926834106445, "learning_rate": 6.18671620993962e-06, "loss": 0.8899, "step": 7423 }, { "epoch": 0.4433297503881524, "grad_norm": 2.103447198867798, "learning_rate": 6.186052683962578e-06, "loss": 0.8573, "step": 7424 }, { "epoch": 0.44338946614116803, "grad_norm": 2.9608747959136963, "learning_rate": 6.185389157985535e-06, "loss": 0.8292, "step": 7425 }, { "epoch": 0.4434491818941837, "grad_norm": 1.9227564334869385, "learning_rate": 6.184725632008493e-06, "loss": 0.8589, "step": 7426 }, { "epoch": 0.4435088976471993, "grad_norm": 1.9260613918304443, "learning_rate": 6.184062106031452e-06, "loss": 0.8513, "step": 7427 }, { "epoch": 0.443568613400215, "grad_norm": 1.5872772932052612, "learning_rate": 6.1833985800544095e-06, "loss": 0.9303, "step": 7428 }, { "epoch": 0.44362832915323064, "grad_norm": 2.3097705841064453, "learning_rate": 6.1827350540773676e-06, "loss": 0.8581, "step": 7429 }, { "epoch": 0.44368804490624625, "grad_norm": 1.771742582321167, "learning_rate": 6.182071528100325e-06, "loss": 0.8966, "step": 7430 }, { "epoch": 0.4437477606592619, "grad_norm": 2.21323561668396, "learning_rate": 6.181408002123284e-06, "loss": 0.8416, "step": 7431 }, { "epoch": 0.4438074764122776, "grad_norm": 2.029975175857544, "learning_rate": 6.180744476146242e-06, "loss": 0.8714, "step": 7432 }, { "epoch": 0.4438671921652932, "grad_norm": 2.7903201580047607, "learning_rate": 6.180080950169199e-06, "loss": 0.8654, "step": 7433 }, { "epoch": 0.44392690791830886, "grad_norm": 2.337644338607788, "learning_rate": 6.179417424192158e-06, "loss": 0.8758, "step": 7434 }, { "epoch": 0.4439866236713245, "grad_norm": 2.5375537872314453, "learning_rate": 6.178753898215115e-06, "loss": 0.8828, "step": 7435 }, { "epoch": 0.44404633942434013, "grad_norm": 2.201573610305786, "learning_rate": 6.1780903722380734e-06, "loss": 0.8589, "step": 7436 }, { "epoch": 0.4441060551773558, "grad_norm": 1.982338547706604, "learning_rate": 6.177426846261032e-06, "loss": 0.8404, "step": 7437 }, { "epoch": 0.4441657709303714, "grad_norm": 2.6957883834838867, "learning_rate": 6.17676332028399e-06, "loss": 0.8991, "step": 7438 }, { "epoch": 0.44422548668338707, "grad_norm": 2.8955841064453125, "learning_rate": 6.176099794306948e-06, "loss": 0.8583, "step": 7439 }, { "epoch": 0.44428520243640274, "grad_norm": 2.0140280723571777, "learning_rate": 6.175436268329905e-06, "loss": 0.8929, "step": 7440 }, { "epoch": 0.44434491818941835, "grad_norm": 2.173698663711548, "learning_rate": 6.174772742352864e-06, "loss": 0.8845, "step": 7441 }, { "epoch": 0.444404633942434, "grad_norm": 2.1821582317352295, "learning_rate": 6.174109216375822e-06, "loss": 0.8837, "step": 7442 }, { "epoch": 0.4444643496954497, "grad_norm": 2.2416365146636963, "learning_rate": 6.173445690398779e-06, "loss": 0.8906, "step": 7443 }, { "epoch": 0.4445240654484653, "grad_norm": 1.6908146142959595, "learning_rate": 6.172782164421738e-06, "loss": 0.8642, "step": 7444 }, { "epoch": 0.44458378120148095, "grad_norm": 3.464730978012085, "learning_rate": 6.1721186384446955e-06, "loss": 0.8747, "step": 7445 }, { "epoch": 0.4446434969544966, "grad_norm": 2.9306488037109375, "learning_rate": 6.171455112467654e-06, "loss": 0.8701, "step": 7446 }, { "epoch": 0.44470321270751223, "grad_norm": 2.4094948768615723, "learning_rate": 6.170791586490612e-06, "loss": 0.8826, "step": 7447 }, { "epoch": 0.4447629284605279, "grad_norm": 2.4248852729797363, "learning_rate": 6.17012806051357e-06, "loss": 0.9148, "step": 7448 }, { "epoch": 0.4448226442135435, "grad_norm": 1.7614039182662964, "learning_rate": 6.169464534536528e-06, "loss": 0.8885, "step": 7449 }, { "epoch": 0.44488235996655917, "grad_norm": 4.121366500854492, "learning_rate": 6.168801008559485e-06, "loss": 0.8801, "step": 7450 }, { "epoch": 0.44494207571957484, "grad_norm": 2.971468448638916, "learning_rate": 6.168137482582443e-06, "loss": 0.885, "step": 7451 }, { "epoch": 0.44500179147259045, "grad_norm": 2.741344928741455, "learning_rate": 6.167473956605402e-06, "loss": 0.8525, "step": 7452 }, { "epoch": 0.4450615072256061, "grad_norm": 2.8751730918884277, "learning_rate": 6.1668104306283594e-06, "loss": 0.8633, "step": 7453 }, { "epoch": 0.4451212229786218, "grad_norm": 2.405378818511963, "learning_rate": 6.1661469046513175e-06, "loss": 0.8432, "step": 7454 }, { "epoch": 0.4451809387316374, "grad_norm": 2.2730586528778076, "learning_rate": 6.165483378674275e-06, "loss": 0.8537, "step": 7455 }, { "epoch": 0.44524065448465305, "grad_norm": 5.256786346435547, "learning_rate": 6.164819852697234e-06, "loss": 0.8533, "step": 7456 }, { "epoch": 0.4453003702376687, "grad_norm": 2.22236967086792, "learning_rate": 6.164156326720192e-06, "loss": 0.8885, "step": 7457 }, { "epoch": 0.44536008599068433, "grad_norm": 2.1972315311431885, "learning_rate": 6.163492800743149e-06, "loss": 0.8678, "step": 7458 }, { "epoch": 0.4454198017437, "grad_norm": 2.6969103813171387, "learning_rate": 6.162829274766108e-06, "loss": 0.8923, "step": 7459 }, { "epoch": 0.44547951749671566, "grad_norm": 2.1350183486938477, "learning_rate": 6.162165748789065e-06, "loss": 0.8848, "step": 7460 }, { "epoch": 0.44553923324973127, "grad_norm": 2.3471217155456543, "learning_rate": 6.161502222812023e-06, "loss": 0.8478, "step": 7461 }, { "epoch": 0.44559894900274694, "grad_norm": 3.1574621200561523, "learning_rate": 6.160838696834982e-06, "loss": 0.8335, "step": 7462 }, { "epoch": 0.44565866475576255, "grad_norm": 2.1039092540740967, "learning_rate": 6.16017517085794e-06, "loss": 0.8622, "step": 7463 }, { "epoch": 0.4457183805087782, "grad_norm": 2.254565477371216, "learning_rate": 6.159511644880898e-06, "loss": 0.8931, "step": 7464 }, { "epoch": 0.4457780962617939, "grad_norm": 1.7745378017425537, "learning_rate": 6.158848118903855e-06, "loss": 0.87, "step": 7465 }, { "epoch": 0.4458378120148095, "grad_norm": 3.4726691246032715, "learning_rate": 6.158184592926814e-06, "loss": 0.8958, "step": 7466 }, { "epoch": 0.44589752776782515, "grad_norm": 1.939192295074463, "learning_rate": 6.157521066949772e-06, "loss": 0.8729, "step": 7467 }, { "epoch": 0.4459572435208408, "grad_norm": 1.8635917901992798, "learning_rate": 6.156857540972729e-06, "loss": 0.8895, "step": 7468 }, { "epoch": 0.44601695927385643, "grad_norm": 6.932642936706543, "learning_rate": 6.156194014995688e-06, "loss": 0.8569, "step": 7469 }, { "epoch": 0.4460766750268721, "grad_norm": 2.0826125144958496, "learning_rate": 6.1555304890186455e-06, "loss": 0.8577, "step": 7470 }, { "epoch": 0.44613639077988776, "grad_norm": 2.8473432064056396, "learning_rate": 6.1548669630416036e-06, "loss": 0.8572, "step": 7471 }, { "epoch": 0.44619610653290337, "grad_norm": 3.918567419052124, "learning_rate": 6.154203437064562e-06, "loss": 0.8549, "step": 7472 }, { "epoch": 0.44625582228591903, "grad_norm": 2.100893259048462, "learning_rate": 6.15353991108752e-06, "loss": 0.8749, "step": 7473 }, { "epoch": 0.44631553803893464, "grad_norm": 1.8229864835739136, "learning_rate": 6.152876385110478e-06, "loss": 0.8059, "step": 7474 }, { "epoch": 0.4463752537919503, "grad_norm": 2.6846659183502197, "learning_rate": 6.152212859133435e-06, "loss": 0.9241, "step": 7475 }, { "epoch": 0.446434969544966, "grad_norm": 4.73315954208374, "learning_rate": 6.151549333156393e-06, "loss": 0.8973, "step": 7476 }, { "epoch": 0.4464946852979816, "grad_norm": 3.368108034133911, "learning_rate": 6.150885807179352e-06, "loss": 0.8576, "step": 7477 }, { "epoch": 0.44655440105099725, "grad_norm": 2.0101869106292725, "learning_rate": 6.150222281202309e-06, "loss": 0.835, "step": 7478 }, { "epoch": 0.4466141168040129, "grad_norm": 2.028043270111084, "learning_rate": 6.1495587552252675e-06, "loss": 0.8582, "step": 7479 }, { "epoch": 0.4466738325570285, "grad_norm": 3.794536590576172, "learning_rate": 6.148895229248225e-06, "loss": 0.8844, "step": 7480 }, { "epoch": 0.4467335483100442, "grad_norm": 1.6455591917037964, "learning_rate": 6.148231703271184e-06, "loss": 0.8522, "step": 7481 }, { "epoch": 0.44679326406305986, "grad_norm": 2.4778804779052734, "learning_rate": 6.147568177294142e-06, "loss": 0.8852, "step": 7482 }, { "epoch": 0.44685297981607547, "grad_norm": 3.645054340362549, "learning_rate": 6.146904651317099e-06, "loss": 0.8595, "step": 7483 }, { "epoch": 0.44691269556909113, "grad_norm": 1.69173264503479, "learning_rate": 6.146241125340058e-06, "loss": 0.8812, "step": 7484 }, { "epoch": 0.44697241132210674, "grad_norm": 3.30808687210083, "learning_rate": 6.145577599363015e-06, "loss": 0.8806, "step": 7485 }, { "epoch": 0.4470321270751224, "grad_norm": 3.269578456878662, "learning_rate": 6.144914073385973e-06, "loss": 0.8591, "step": 7486 }, { "epoch": 0.4470918428281381, "grad_norm": 2.505519390106201, "learning_rate": 6.144250547408932e-06, "loss": 0.8727, "step": 7487 }, { "epoch": 0.4471515585811537, "grad_norm": 3.2320339679718018, "learning_rate": 6.1435870214318896e-06, "loss": 0.8823, "step": 7488 }, { "epoch": 0.44721127433416935, "grad_norm": 1.6873736381530762, "learning_rate": 6.142923495454848e-06, "loss": 0.8729, "step": 7489 }, { "epoch": 0.447270990087185, "grad_norm": 2.040938138961792, "learning_rate": 6.142259969477805e-06, "loss": 0.8297, "step": 7490 }, { "epoch": 0.4473307058402006, "grad_norm": 2.6178669929504395, "learning_rate": 6.141596443500764e-06, "loss": 0.8786, "step": 7491 }, { "epoch": 0.4473904215932163, "grad_norm": 2.6198530197143555, "learning_rate": 6.140932917523722e-06, "loss": 0.8351, "step": 7492 }, { "epoch": 0.44745013734623196, "grad_norm": 2.573093891143799, "learning_rate": 6.140269391546679e-06, "loss": 0.8969, "step": 7493 }, { "epoch": 0.44750985309924757, "grad_norm": 3.291623830795288, "learning_rate": 6.139605865569638e-06, "loss": 0.8602, "step": 7494 }, { "epoch": 0.44756956885226323, "grad_norm": 2.517108917236328, "learning_rate": 6.1389423395925954e-06, "loss": 0.8764, "step": 7495 }, { "epoch": 0.4476292846052789, "grad_norm": 1.8596855401992798, "learning_rate": 6.1382788136155535e-06, "loss": 0.8883, "step": 7496 }, { "epoch": 0.4476890003582945, "grad_norm": 3.0944478511810303, "learning_rate": 6.137615287638512e-06, "loss": 0.8711, "step": 7497 }, { "epoch": 0.4477487161113102, "grad_norm": 2.305936813354492, "learning_rate": 6.13695176166147e-06, "loss": 0.8808, "step": 7498 }, { "epoch": 0.4478084318643258, "grad_norm": 4.520614147186279, "learning_rate": 6.136288235684428e-06, "loss": 0.862, "step": 7499 }, { "epoch": 0.44786814761734145, "grad_norm": 2.784806251525879, "learning_rate": 6.135624709707385e-06, "loss": 0.8521, "step": 7500 }, { "epoch": 0.4479278633703571, "grad_norm": 2.484251022338867, "learning_rate": 6.134961183730343e-06, "loss": 0.8884, "step": 7501 }, { "epoch": 0.4479875791233727, "grad_norm": 1.669439435005188, "learning_rate": 6.134297657753302e-06, "loss": 0.8839, "step": 7502 }, { "epoch": 0.4480472948763884, "grad_norm": 2.254732370376587, "learning_rate": 6.133634131776259e-06, "loss": 0.8391, "step": 7503 }, { "epoch": 0.44810701062940406, "grad_norm": 1.7394651174545288, "learning_rate": 6.1329706057992175e-06, "loss": 0.8781, "step": 7504 }, { "epoch": 0.44816672638241967, "grad_norm": 2.2260119915008545, "learning_rate": 6.132307079822175e-06, "loss": 0.8544, "step": 7505 }, { "epoch": 0.44822644213543533, "grad_norm": 1.8643723726272583, "learning_rate": 6.131643553845134e-06, "loss": 0.8818, "step": 7506 }, { "epoch": 0.448286157888451, "grad_norm": 2.1082828044891357, "learning_rate": 6.130980027868092e-06, "loss": 0.837, "step": 7507 }, { "epoch": 0.4483458736414666, "grad_norm": 2.3583202362060547, "learning_rate": 6.130316501891049e-06, "loss": 0.8703, "step": 7508 }, { "epoch": 0.4484055893944823, "grad_norm": 2.354224681854248, "learning_rate": 6.129652975914008e-06, "loss": 0.8584, "step": 7509 }, { "epoch": 0.4484653051474979, "grad_norm": 3.534569025039673, "learning_rate": 6.128989449936965e-06, "loss": 0.9377, "step": 7510 }, { "epoch": 0.44852502090051355, "grad_norm": 2.567786455154419, "learning_rate": 6.128325923959923e-06, "loss": 0.8953, "step": 7511 }, { "epoch": 0.4485847366535292, "grad_norm": 2.1612229347229004, "learning_rate": 6.127662397982882e-06, "loss": 0.869, "step": 7512 }, { "epoch": 0.4486444524065448, "grad_norm": 1.722033977508545, "learning_rate": 6.1269988720058395e-06, "loss": 0.8138, "step": 7513 }, { "epoch": 0.4487041681595605, "grad_norm": 2.140798330307007, "learning_rate": 6.126335346028798e-06, "loss": 0.8667, "step": 7514 }, { "epoch": 0.44876388391257616, "grad_norm": 1.9811638593673706, "learning_rate": 6.125671820051755e-06, "loss": 0.8641, "step": 7515 }, { "epoch": 0.44882359966559177, "grad_norm": 3.4082679748535156, "learning_rate": 6.125008294074714e-06, "loss": 0.8549, "step": 7516 }, { "epoch": 0.44888331541860743, "grad_norm": 2.3486831188201904, "learning_rate": 6.124344768097672e-06, "loss": 0.9116, "step": 7517 }, { "epoch": 0.4489430311716231, "grad_norm": 2.0042059421539307, "learning_rate": 6.123681242120629e-06, "loss": 0.8856, "step": 7518 }, { "epoch": 0.4490027469246387, "grad_norm": 1.867626667022705, "learning_rate": 6.123017716143588e-06, "loss": 0.8695, "step": 7519 }, { "epoch": 0.4490624626776544, "grad_norm": 2.4342575073242188, "learning_rate": 6.122354190166545e-06, "loss": 0.837, "step": 7520 }, { "epoch": 0.44912217843067004, "grad_norm": 1.9898200035095215, "learning_rate": 6.1216906641895035e-06, "loss": 0.8613, "step": 7521 }, { "epoch": 0.44918189418368565, "grad_norm": 2.6146974563598633, "learning_rate": 6.121027138212462e-06, "loss": 0.8968, "step": 7522 }, { "epoch": 0.4492416099367013, "grad_norm": 1.652020812034607, "learning_rate": 6.12036361223542e-06, "loss": 0.8382, "step": 7523 }, { "epoch": 0.4493013256897169, "grad_norm": 2.0574371814727783, "learning_rate": 6.119700086258378e-06, "loss": 0.8927, "step": 7524 }, { "epoch": 0.4493610414427326, "grad_norm": 3.7501132488250732, "learning_rate": 6.119036560281335e-06, "loss": 0.8445, "step": 7525 }, { "epoch": 0.44942075719574825, "grad_norm": 2.4111714363098145, "learning_rate": 6.118373034304293e-06, "loss": 0.8772, "step": 7526 }, { "epoch": 0.44948047294876387, "grad_norm": 2.2284340858459473, "learning_rate": 6.117709508327252e-06, "loss": 0.8611, "step": 7527 }, { "epoch": 0.44954018870177953, "grad_norm": 2.30730938911438, "learning_rate": 6.117045982350209e-06, "loss": 0.8818, "step": 7528 }, { "epoch": 0.4495999044547952, "grad_norm": 2.6859960556030273, "learning_rate": 6.1163824563731675e-06, "loss": 0.9208, "step": 7529 }, { "epoch": 0.4496596202078108, "grad_norm": 3.0109169483184814, "learning_rate": 6.115718930396125e-06, "loss": 0.8881, "step": 7530 }, { "epoch": 0.44971933596082647, "grad_norm": 2.0177664756774902, "learning_rate": 6.115055404419084e-06, "loss": 0.8868, "step": 7531 }, { "epoch": 0.44977905171384214, "grad_norm": 4.461753845214844, "learning_rate": 6.114391878442042e-06, "loss": 0.8613, "step": 7532 }, { "epoch": 0.44983876746685775, "grad_norm": 1.7812403440475464, "learning_rate": 6.113728352464999e-06, "loss": 0.9044, "step": 7533 }, { "epoch": 0.4498984832198734, "grad_norm": 2.466763734817505, "learning_rate": 6.113064826487958e-06, "loss": 0.8609, "step": 7534 }, { "epoch": 0.449958198972889, "grad_norm": 2.793163537979126, "learning_rate": 6.112401300510915e-06, "loss": 0.8457, "step": 7535 }, { "epoch": 0.4500179147259047, "grad_norm": 3.524378538131714, "learning_rate": 6.111737774533873e-06, "loss": 0.8923, "step": 7536 }, { "epoch": 0.45007763047892035, "grad_norm": 1.7315858602523804, "learning_rate": 6.111074248556832e-06, "loss": 0.8223, "step": 7537 }, { "epoch": 0.45013734623193596, "grad_norm": 1.985500693321228, "learning_rate": 6.1104107225797895e-06, "loss": 0.8937, "step": 7538 }, { "epoch": 0.45019706198495163, "grad_norm": 2.416494369506836, "learning_rate": 6.109747196602748e-06, "loss": 0.888, "step": 7539 }, { "epoch": 0.4502567777379673, "grad_norm": 1.8814730644226074, "learning_rate": 6.109083670625705e-06, "loss": 0.8732, "step": 7540 }, { "epoch": 0.4503164934909829, "grad_norm": 2.051778793334961, "learning_rate": 6.108420144648664e-06, "loss": 0.8633, "step": 7541 }, { "epoch": 0.45037620924399857, "grad_norm": 2.2897162437438965, "learning_rate": 6.107756618671622e-06, "loss": 0.8481, "step": 7542 }, { "epoch": 0.45043592499701424, "grad_norm": 1.8818426132202148, "learning_rate": 6.107093092694579e-06, "loss": 0.8504, "step": 7543 }, { "epoch": 0.45049564075002985, "grad_norm": 2.570969581604004, "learning_rate": 6.106429566717538e-06, "loss": 0.8865, "step": 7544 }, { "epoch": 0.4505553565030455, "grad_norm": 2.254542589187622, "learning_rate": 6.105766040740495e-06, "loss": 0.8316, "step": 7545 }, { "epoch": 0.4506150722560611, "grad_norm": 2.575713634490967, "learning_rate": 6.1051025147634535e-06, "loss": 0.8972, "step": 7546 }, { "epoch": 0.4506747880090768, "grad_norm": 2.4452626705169678, "learning_rate": 6.1044389887864116e-06, "loss": 0.903, "step": 7547 }, { "epoch": 0.45073450376209245, "grad_norm": 2.6401801109313965, "learning_rate": 6.10377546280937e-06, "loss": 0.8774, "step": 7548 }, { "epoch": 0.45079421951510806, "grad_norm": 3.4144959449768066, "learning_rate": 6.103111936832328e-06, "loss": 0.8587, "step": 7549 }, { "epoch": 0.45085393526812373, "grad_norm": 2.6958436965942383, "learning_rate": 6.102448410855285e-06, "loss": 0.8545, "step": 7550 }, { "epoch": 0.4509136510211394, "grad_norm": 1.7017236948013306, "learning_rate": 6.101784884878243e-06, "loss": 0.8523, "step": 7551 }, { "epoch": 0.450973366774155, "grad_norm": 6.236533164978027, "learning_rate": 6.101121358901202e-06, "loss": 0.831, "step": 7552 }, { "epoch": 0.45103308252717067, "grad_norm": 2.5359528064727783, "learning_rate": 6.100457832924159e-06, "loss": 0.8654, "step": 7553 }, { "epoch": 0.45109279828018634, "grad_norm": 2.0584638118743896, "learning_rate": 6.0997943069471174e-06, "loss": 0.8653, "step": 7554 }, { "epoch": 0.45115251403320195, "grad_norm": 2.646784543991089, "learning_rate": 6.099130780970075e-06, "loss": 0.8904, "step": 7555 }, { "epoch": 0.4512122297862176, "grad_norm": 1.9221711158752441, "learning_rate": 6.098467254993034e-06, "loss": 0.8707, "step": 7556 }, { "epoch": 0.4512719455392333, "grad_norm": 1.9480873346328735, "learning_rate": 6.097803729015992e-06, "loss": 0.8677, "step": 7557 }, { "epoch": 0.4513316612922489, "grad_norm": 2.726731061935425, "learning_rate": 6.097140203038949e-06, "loss": 0.892, "step": 7558 }, { "epoch": 0.45139137704526455, "grad_norm": 2.135385274887085, "learning_rate": 6.096476677061908e-06, "loss": 0.8794, "step": 7559 }, { "epoch": 0.45145109279828016, "grad_norm": 2.136807680130005, "learning_rate": 6.095813151084865e-06, "loss": 0.8574, "step": 7560 }, { "epoch": 0.45151080855129583, "grad_norm": 4.308504581451416, "learning_rate": 6.095149625107823e-06, "loss": 0.8708, "step": 7561 }, { "epoch": 0.4515705243043115, "grad_norm": 3.2837002277374268, "learning_rate": 6.094486099130782e-06, "loss": 0.8861, "step": 7562 }, { "epoch": 0.4516302400573271, "grad_norm": 1.91788911819458, "learning_rate": 6.0938225731537395e-06, "loss": 0.8762, "step": 7563 }, { "epoch": 0.45168995581034277, "grad_norm": 1.8266561031341553, "learning_rate": 6.093159047176698e-06, "loss": 0.858, "step": 7564 }, { "epoch": 0.45174967156335843, "grad_norm": 2.1135661602020264, "learning_rate": 6.092495521199655e-06, "loss": 0.8734, "step": 7565 }, { "epoch": 0.45180938731637404, "grad_norm": 1.924076795578003, "learning_rate": 6.091831995222614e-06, "loss": 0.8608, "step": 7566 }, { "epoch": 0.4518691030693897, "grad_norm": 2.181657314300537, "learning_rate": 6.091168469245572e-06, "loss": 0.9193, "step": 7567 }, { "epoch": 0.4519288188224054, "grad_norm": 2.646217107772827, "learning_rate": 6.090504943268529e-06, "loss": 0.8749, "step": 7568 }, { "epoch": 0.451988534575421, "grad_norm": 1.9250658750534058, "learning_rate": 6.089841417291488e-06, "loss": 0.8589, "step": 7569 }, { "epoch": 0.45204825032843665, "grad_norm": 3.4747936725616455, "learning_rate": 6.089177891314445e-06, "loss": 0.8957, "step": 7570 }, { "epoch": 0.45210796608145226, "grad_norm": 2.292757987976074, "learning_rate": 6.0885143653374034e-06, "loss": 0.8879, "step": 7571 }, { "epoch": 0.4521676818344679, "grad_norm": 4.196349620819092, "learning_rate": 6.0878508393603615e-06, "loss": 0.8533, "step": 7572 }, { "epoch": 0.4522273975874836, "grad_norm": 3.6462924480438232, "learning_rate": 6.08718731338332e-06, "loss": 0.8656, "step": 7573 }, { "epoch": 0.4522871133404992, "grad_norm": 2.850078582763672, "learning_rate": 6.086523787406278e-06, "loss": 0.8677, "step": 7574 }, { "epoch": 0.45234682909351487, "grad_norm": 2.298779249191284, "learning_rate": 6.085860261429235e-06, "loss": 0.8844, "step": 7575 }, { "epoch": 0.45240654484653053, "grad_norm": 2.2382850646972656, "learning_rate": 6.085196735452193e-06, "loss": 0.8567, "step": 7576 }, { "epoch": 0.45246626059954614, "grad_norm": 1.8945235013961792, "learning_rate": 6.084533209475152e-06, "loss": 0.8505, "step": 7577 }, { "epoch": 0.4525259763525618, "grad_norm": 1.6862807273864746, "learning_rate": 6.083869683498109e-06, "loss": 0.8895, "step": 7578 }, { "epoch": 0.4525856921055775, "grad_norm": 2.4031622409820557, "learning_rate": 6.083206157521067e-06, "loss": 0.8897, "step": 7579 }, { "epoch": 0.4526454078585931, "grad_norm": 5.771906852722168, "learning_rate": 6.082542631544025e-06, "loss": 0.8835, "step": 7580 }, { "epoch": 0.45270512361160875, "grad_norm": 2.083159923553467, "learning_rate": 6.081879105566984e-06, "loss": 0.867, "step": 7581 }, { "epoch": 0.45276483936462436, "grad_norm": 3.704641342163086, "learning_rate": 6.081215579589942e-06, "loss": 0.881, "step": 7582 }, { "epoch": 0.45282455511764, "grad_norm": 2.067140817642212, "learning_rate": 6.080552053612899e-06, "loss": 0.8726, "step": 7583 }, { "epoch": 0.4528842708706557, "grad_norm": 1.6864057779312134, "learning_rate": 6.079888527635858e-06, "loss": 0.9059, "step": 7584 }, { "epoch": 0.4529439866236713, "grad_norm": 3.58808970451355, "learning_rate": 6.079225001658815e-06, "loss": 0.8854, "step": 7585 }, { "epoch": 0.45300370237668697, "grad_norm": 2.8953158855438232, "learning_rate": 6.078561475681773e-06, "loss": 0.8675, "step": 7586 }, { "epoch": 0.45306341812970263, "grad_norm": 2.3625965118408203, "learning_rate": 6.077897949704732e-06, "loss": 0.8399, "step": 7587 }, { "epoch": 0.45312313388271824, "grad_norm": 2.209000825881958, "learning_rate": 6.0772344237276895e-06, "loss": 0.8679, "step": 7588 }, { "epoch": 0.4531828496357339, "grad_norm": 3.393507480621338, "learning_rate": 6.0765708977506476e-06, "loss": 0.8233, "step": 7589 }, { "epoch": 0.4532425653887496, "grad_norm": 2.5197196006774902, "learning_rate": 6.075907371773605e-06, "loss": 0.8641, "step": 7590 }, { "epoch": 0.4533022811417652, "grad_norm": 2.06373929977417, "learning_rate": 6.075243845796564e-06, "loss": 0.8669, "step": 7591 }, { "epoch": 0.45336199689478085, "grad_norm": 2.7083990573883057, "learning_rate": 6.074580319819522e-06, "loss": 0.8693, "step": 7592 }, { "epoch": 0.4534217126477965, "grad_norm": 4.785548686981201, "learning_rate": 6.073916793842479e-06, "loss": 0.8615, "step": 7593 }, { "epoch": 0.4534814284008121, "grad_norm": 2.8506176471710205, "learning_rate": 6.073253267865438e-06, "loss": 0.8739, "step": 7594 }, { "epoch": 0.4535411441538278, "grad_norm": 2.4587113857269287, "learning_rate": 6.072589741888395e-06, "loss": 0.8914, "step": 7595 }, { "epoch": 0.4536008599068434, "grad_norm": 3.351914882659912, "learning_rate": 6.071926215911353e-06, "loss": 0.8768, "step": 7596 }, { "epoch": 0.45366057565985907, "grad_norm": 1.6602783203125, "learning_rate": 6.0712626899343115e-06, "loss": 0.8633, "step": 7597 }, { "epoch": 0.45372029141287473, "grad_norm": 2.169963836669922, "learning_rate": 6.07059916395727e-06, "loss": 0.8562, "step": 7598 }, { "epoch": 0.45378000716589034, "grad_norm": 2.796149969100952, "learning_rate": 6.069935637980228e-06, "loss": 0.8997, "step": 7599 }, { "epoch": 0.453839722918906, "grad_norm": 1.8225486278533936, "learning_rate": 6.069272112003185e-06, "loss": 0.8649, "step": 7600 }, { "epoch": 0.453839722918906, "eval_text_loss": 0.9205514192581177, "eval_text_runtime": 15.2209, "eval_text_samples_per_second": 262.797, "eval_text_steps_per_second": 0.526, "step": 7600 }, { "epoch": 0.453839722918906, "eval_image_loss": 0.6337801814079285, "eval_image_runtime": 4.9931, "eval_image_samples_per_second": 801.103, "eval_image_steps_per_second": 1.602, "step": 7600 }, { "epoch": 0.453839722918906, "eval_video_loss": 1.0818519592285156, "eval_video_runtime": 76.499, "eval_video_samples_per_second": 52.288, "eval_video_steps_per_second": 0.105, "step": 7600 }, { "epoch": 0.4538994386719217, "grad_norm": 3.140958547592163, "learning_rate": 6.068608586026143e-06, "loss": 0.8944, "step": 7601 }, { "epoch": 0.4539591544249373, "grad_norm": 2.6414690017700195, "learning_rate": 6.067945060049102e-06, "loss": 0.8503, "step": 7602 }, { "epoch": 0.45401887017795295, "grad_norm": 4.9548187255859375, "learning_rate": 6.067281534072059e-06, "loss": 0.855, "step": 7603 }, { "epoch": 0.4540785859309686, "grad_norm": 2.054368495941162, "learning_rate": 6.066618008095017e-06, "loss": 0.8541, "step": 7604 }, { "epoch": 0.4541383016839842, "grad_norm": 1.9541560411453247, "learning_rate": 6.065954482117975e-06, "loss": 0.8716, "step": 7605 }, { "epoch": 0.4541980174369999, "grad_norm": 2.5310163497924805, "learning_rate": 6.0652909561409336e-06, "loss": 0.8572, "step": 7606 }, { "epoch": 0.4542577331900155, "grad_norm": 2.036125421524048, "learning_rate": 6.064627430163892e-06, "loss": 0.8651, "step": 7607 }, { "epoch": 0.45431744894303117, "grad_norm": 2.656125545501709, "learning_rate": 6.063963904186849e-06, "loss": 0.8573, "step": 7608 }, { "epoch": 0.45437716469604683, "grad_norm": 2.0279653072357178, "learning_rate": 6.063300378209808e-06, "loss": 0.85, "step": 7609 }, { "epoch": 0.45443688044906244, "grad_norm": 2.697942018508911, "learning_rate": 6.062636852232765e-06, "loss": 0.897, "step": 7610 }, { "epoch": 0.4544965962020781, "grad_norm": 3.307377815246582, "learning_rate": 6.061973326255723e-06, "loss": 0.887, "step": 7611 }, { "epoch": 0.4545563119550938, "grad_norm": 1.9465174674987793, "learning_rate": 6.061309800278682e-06, "loss": 0.8436, "step": 7612 }, { "epoch": 0.4546160277081094, "grad_norm": 2.2504611015319824, "learning_rate": 6.0606462743016394e-06, "loss": 0.8451, "step": 7613 }, { "epoch": 0.45467574346112505, "grad_norm": 4.8043694496154785, "learning_rate": 6.0599827483245975e-06, "loss": 0.8886, "step": 7614 }, { "epoch": 0.4547354592141407, "grad_norm": 2.5590460300445557, "learning_rate": 6.059319222347555e-06, "loss": 0.8482, "step": 7615 }, { "epoch": 0.4547951749671563, "grad_norm": 1.8893553018569946, "learning_rate": 6.058655696370514e-06, "loss": 0.8762, "step": 7616 }, { "epoch": 0.454854890720172, "grad_norm": 3.0265707969665527, "learning_rate": 6.057992170393472e-06, "loss": 0.8351, "step": 7617 }, { "epoch": 0.4549146064731876, "grad_norm": 7.154285430908203, "learning_rate": 6.057328644416429e-06, "loss": 0.8563, "step": 7618 }, { "epoch": 0.45497432222620326, "grad_norm": 1.75090491771698, "learning_rate": 6.056665118439388e-06, "loss": 0.8637, "step": 7619 }, { "epoch": 0.45503403797921893, "grad_norm": 2.5179569721221924, "learning_rate": 6.056001592462345e-06, "loss": 0.8619, "step": 7620 }, { "epoch": 0.45509375373223454, "grad_norm": 3.0509724617004395, "learning_rate": 6.055338066485303e-06, "loss": 0.8487, "step": 7621 }, { "epoch": 0.4551534694852502, "grad_norm": 1.974805474281311, "learning_rate": 6.0546745405082615e-06, "loss": 0.8775, "step": 7622 }, { "epoch": 0.45521318523826587, "grad_norm": 2.8283755779266357, "learning_rate": 6.05401101453122e-06, "loss": 0.8501, "step": 7623 }, { "epoch": 0.4552729009912815, "grad_norm": 3.31282377243042, "learning_rate": 6.053347488554178e-06, "loss": 0.8708, "step": 7624 }, { "epoch": 0.45533261674429715, "grad_norm": 2.7816691398620605, "learning_rate": 6.052683962577135e-06, "loss": 0.8627, "step": 7625 }, { "epoch": 0.4553923324973128, "grad_norm": 2.2455506324768066, "learning_rate": 6.052020436600093e-06, "loss": 0.8726, "step": 7626 }, { "epoch": 0.4554520482503284, "grad_norm": 2.2792115211486816, "learning_rate": 6.051356910623052e-06, "loss": 0.8609, "step": 7627 }, { "epoch": 0.4555117640033441, "grad_norm": 2.371974229812622, "learning_rate": 6.050693384646009e-06, "loss": 0.8926, "step": 7628 }, { "epoch": 0.45557147975635975, "grad_norm": 2.1168720722198486, "learning_rate": 6.050029858668967e-06, "loss": 0.8577, "step": 7629 }, { "epoch": 0.45563119550937536, "grad_norm": 2.3299663066864014, "learning_rate": 6.049366332691925e-06, "loss": 0.8423, "step": 7630 }, { "epoch": 0.45569091126239103, "grad_norm": 2.178417205810547, "learning_rate": 6.0487028067148835e-06, "loss": 0.9112, "step": 7631 }, { "epoch": 0.45575062701540664, "grad_norm": 2.4113903045654297, "learning_rate": 6.048039280737842e-06, "loss": 0.8571, "step": 7632 }, { "epoch": 0.4558103427684223, "grad_norm": 4.585979461669922, "learning_rate": 6.047375754760799e-06, "loss": 0.8715, "step": 7633 }, { "epoch": 0.45587005852143797, "grad_norm": 2.7515616416931152, "learning_rate": 6.046712228783758e-06, "loss": 0.8856, "step": 7634 }, { "epoch": 0.4559297742744536, "grad_norm": 2.001302480697632, "learning_rate": 6.046048702806715e-06, "loss": 0.8483, "step": 7635 }, { "epoch": 0.45598949002746925, "grad_norm": 2.008774518966675, "learning_rate": 6.045385176829673e-06, "loss": 0.876, "step": 7636 }, { "epoch": 0.4560492057804849, "grad_norm": 1.9936184883117676, "learning_rate": 6.044721650852632e-06, "loss": 0.8619, "step": 7637 }, { "epoch": 0.4561089215335005, "grad_norm": 2.487299919128418, "learning_rate": 6.044058124875589e-06, "loss": 0.8596, "step": 7638 }, { "epoch": 0.4561686372865162, "grad_norm": 1.603619933128357, "learning_rate": 6.0433945988985475e-06, "loss": 0.853, "step": 7639 }, { "epoch": 0.45622835303953185, "grad_norm": 2.5090837478637695, "learning_rate": 6.042731072921505e-06, "loss": 0.8609, "step": 7640 }, { "epoch": 0.45628806879254746, "grad_norm": 1.890799880027771, "learning_rate": 6.042067546944464e-06, "loss": 0.8529, "step": 7641 }, { "epoch": 0.45634778454556313, "grad_norm": 1.7195302248001099, "learning_rate": 6.041404020967422e-06, "loss": 0.8502, "step": 7642 }, { "epoch": 0.45640750029857874, "grad_norm": 1.9058115482330322, "learning_rate": 6.040740494990379e-06, "loss": 0.8602, "step": 7643 }, { "epoch": 0.4564672160515944, "grad_norm": 2.5429155826568604, "learning_rate": 6.040076969013338e-06, "loss": 0.9082, "step": 7644 }, { "epoch": 0.45652693180461007, "grad_norm": 1.9323630332946777, "learning_rate": 6.039413443036295e-06, "loss": 0.8698, "step": 7645 }, { "epoch": 0.4565866475576257, "grad_norm": 1.8810241222381592, "learning_rate": 6.038749917059253e-06, "loss": 0.8637, "step": 7646 }, { "epoch": 0.45664636331064135, "grad_norm": 2.525954484939575, "learning_rate": 6.0380863910822115e-06, "loss": 0.8804, "step": 7647 }, { "epoch": 0.456706079063657, "grad_norm": 2.6257104873657227, "learning_rate": 6.0374228651051696e-06, "loss": 0.8667, "step": 7648 }, { "epoch": 0.4567657948166726, "grad_norm": 5.303109169006348, "learning_rate": 6.036759339128128e-06, "loss": 0.8545, "step": 7649 }, { "epoch": 0.4568255105696883, "grad_norm": 2.3300275802612305, "learning_rate": 6.036095813151085e-06, "loss": 0.8674, "step": 7650 }, { "epoch": 0.45688522632270395, "grad_norm": 1.9026862382888794, "learning_rate": 6.035432287174043e-06, "loss": 0.8817, "step": 7651 }, { "epoch": 0.45694494207571956, "grad_norm": 2.3267276287078857, "learning_rate": 6.034768761197002e-06, "loss": 0.8979, "step": 7652 }, { "epoch": 0.45700465782873523, "grad_norm": 2.107490301132202, "learning_rate": 6.034105235219959e-06, "loss": 0.8627, "step": 7653 }, { "epoch": 0.4570643735817509, "grad_norm": 1.8559398651123047, "learning_rate": 6.033441709242917e-06, "loss": 0.8504, "step": 7654 }, { "epoch": 0.4571240893347665, "grad_norm": 2.1353919506073, "learning_rate": 6.0327781832658746e-06, "loss": 0.8483, "step": 7655 }, { "epoch": 0.45718380508778217, "grad_norm": 1.7229630947113037, "learning_rate": 6.0321146572888335e-06, "loss": 0.8534, "step": 7656 }, { "epoch": 0.4572435208407978, "grad_norm": 2.001718759536743, "learning_rate": 6.031451131311792e-06, "loss": 0.8469, "step": 7657 }, { "epoch": 0.45730323659381344, "grad_norm": 1.9625550508499146, "learning_rate": 6.030787605334749e-06, "loss": 0.8903, "step": 7658 }, { "epoch": 0.4573629523468291, "grad_norm": 2.062251329421997, "learning_rate": 6.030124079357708e-06, "loss": 0.8492, "step": 7659 }, { "epoch": 0.4574226680998447, "grad_norm": 2.4886109828948975, "learning_rate": 6.029460553380665e-06, "loss": 0.8614, "step": 7660 }, { "epoch": 0.4574823838528604, "grad_norm": 3.1961989402770996, "learning_rate": 6.028797027403623e-06, "loss": 0.8679, "step": 7661 }, { "epoch": 0.45754209960587605, "grad_norm": 10.371278762817383, "learning_rate": 6.028133501426582e-06, "loss": 0.8594, "step": 7662 }, { "epoch": 0.45760181535889166, "grad_norm": 3.77791690826416, "learning_rate": 6.027469975449539e-06, "loss": 0.8472, "step": 7663 }, { "epoch": 0.4576615311119073, "grad_norm": 3.084934711456299, "learning_rate": 6.0268064494724975e-06, "loss": 0.86, "step": 7664 }, { "epoch": 0.457721246864923, "grad_norm": 3.45723557472229, "learning_rate": 6.026142923495455e-06, "loss": 0.8936, "step": 7665 }, { "epoch": 0.4577809626179386, "grad_norm": 2.0820260047912598, "learning_rate": 6.025479397518414e-06, "loss": 0.8672, "step": 7666 }, { "epoch": 0.45784067837095427, "grad_norm": 2.26114821434021, "learning_rate": 6.024815871541372e-06, "loss": 0.8471, "step": 7667 }, { "epoch": 0.4579003941239699, "grad_norm": 2.3542494773864746, "learning_rate": 6.024152345564329e-06, "loss": 0.877, "step": 7668 }, { "epoch": 0.45796010987698554, "grad_norm": 2.0366883277893066, "learning_rate": 6.023488819587288e-06, "loss": 0.8775, "step": 7669 }, { "epoch": 0.4580198256300012, "grad_norm": 2.199531078338623, "learning_rate": 6.022825293610245e-06, "loss": 0.8687, "step": 7670 }, { "epoch": 0.4580795413830168, "grad_norm": 2.2514190673828125, "learning_rate": 6.022161767633203e-06, "loss": 0.8548, "step": 7671 }, { "epoch": 0.4581392571360325, "grad_norm": 1.9645403623580933, "learning_rate": 6.021498241656161e-06, "loss": 0.8611, "step": 7672 }, { "epoch": 0.45819897288904815, "grad_norm": 2.3648226261138916, "learning_rate": 6.0208347156791195e-06, "loss": 0.8722, "step": 7673 }, { "epoch": 0.45825868864206376, "grad_norm": 1.8261152505874634, "learning_rate": 6.020171189702078e-06, "loss": 0.833, "step": 7674 }, { "epoch": 0.4583184043950794, "grad_norm": 2.956766366958618, "learning_rate": 6.019507663725035e-06, "loss": 0.8704, "step": 7675 }, { "epoch": 0.4583781201480951, "grad_norm": 2.2621469497680664, "learning_rate": 6.018844137747993e-06, "loss": 0.8589, "step": 7676 }, { "epoch": 0.4584378359011107, "grad_norm": 1.6421514749526978, "learning_rate": 6.018180611770952e-06, "loss": 0.8686, "step": 7677 }, { "epoch": 0.45849755165412637, "grad_norm": 4.800352096557617, "learning_rate": 6.017517085793909e-06, "loss": 0.8817, "step": 7678 }, { "epoch": 0.458557267407142, "grad_norm": 2.4696996212005615, "learning_rate": 6.016853559816867e-06, "loss": 0.8379, "step": 7679 }, { "epoch": 0.45861698316015764, "grad_norm": 1.6105706691741943, "learning_rate": 6.0161900338398245e-06, "loss": 0.8685, "step": 7680 }, { "epoch": 0.4586766989131733, "grad_norm": 2.5335450172424316, "learning_rate": 6.0155265078627835e-06, "loss": 0.8432, "step": 7681 }, { "epoch": 0.4587364146661889, "grad_norm": 1.7438900470733643, "learning_rate": 6.014862981885742e-06, "loss": 0.8731, "step": 7682 }, { "epoch": 0.4587961304192046, "grad_norm": 4.050075531005859, "learning_rate": 6.014199455908699e-06, "loss": 0.8517, "step": 7683 }, { "epoch": 0.45885584617222025, "grad_norm": 5.529186248779297, "learning_rate": 6.013535929931658e-06, "loss": 0.8957, "step": 7684 }, { "epoch": 0.45891556192523586, "grad_norm": 3.5244929790496826, "learning_rate": 6.012872403954615e-06, "loss": 0.8424, "step": 7685 }, { "epoch": 0.4589752776782515, "grad_norm": 2.261990785598755, "learning_rate": 6.012208877977573e-06, "loss": 0.8709, "step": 7686 }, { "epoch": 0.4590349934312672, "grad_norm": 2.080869197845459, "learning_rate": 6.011545352000532e-06, "loss": 0.8573, "step": 7687 }, { "epoch": 0.4590947091842828, "grad_norm": 2.129107713699341, "learning_rate": 6.010881826023489e-06, "loss": 0.8567, "step": 7688 }, { "epoch": 0.45915442493729847, "grad_norm": 2.256028175354004, "learning_rate": 6.0102183000464474e-06, "loss": 0.845, "step": 7689 }, { "epoch": 0.45921414069031413, "grad_norm": 2.2411587238311768, "learning_rate": 6.009554774069405e-06, "loss": 0.8791, "step": 7690 }, { "epoch": 0.45927385644332974, "grad_norm": 1.9718016386032104, "learning_rate": 6.008891248092364e-06, "loss": 0.8594, "step": 7691 }, { "epoch": 0.4593335721963454, "grad_norm": 12.754433631896973, "learning_rate": 6.008227722115322e-06, "loss": 0.8927, "step": 7692 }, { "epoch": 0.459393287949361, "grad_norm": 3.7014694213867188, "learning_rate": 6.007564196138279e-06, "loss": 0.9009, "step": 7693 }, { "epoch": 0.4594530037023767, "grad_norm": 1.8368544578552246, "learning_rate": 6.006900670161238e-06, "loss": 0.88, "step": 7694 }, { "epoch": 0.45951271945539235, "grad_norm": 1.709170937538147, "learning_rate": 6.006237144184195e-06, "loss": 0.8505, "step": 7695 }, { "epoch": 0.45957243520840796, "grad_norm": 2.1149158477783203, "learning_rate": 6.005573618207153e-06, "loss": 0.8487, "step": 7696 }, { "epoch": 0.4596321509614236, "grad_norm": 1.602742075920105, "learning_rate": 6.004910092230111e-06, "loss": 0.8544, "step": 7697 }, { "epoch": 0.4596918667144393, "grad_norm": 1.8650983572006226, "learning_rate": 6.0042465662530695e-06, "loss": 0.8465, "step": 7698 }, { "epoch": 0.4597515824674549, "grad_norm": 2.0002827644348145, "learning_rate": 6.003583040276028e-06, "loss": 0.8799, "step": 7699 }, { "epoch": 0.45981129822047057, "grad_norm": 1.6071958541870117, "learning_rate": 6.002919514298985e-06, "loss": 0.8687, "step": 7700 }, { "epoch": 0.45987101397348623, "grad_norm": 2.059541940689087, "learning_rate": 6.002255988321943e-06, "loss": 0.8422, "step": 7701 }, { "epoch": 0.45993072972650184, "grad_norm": 1.8272483348846436, "learning_rate": 6.001592462344902e-06, "loss": 0.8673, "step": 7702 }, { "epoch": 0.4599904454795175, "grad_norm": 2.162996530532837, "learning_rate": 6.000928936367859e-06, "loss": 0.8711, "step": 7703 }, { "epoch": 0.4600501612325331, "grad_norm": 2.6862003803253174, "learning_rate": 6.000265410390817e-06, "loss": 0.8567, "step": 7704 }, { "epoch": 0.4601098769855488, "grad_norm": 1.7970290184020996, "learning_rate": 5.9996018844137745e-06, "loss": 0.8442, "step": 7705 }, { "epoch": 0.46016959273856445, "grad_norm": 3.449141502380371, "learning_rate": 5.9989383584367335e-06, "loss": 0.8431, "step": 7706 }, { "epoch": 0.46022930849158006, "grad_norm": 3.8502233028411865, "learning_rate": 5.9982748324596916e-06, "loss": 0.8914, "step": 7707 }, { "epoch": 0.4602890242445957, "grad_norm": 3.29555082321167, "learning_rate": 5.997611306482649e-06, "loss": 0.8603, "step": 7708 }, { "epoch": 0.4603487399976114, "grad_norm": 2.4968509674072266, "learning_rate": 5.996947780505608e-06, "loss": 0.856, "step": 7709 }, { "epoch": 0.460408455750627, "grad_norm": 1.9985421895980835, "learning_rate": 5.996284254528565e-06, "loss": 0.9095, "step": 7710 }, { "epoch": 0.46046817150364266, "grad_norm": 1.8240796327590942, "learning_rate": 5.995620728551523e-06, "loss": 0.8654, "step": 7711 }, { "epoch": 0.46052788725665833, "grad_norm": 2.4487082958221436, "learning_rate": 5.994957202574482e-06, "loss": 0.8653, "step": 7712 }, { "epoch": 0.46058760300967394, "grad_norm": 3.281864881515503, "learning_rate": 5.994293676597439e-06, "loss": 0.8801, "step": 7713 }, { "epoch": 0.4606473187626896, "grad_norm": 1.7121272087097168, "learning_rate": 5.993630150620397e-06, "loss": 0.8618, "step": 7714 }, { "epoch": 0.4607070345157052, "grad_norm": 1.8124762773513794, "learning_rate": 5.992966624643355e-06, "loss": 0.8434, "step": 7715 }, { "epoch": 0.4607667502687209, "grad_norm": 2.1458749771118164, "learning_rate": 5.992303098666314e-06, "loss": 0.8619, "step": 7716 }, { "epoch": 0.46082646602173655, "grad_norm": 3.469823122024536, "learning_rate": 5.991639572689272e-06, "loss": 0.837, "step": 7717 }, { "epoch": 0.46088618177475216, "grad_norm": 1.9864517450332642, "learning_rate": 5.990976046712229e-06, "loss": 0.8625, "step": 7718 }, { "epoch": 0.4609458975277678, "grad_norm": 2.047598123550415, "learning_rate": 5.990312520735188e-06, "loss": 0.8331, "step": 7719 }, { "epoch": 0.4610056132807835, "grad_norm": 1.9645090103149414, "learning_rate": 5.989648994758145e-06, "loss": 0.8876, "step": 7720 }, { "epoch": 0.4610653290337991, "grad_norm": 2.267091989517212, "learning_rate": 5.988985468781103e-06, "loss": 0.8472, "step": 7721 }, { "epoch": 0.46112504478681476, "grad_norm": 2.2025904655456543, "learning_rate": 5.988321942804061e-06, "loss": 0.8897, "step": 7722 }, { "epoch": 0.46118476053983043, "grad_norm": 2.21693754196167, "learning_rate": 5.9876584168270195e-06, "loss": 0.8546, "step": 7723 }, { "epoch": 0.46124447629284604, "grad_norm": 1.8859622478485107, "learning_rate": 5.9869948908499776e-06, "loss": 0.8775, "step": 7724 }, { "epoch": 0.4613041920458617, "grad_norm": 2.0703420639038086, "learning_rate": 5.986331364872935e-06, "loss": 0.913, "step": 7725 }, { "epoch": 0.46136390779887737, "grad_norm": 2.087721109390259, "learning_rate": 5.985667838895893e-06, "loss": 0.8482, "step": 7726 }, { "epoch": 0.461423623551893, "grad_norm": 4.07293701171875, "learning_rate": 5.985004312918852e-06, "loss": 0.8676, "step": 7727 }, { "epoch": 0.46148333930490865, "grad_norm": 2.3525795936584473, "learning_rate": 5.984340786941809e-06, "loss": 0.8805, "step": 7728 }, { "epoch": 0.46154305505792426, "grad_norm": 2.6309568881988525, "learning_rate": 5.983677260964767e-06, "loss": 0.8349, "step": 7729 }, { "epoch": 0.4616027708109399, "grad_norm": 3.231464385986328, "learning_rate": 5.9830137349877245e-06, "loss": 0.8746, "step": 7730 }, { "epoch": 0.4616624865639556, "grad_norm": 2.587195873260498, "learning_rate": 5.982350209010683e-06, "loss": 0.9017, "step": 7731 }, { "epoch": 0.4617222023169712, "grad_norm": 2.775073289871216, "learning_rate": 5.9816866830336415e-06, "loss": 0.875, "step": 7732 }, { "epoch": 0.46178191806998686, "grad_norm": 2.721832752227783, "learning_rate": 5.981023157056599e-06, "loss": 0.9018, "step": 7733 }, { "epoch": 0.46184163382300253, "grad_norm": 3.2095935344696045, "learning_rate": 5.980359631079558e-06, "loss": 0.8801, "step": 7734 }, { "epoch": 0.46190134957601814, "grad_norm": 16.796689987182617, "learning_rate": 5.979696105102515e-06, "loss": 0.8708, "step": 7735 }, { "epoch": 0.4619610653290338, "grad_norm": 1.9631526470184326, "learning_rate": 5.979032579125473e-06, "loss": 0.8644, "step": 7736 }, { "epoch": 0.46202078108204947, "grad_norm": 2.5669262409210205, "learning_rate": 5.978369053148432e-06, "loss": 0.8469, "step": 7737 }, { "epoch": 0.4620804968350651, "grad_norm": 1.9472637176513672, "learning_rate": 5.977705527171389e-06, "loss": 0.8568, "step": 7738 }, { "epoch": 0.46214021258808075, "grad_norm": 2.245954751968384, "learning_rate": 5.977042001194347e-06, "loss": 0.8667, "step": 7739 }, { "epoch": 0.46219992834109636, "grad_norm": 1.955807089805603, "learning_rate": 5.976378475217305e-06, "loss": 0.8932, "step": 7740 }, { "epoch": 0.462259644094112, "grad_norm": 2.768568277359009, "learning_rate": 5.975714949240264e-06, "loss": 0.9137, "step": 7741 }, { "epoch": 0.4623193598471277, "grad_norm": 6.471825122833252, "learning_rate": 5.975051423263222e-06, "loss": 0.8802, "step": 7742 }, { "epoch": 0.4623790756001433, "grad_norm": 2.7544593811035156, "learning_rate": 5.974387897286179e-06, "loss": 0.852, "step": 7743 }, { "epoch": 0.46243879135315896, "grad_norm": 2.298720121383667, "learning_rate": 5.973724371309138e-06, "loss": 0.8849, "step": 7744 }, { "epoch": 0.46249850710617463, "grad_norm": 1.845261812210083, "learning_rate": 5.973060845332095e-06, "loss": 0.8595, "step": 7745 }, { "epoch": 0.46255822285919024, "grad_norm": 2.5803189277648926, "learning_rate": 5.972397319355053e-06, "loss": 0.9067, "step": 7746 }, { "epoch": 0.4626179386122059, "grad_norm": 3.247265100479126, "learning_rate": 5.971733793378011e-06, "loss": 0.8622, "step": 7747 }, { "epoch": 0.46267765436522157, "grad_norm": 2.3888604640960693, "learning_rate": 5.9710702674009694e-06, "loss": 0.8651, "step": 7748 }, { "epoch": 0.4627373701182372, "grad_norm": 4.472591876983643, "learning_rate": 5.9704067414239275e-06, "loss": 0.8641, "step": 7749 }, { "epoch": 0.46279708587125284, "grad_norm": 3.3108503818511963, "learning_rate": 5.969743215446885e-06, "loss": 0.8205, "step": 7750 }, { "epoch": 0.46285680162426845, "grad_norm": 2.804304599761963, "learning_rate": 5.969079689469843e-06, "loss": 0.8733, "step": 7751 }, { "epoch": 0.4629165173772841, "grad_norm": 2.0795953273773193, "learning_rate": 5.968416163492802e-06, "loss": 0.8886, "step": 7752 }, { "epoch": 0.4629762331302998, "grad_norm": 2.8639419078826904, "learning_rate": 5.967752637515759e-06, "loss": 0.8664, "step": 7753 }, { "epoch": 0.4630359488833154, "grad_norm": 3.2103676795959473, "learning_rate": 5.967089111538717e-06, "loss": 0.8681, "step": 7754 }, { "epoch": 0.46309566463633106, "grad_norm": 2.8392035961151123, "learning_rate": 5.9664255855616744e-06, "loss": 0.8798, "step": 7755 }, { "epoch": 0.4631553803893467, "grad_norm": 2.11125111579895, "learning_rate": 5.965762059584633e-06, "loss": 0.8686, "step": 7756 }, { "epoch": 0.46321509614236234, "grad_norm": 3.4015541076660156, "learning_rate": 5.9650985336075915e-06, "loss": 0.8846, "step": 7757 }, { "epoch": 0.463274811895378, "grad_norm": 2.1905274391174316, "learning_rate": 5.964435007630549e-06, "loss": 0.8965, "step": 7758 }, { "epoch": 0.46333452764839367, "grad_norm": 2.396261215209961, "learning_rate": 5.963771481653508e-06, "loss": 0.8671, "step": 7759 }, { "epoch": 0.4633942434014093, "grad_norm": 2.4229562282562256, "learning_rate": 5.963107955676465e-06, "loss": 0.8681, "step": 7760 }, { "epoch": 0.46345395915442494, "grad_norm": 1.8903862237930298, "learning_rate": 5.962444429699423e-06, "loss": 0.8932, "step": 7761 }, { "epoch": 0.4635136749074406, "grad_norm": 3.2318625450134277, "learning_rate": 5.961780903722382e-06, "loss": 0.8585, "step": 7762 }, { "epoch": 0.4635733906604562, "grad_norm": 2.777470350265503, "learning_rate": 5.961117377745339e-06, "loss": 0.8428, "step": 7763 }, { "epoch": 0.4636331064134719, "grad_norm": 2.41721248626709, "learning_rate": 5.960453851768297e-06, "loss": 0.8965, "step": 7764 }, { "epoch": 0.4636928221664875, "grad_norm": 2.0954887866973877, "learning_rate": 5.959790325791255e-06, "loss": 0.8489, "step": 7765 }, { "epoch": 0.46375253791950316, "grad_norm": 3.4038212299346924, "learning_rate": 5.9591267998142136e-06, "loss": 0.8372, "step": 7766 }, { "epoch": 0.4638122536725188, "grad_norm": 2.2443313598632812, "learning_rate": 5.958463273837172e-06, "loss": 0.8867, "step": 7767 }, { "epoch": 0.46387196942553444, "grad_norm": 2.0358543395996094, "learning_rate": 5.957799747860129e-06, "loss": 0.9079, "step": 7768 }, { "epoch": 0.4639316851785501, "grad_norm": 3.206054449081421, "learning_rate": 5.957136221883088e-06, "loss": 0.8928, "step": 7769 }, { "epoch": 0.46399140093156577, "grad_norm": 2.194612503051758, "learning_rate": 5.956472695906045e-06, "loss": 0.8823, "step": 7770 }, { "epoch": 0.4640511166845814, "grad_norm": 2.5749430656433105, "learning_rate": 5.955809169929003e-06, "loss": 0.8282, "step": 7771 }, { "epoch": 0.46411083243759704, "grad_norm": 2.0866899490356445, "learning_rate": 5.955145643951961e-06, "loss": 0.9072, "step": 7772 }, { "epoch": 0.4641705481906127, "grad_norm": 1.9732751846313477, "learning_rate": 5.954482117974919e-06, "loss": 0.8462, "step": 7773 }, { "epoch": 0.4642302639436283, "grad_norm": 1.6706182956695557, "learning_rate": 5.9538185919978775e-06, "loss": 0.8372, "step": 7774 }, { "epoch": 0.464289979696644, "grad_norm": 2.3190367221832275, "learning_rate": 5.953155066020835e-06, "loss": 0.9026, "step": 7775 }, { "epoch": 0.4643496954496596, "grad_norm": 3.179250478744507, "learning_rate": 5.952491540043793e-06, "loss": 0.883, "step": 7776 }, { "epoch": 0.46440941120267526, "grad_norm": 2.300395965576172, "learning_rate": 5.951828014066752e-06, "loss": 0.8737, "step": 7777 }, { "epoch": 0.4644691269556909, "grad_norm": 2.273806095123291, "learning_rate": 5.951164488089709e-06, "loss": 0.89, "step": 7778 }, { "epoch": 0.46452884270870654, "grad_norm": 2.860581159591675, "learning_rate": 5.950500962112667e-06, "loss": 0.8946, "step": 7779 }, { "epoch": 0.4645885584617222, "grad_norm": 1.9640554189682007, "learning_rate": 5.949837436135624e-06, "loss": 0.8539, "step": 7780 }, { "epoch": 0.46464827421473787, "grad_norm": 2.223034143447876, "learning_rate": 5.949173910158583e-06, "loss": 0.8445, "step": 7781 }, { "epoch": 0.4647079899677535, "grad_norm": 2.856818199157715, "learning_rate": 5.9485103841815415e-06, "loss": 0.8772, "step": 7782 }, { "epoch": 0.46476770572076914, "grad_norm": 1.6901978254318237, "learning_rate": 5.947846858204499e-06, "loss": 0.8902, "step": 7783 }, { "epoch": 0.4648274214737848, "grad_norm": 2.8654608726501465, "learning_rate": 5.947183332227458e-06, "loss": 0.8683, "step": 7784 }, { "epoch": 0.4648871372268004, "grad_norm": 5.458835124969482, "learning_rate": 5.946519806250415e-06, "loss": 0.819, "step": 7785 }, { "epoch": 0.4649468529798161, "grad_norm": 1.8625450134277344, "learning_rate": 5.945856280273373e-06, "loss": 0.8378, "step": 7786 }, { "epoch": 0.46500656873283175, "grad_norm": 2.309983730316162, "learning_rate": 5.945192754296332e-06, "loss": 0.8887, "step": 7787 }, { "epoch": 0.46506628448584736, "grad_norm": 3.4663164615631104, "learning_rate": 5.944529228319289e-06, "loss": 0.8944, "step": 7788 }, { "epoch": 0.465126000238863, "grad_norm": 2.2984821796417236, "learning_rate": 5.943865702342247e-06, "loss": 0.8792, "step": 7789 }, { "epoch": 0.46518571599187863, "grad_norm": 2.1850452423095703, "learning_rate": 5.9432021763652046e-06, "loss": 0.865, "step": 7790 }, { "epoch": 0.4652454317448943, "grad_norm": 2.010575771331787, "learning_rate": 5.9425386503881635e-06, "loss": 0.8626, "step": 7791 }, { "epoch": 0.46530514749790997, "grad_norm": 2.820158004760742, "learning_rate": 5.941875124411122e-06, "loss": 0.8971, "step": 7792 }, { "epoch": 0.4653648632509256, "grad_norm": 2.2544848918914795, "learning_rate": 5.941211598434079e-06, "loss": 0.8376, "step": 7793 }, { "epoch": 0.46542457900394124, "grad_norm": 2.690717935562134, "learning_rate": 5.940548072457038e-06, "loss": 0.8844, "step": 7794 }, { "epoch": 0.4654842947569569, "grad_norm": 3.693751335144043, "learning_rate": 5.939884546479995e-06, "loss": 0.8565, "step": 7795 }, { "epoch": 0.4655440105099725, "grad_norm": 4.0062947273254395, "learning_rate": 5.939221020502953e-06, "loss": 0.8486, "step": 7796 }, { "epoch": 0.4656037262629882, "grad_norm": 2.0910298824310303, "learning_rate": 5.938557494525911e-06, "loss": 0.8794, "step": 7797 }, { "epoch": 0.46566344201600385, "grad_norm": 3.6892361640930176, "learning_rate": 5.937893968548869e-06, "loss": 0.8576, "step": 7798 }, { "epoch": 0.46572315776901946, "grad_norm": 1.5159666538238525, "learning_rate": 5.9372304425718275e-06, "loss": 0.8665, "step": 7799 }, { "epoch": 0.4657828735220351, "grad_norm": 2.097139358520508, "learning_rate": 5.936566916594785e-06, "loss": 0.8507, "step": 7800 }, { "epoch": 0.4657828735220351, "eval_text_loss": 0.919540286064148, "eval_text_runtime": 15.1869, "eval_text_samples_per_second": 263.386, "eval_text_steps_per_second": 0.527, "step": 7800 }, { "epoch": 0.4657828735220351, "eval_image_loss": 0.6318371295928955, "eval_image_runtime": 5.0434, "eval_image_samples_per_second": 793.112, "eval_image_steps_per_second": 1.586, "step": 7800 }, { "epoch": 0.4657828735220351, "eval_video_loss": 1.0788297653198242, "eval_video_runtime": 76.3863, "eval_video_samples_per_second": 52.365, "eval_video_steps_per_second": 0.105, "step": 7800 }, { "epoch": 0.46584258927505073, "grad_norm": 1.9383735656738281, "learning_rate": 5.935903390617743e-06, "loss": 0.8688, "step": 7801 }, { "epoch": 0.4659023050280664, "grad_norm": 7.451169967651367, "learning_rate": 5.935239864640702e-06, "loss": 0.8535, "step": 7802 }, { "epoch": 0.46596202078108206, "grad_norm": 1.8165489435195923, "learning_rate": 5.934576338663659e-06, "loss": 0.858, "step": 7803 }, { "epoch": 0.4660217365340977, "grad_norm": 2.4832704067230225, "learning_rate": 5.933912812686617e-06, "loss": 0.8847, "step": 7804 }, { "epoch": 0.46608145228711334, "grad_norm": 4.165403842926025, "learning_rate": 5.933249286709574e-06, "loss": 0.8966, "step": 7805 }, { "epoch": 0.466141168040129, "grad_norm": 2.5217771530151367, "learning_rate": 5.932585760732533e-06, "loss": 0.8734, "step": 7806 }, { "epoch": 0.4662008837931446, "grad_norm": 1.9862112998962402, "learning_rate": 5.9319222347554914e-06, "loss": 0.8836, "step": 7807 }, { "epoch": 0.4662605995461603, "grad_norm": 2.1136527061462402, "learning_rate": 5.931258708778449e-06, "loss": 0.8602, "step": 7808 }, { "epoch": 0.46632031529917595, "grad_norm": 2.770185708999634, "learning_rate": 5.930595182801408e-06, "loss": 0.8795, "step": 7809 }, { "epoch": 0.46638003105219156, "grad_norm": 2.1885080337524414, "learning_rate": 5.929931656824365e-06, "loss": 0.8766, "step": 7810 }, { "epoch": 0.4664397468052072, "grad_norm": 2.0597169399261475, "learning_rate": 5.929268130847323e-06, "loss": 0.8732, "step": 7811 }, { "epoch": 0.46649946255822283, "grad_norm": 2.639951467514038, "learning_rate": 5.928604604870282e-06, "loss": 0.8675, "step": 7812 }, { "epoch": 0.4665591783112385, "grad_norm": 2.1540534496307373, "learning_rate": 5.927941078893239e-06, "loss": 0.8819, "step": 7813 }, { "epoch": 0.46661889406425416, "grad_norm": 2.510462522506714, "learning_rate": 5.927277552916197e-06, "loss": 0.8679, "step": 7814 }, { "epoch": 0.4666786098172698, "grad_norm": 2.1644749641418457, "learning_rate": 5.9266140269391545e-06, "loss": 0.8915, "step": 7815 }, { "epoch": 0.46673832557028544, "grad_norm": 2.553804874420166, "learning_rate": 5.9259505009621135e-06, "loss": 0.8309, "step": 7816 }, { "epoch": 0.4667980413233011, "grad_norm": 1.6867388486862183, "learning_rate": 5.925286974985072e-06, "loss": 0.8546, "step": 7817 }, { "epoch": 0.4668577570763167, "grad_norm": 1.906008243560791, "learning_rate": 5.924623449008029e-06, "loss": 0.8518, "step": 7818 }, { "epoch": 0.4669174728293324, "grad_norm": 2.9278409481048584, "learning_rate": 5.923959923030988e-06, "loss": 0.8399, "step": 7819 }, { "epoch": 0.46697718858234805, "grad_norm": 1.7531328201293945, "learning_rate": 5.923296397053945e-06, "loss": 0.9052, "step": 7820 }, { "epoch": 0.46703690433536366, "grad_norm": 2.192507743835449, "learning_rate": 5.922632871076903e-06, "loss": 0.8874, "step": 7821 }, { "epoch": 0.4670966200883793, "grad_norm": 2.4137110710144043, "learning_rate": 5.921969345099861e-06, "loss": 0.8744, "step": 7822 }, { "epoch": 0.467156335841395, "grad_norm": 2.5865869522094727, "learning_rate": 5.921305819122819e-06, "loss": 0.8811, "step": 7823 }, { "epoch": 0.4672160515944106, "grad_norm": 2.611863374710083, "learning_rate": 5.9206422931457774e-06, "loss": 0.8263, "step": 7824 }, { "epoch": 0.46727576734742626, "grad_norm": 2.2877964973449707, "learning_rate": 5.919978767168735e-06, "loss": 0.8855, "step": 7825 }, { "epoch": 0.4673354831004419, "grad_norm": 1.7725858688354492, "learning_rate": 5.919315241191693e-06, "loss": 0.857, "step": 7826 }, { "epoch": 0.46739519885345754, "grad_norm": 2.866506814956665, "learning_rate": 5.918651715214652e-06, "loss": 0.8446, "step": 7827 }, { "epoch": 0.4674549146064732, "grad_norm": 3.4777464866638184, "learning_rate": 5.917988189237609e-06, "loss": 0.8946, "step": 7828 }, { "epoch": 0.4675146303594888, "grad_norm": 2.19498872756958, "learning_rate": 5.917324663260567e-06, "loss": 0.851, "step": 7829 }, { "epoch": 0.4675743461125045, "grad_norm": 2.7006447315216064, "learning_rate": 5.916661137283524e-06, "loss": 0.889, "step": 7830 }, { "epoch": 0.46763406186552015, "grad_norm": 2.334080934524536, "learning_rate": 5.915997611306483e-06, "loss": 0.8212, "step": 7831 }, { "epoch": 0.46769377761853576, "grad_norm": 2.1614856719970703, "learning_rate": 5.915334085329441e-06, "loss": 0.8465, "step": 7832 }, { "epoch": 0.4677534933715514, "grad_norm": 2.2592437267303467, "learning_rate": 5.914670559352399e-06, "loss": 0.8691, "step": 7833 }, { "epoch": 0.4678132091245671, "grad_norm": 2.0352864265441895, "learning_rate": 5.914007033375358e-06, "loss": 0.8732, "step": 7834 }, { "epoch": 0.4678729248775827, "grad_norm": 2.914944887161255, "learning_rate": 5.913343507398315e-06, "loss": 0.8618, "step": 7835 }, { "epoch": 0.46793264063059836, "grad_norm": 1.884353756904602, "learning_rate": 5.912679981421273e-06, "loss": 0.8629, "step": 7836 }, { "epoch": 0.46799235638361397, "grad_norm": 3.006152629852295, "learning_rate": 5.912016455444232e-06, "loss": 0.8544, "step": 7837 }, { "epoch": 0.46805207213662964, "grad_norm": 3.8706836700439453, "learning_rate": 5.911352929467189e-06, "loss": 0.8514, "step": 7838 }, { "epoch": 0.4681117878896453, "grad_norm": 2.064714193344116, "learning_rate": 5.910689403490147e-06, "loss": 0.8926, "step": 7839 }, { "epoch": 0.4681715036426609, "grad_norm": 2.1610894203186035, "learning_rate": 5.9100258775131045e-06, "loss": 0.8365, "step": 7840 }, { "epoch": 0.4682312193956766, "grad_norm": 5.295045375823975, "learning_rate": 5.9093623515360635e-06, "loss": 0.882, "step": 7841 }, { "epoch": 0.46829093514869224, "grad_norm": 2.3115549087524414, "learning_rate": 5.9086988255590216e-06, "loss": 0.8222, "step": 7842 }, { "epoch": 0.46835065090170785, "grad_norm": 2.332789182662964, "learning_rate": 5.908035299581979e-06, "loss": 0.8939, "step": 7843 }, { "epoch": 0.4684103666547235, "grad_norm": 2.519679546356201, "learning_rate": 5.907371773604938e-06, "loss": 0.8392, "step": 7844 }, { "epoch": 0.4684700824077392, "grad_norm": 2.172837257385254, "learning_rate": 5.906708247627895e-06, "loss": 0.8382, "step": 7845 }, { "epoch": 0.4685297981607548, "grad_norm": 2.643332004547119, "learning_rate": 5.906044721650853e-06, "loss": 0.926, "step": 7846 }, { "epoch": 0.46858951391377046, "grad_norm": 2.1913700103759766, "learning_rate": 5.905381195673811e-06, "loss": 0.8771, "step": 7847 }, { "epoch": 0.46864922966678607, "grad_norm": 1.7121716737747192, "learning_rate": 5.904717669696769e-06, "loss": 0.8673, "step": 7848 }, { "epoch": 0.46870894541980174, "grad_norm": 2.7268857955932617, "learning_rate": 5.904054143719727e-06, "loss": 0.8944, "step": 7849 }, { "epoch": 0.4687686611728174, "grad_norm": 1.8280237913131714, "learning_rate": 5.903390617742685e-06, "loss": 0.8665, "step": 7850 }, { "epoch": 0.468828376925833, "grad_norm": 2.646151065826416, "learning_rate": 5.902727091765643e-06, "loss": 0.8945, "step": 7851 }, { "epoch": 0.4688880926788487, "grad_norm": 2.14870548248291, "learning_rate": 5.902063565788602e-06, "loss": 0.8465, "step": 7852 }, { "epoch": 0.46894780843186434, "grad_norm": 3.4105286598205566, "learning_rate": 5.901400039811559e-06, "loss": 0.8304, "step": 7853 }, { "epoch": 0.46900752418487995, "grad_norm": 2.0102226734161377, "learning_rate": 5.900736513834517e-06, "loss": 0.8342, "step": 7854 }, { "epoch": 0.4690672399378956, "grad_norm": 4.812119483947754, "learning_rate": 5.900072987857474e-06, "loss": 0.8806, "step": 7855 }, { "epoch": 0.4691269556909113, "grad_norm": 2.3307266235351562, "learning_rate": 5.899409461880433e-06, "loss": 0.8482, "step": 7856 }, { "epoch": 0.4691866714439269, "grad_norm": 2.1547374725341797, "learning_rate": 5.898745935903391e-06, "loss": 0.8487, "step": 7857 }, { "epoch": 0.46924638719694256, "grad_norm": 3.0950372219085693, "learning_rate": 5.898082409926349e-06, "loss": 0.8686, "step": 7858 }, { "epoch": 0.4693061029499582, "grad_norm": 3.602855920791626, "learning_rate": 5.897418883949308e-06, "loss": 0.8774, "step": 7859 }, { "epoch": 0.46936581870297384, "grad_norm": 1.6294209957122803, "learning_rate": 5.896755357972265e-06, "loss": 0.8664, "step": 7860 }, { "epoch": 0.4694255344559895, "grad_norm": 1.7608906030654907, "learning_rate": 5.896091831995223e-06, "loss": 0.8521, "step": 7861 }, { "epoch": 0.4694852502090051, "grad_norm": 2.1755828857421875, "learning_rate": 5.895428306018182e-06, "loss": 0.8548, "step": 7862 }, { "epoch": 0.4695449659620208, "grad_norm": 2.189607620239258, "learning_rate": 5.894764780041139e-06, "loss": 0.8468, "step": 7863 }, { "epoch": 0.46960468171503644, "grad_norm": 2.5705018043518066, "learning_rate": 5.894101254064097e-06, "loss": 0.8775, "step": 7864 }, { "epoch": 0.46966439746805205, "grad_norm": 1.9042247533798218, "learning_rate": 5.8934377280870545e-06, "loss": 0.868, "step": 7865 }, { "epoch": 0.4697241132210677, "grad_norm": 1.8166751861572266, "learning_rate": 5.8927742021100134e-06, "loss": 0.8613, "step": 7866 }, { "epoch": 0.4697838289740834, "grad_norm": 3.838561773300171, "learning_rate": 5.8921106761329715e-06, "loss": 0.8674, "step": 7867 }, { "epoch": 0.469843544727099, "grad_norm": 1.8758666515350342, "learning_rate": 5.891447150155929e-06, "loss": 0.8486, "step": 7868 }, { "epoch": 0.46990326048011466, "grad_norm": 2.786724090576172, "learning_rate": 5.890783624178888e-06, "loss": 0.8998, "step": 7869 }, { "epoch": 0.4699629762331303, "grad_norm": 2.169926881790161, "learning_rate": 5.890120098201845e-06, "loss": 0.8344, "step": 7870 }, { "epoch": 0.47002269198614594, "grad_norm": 2.02256178855896, "learning_rate": 5.889456572224803e-06, "loss": 0.8646, "step": 7871 }, { "epoch": 0.4700824077391616, "grad_norm": 2.381359338760376, "learning_rate": 5.888793046247761e-06, "loss": 0.8876, "step": 7872 }, { "epoch": 0.4701421234921772, "grad_norm": 1.9289602041244507, "learning_rate": 5.888129520270719e-06, "loss": 0.8447, "step": 7873 }, { "epoch": 0.4702018392451929, "grad_norm": 1.9255914688110352, "learning_rate": 5.887465994293677e-06, "loss": 0.8433, "step": 7874 }, { "epoch": 0.47026155499820854, "grad_norm": 3.4675228595733643, "learning_rate": 5.886802468316635e-06, "loss": 0.9003, "step": 7875 }, { "epoch": 0.47032127075122415, "grad_norm": 2.0125651359558105, "learning_rate": 5.886138942339593e-06, "loss": 0.8634, "step": 7876 }, { "epoch": 0.4703809865042398, "grad_norm": 4.511218070983887, "learning_rate": 5.885475416362552e-06, "loss": 0.8877, "step": 7877 }, { "epoch": 0.4704407022572555, "grad_norm": 2.0839321613311768, "learning_rate": 5.884811890385509e-06, "loss": 0.8572, "step": 7878 }, { "epoch": 0.4705004180102711, "grad_norm": 2.891357660293579, "learning_rate": 5.884148364408467e-06, "loss": 0.8632, "step": 7879 }, { "epoch": 0.47056013376328676, "grad_norm": 1.5758012533187866, "learning_rate": 5.883484838431424e-06, "loss": 0.8675, "step": 7880 }, { "epoch": 0.4706198495163024, "grad_norm": 2.121274948120117, "learning_rate": 5.882821312454383e-06, "loss": 0.8724, "step": 7881 }, { "epoch": 0.47067956526931803, "grad_norm": 2.2783918380737305, "learning_rate": 5.882157786477341e-06, "loss": 0.9032, "step": 7882 }, { "epoch": 0.4707392810223337, "grad_norm": 2.44358491897583, "learning_rate": 5.881494260500299e-06, "loss": 0.8854, "step": 7883 }, { "epoch": 0.4707989967753493, "grad_norm": 2.1830766201019287, "learning_rate": 5.8808307345232575e-06, "loss": 0.8608, "step": 7884 }, { "epoch": 0.470858712528365, "grad_norm": 2.2458689212799072, "learning_rate": 5.880167208546215e-06, "loss": 0.8848, "step": 7885 }, { "epoch": 0.47091842828138064, "grad_norm": 2.3476006984710693, "learning_rate": 5.879503682569173e-06, "loss": 0.8491, "step": 7886 }, { "epoch": 0.47097814403439625, "grad_norm": 1.8965119123458862, "learning_rate": 5.878840156592132e-06, "loss": 0.855, "step": 7887 }, { "epoch": 0.4710378597874119, "grad_norm": 2.1081862449645996, "learning_rate": 5.878176630615089e-06, "loss": 0.8697, "step": 7888 }, { "epoch": 0.4710975755404276, "grad_norm": 2.369734287261963, "learning_rate": 5.877513104638047e-06, "loss": 0.8408, "step": 7889 }, { "epoch": 0.4711572912934432, "grad_norm": 2.6518125534057617, "learning_rate": 5.8768495786610045e-06, "loss": 0.8666, "step": 7890 }, { "epoch": 0.47121700704645886, "grad_norm": 2.1390068531036377, "learning_rate": 5.876186052683963e-06, "loss": 0.8985, "step": 7891 }, { "epoch": 0.4712767227994745, "grad_norm": 2.252246618270874, "learning_rate": 5.8755225267069215e-06, "loss": 0.8799, "step": 7892 }, { "epoch": 0.47133643855249013, "grad_norm": 1.719351053237915, "learning_rate": 5.874859000729879e-06, "loss": 0.8456, "step": 7893 }, { "epoch": 0.4713961543055058, "grad_norm": 2.8347482681274414, "learning_rate": 5.874195474752838e-06, "loss": 0.8661, "step": 7894 }, { "epoch": 0.47145587005852146, "grad_norm": 3.0124802589416504, "learning_rate": 5.873531948775795e-06, "loss": 0.8907, "step": 7895 }, { "epoch": 0.4715155858115371, "grad_norm": 4.30218505859375, "learning_rate": 5.872868422798753e-06, "loss": 0.8889, "step": 7896 }, { "epoch": 0.47157530156455274, "grad_norm": 1.8851251602172852, "learning_rate": 5.872204896821711e-06, "loss": 0.8822, "step": 7897 }, { "epoch": 0.47163501731756835, "grad_norm": 3.2014520168304443, "learning_rate": 5.871541370844669e-06, "loss": 0.8785, "step": 7898 }, { "epoch": 0.471694733070584, "grad_norm": 1.73438560962677, "learning_rate": 5.870877844867627e-06, "loss": 0.8918, "step": 7899 }, { "epoch": 0.4717544488235997, "grad_norm": 5.856024742126465, "learning_rate": 5.870214318890585e-06, "loss": 0.8801, "step": 7900 }, { "epoch": 0.4718141645766153, "grad_norm": 2.2063777446746826, "learning_rate": 5.869550792913543e-06, "loss": 0.8783, "step": 7901 }, { "epoch": 0.47187388032963096, "grad_norm": 2.458400249481201, "learning_rate": 5.868887266936502e-06, "loss": 0.8446, "step": 7902 }, { "epoch": 0.4719335960826466, "grad_norm": 1.6801555156707764, "learning_rate": 5.868223740959459e-06, "loss": 0.8446, "step": 7903 }, { "epoch": 0.47199331183566223, "grad_norm": 2.839902877807617, "learning_rate": 5.867560214982417e-06, "loss": 0.8491, "step": 7904 }, { "epoch": 0.4720530275886779, "grad_norm": 1.9476908445358276, "learning_rate": 5.866896689005374e-06, "loss": 0.8688, "step": 7905 }, { "epoch": 0.47211274334169356, "grad_norm": 1.934896469116211, "learning_rate": 5.866233163028333e-06, "loss": 0.8734, "step": 7906 }, { "epoch": 0.4721724590947092, "grad_norm": 2.2205090522766113, "learning_rate": 5.865569637051291e-06, "loss": 0.8773, "step": 7907 }, { "epoch": 0.47223217484772484, "grad_norm": 3.189537525177002, "learning_rate": 5.8649061110742486e-06, "loss": 0.862, "step": 7908 }, { "epoch": 0.47229189060074045, "grad_norm": 6.455615997314453, "learning_rate": 5.8642425850972075e-06, "loss": 0.8647, "step": 7909 }, { "epoch": 0.4723516063537561, "grad_norm": 2.691322088241577, "learning_rate": 5.863579059120165e-06, "loss": 0.9118, "step": 7910 }, { "epoch": 0.4724113221067718, "grad_norm": 2.258867025375366, "learning_rate": 5.862915533143123e-06, "loss": 0.9003, "step": 7911 }, { "epoch": 0.4724710378597874, "grad_norm": 3.4884567260742188, "learning_rate": 5.862252007166082e-06, "loss": 0.8463, "step": 7912 }, { "epoch": 0.47253075361280306, "grad_norm": 4.726525783538818, "learning_rate": 5.861588481189039e-06, "loss": 0.87, "step": 7913 }, { "epoch": 0.4725904693658187, "grad_norm": 3.9930503368377686, "learning_rate": 5.860924955211997e-06, "loss": 0.8632, "step": 7914 }, { "epoch": 0.47265018511883433, "grad_norm": 2.3154373168945312, "learning_rate": 5.8602614292349544e-06, "loss": 0.8519, "step": 7915 }, { "epoch": 0.47270990087185, "grad_norm": 1.7554512023925781, "learning_rate": 5.859597903257913e-06, "loss": 0.857, "step": 7916 }, { "epoch": 0.47276961662486566, "grad_norm": 2.5897576808929443, "learning_rate": 5.8589343772808715e-06, "loss": 0.8477, "step": 7917 }, { "epoch": 0.4728293323778813, "grad_norm": 2.4879069328308105, "learning_rate": 5.858270851303829e-06, "loss": 0.8788, "step": 7918 }, { "epoch": 0.47288904813089694, "grad_norm": 1.87102210521698, "learning_rate": 5.857607325326788e-06, "loss": 0.8508, "step": 7919 }, { "epoch": 0.47294876388391255, "grad_norm": 2.713702917098999, "learning_rate": 5.856943799349745e-06, "loss": 0.8967, "step": 7920 }, { "epoch": 0.4730084796369282, "grad_norm": 2.527711868286133, "learning_rate": 5.856280273372703e-06, "loss": 0.8635, "step": 7921 }, { "epoch": 0.4730681953899439, "grad_norm": 2.597630023956299, "learning_rate": 5.855616747395661e-06, "loss": 0.8935, "step": 7922 }, { "epoch": 0.4731279111429595, "grad_norm": 2.0117602348327637, "learning_rate": 5.854953221418619e-06, "loss": 0.8682, "step": 7923 }, { "epoch": 0.47318762689597516, "grad_norm": 2.7217178344726562, "learning_rate": 5.854289695441577e-06, "loss": 0.8775, "step": 7924 }, { "epoch": 0.4732473426489908, "grad_norm": 2.5726845264434814, "learning_rate": 5.853626169464535e-06, "loss": 0.8516, "step": 7925 }, { "epoch": 0.47330705840200643, "grad_norm": 1.9797401428222656, "learning_rate": 5.852962643487493e-06, "loss": 0.8473, "step": 7926 }, { "epoch": 0.4733667741550221, "grad_norm": 2.2918426990509033, "learning_rate": 5.852299117510452e-06, "loss": 0.869, "step": 7927 }, { "epoch": 0.47342648990803776, "grad_norm": 2.5485153198242188, "learning_rate": 5.851635591533409e-06, "loss": 0.8483, "step": 7928 }, { "epoch": 0.47348620566105337, "grad_norm": 2.6141717433929443, "learning_rate": 5.850972065556367e-06, "loss": 0.8555, "step": 7929 }, { "epoch": 0.47354592141406904, "grad_norm": 2.2377676963806152, "learning_rate": 5.850308539579324e-06, "loss": 0.8629, "step": 7930 }, { "epoch": 0.4736056371670847, "grad_norm": 2.2387309074401855, "learning_rate": 5.849645013602283e-06, "loss": 0.8983, "step": 7931 }, { "epoch": 0.4736653529201003, "grad_norm": 1.8268860578536987, "learning_rate": 5.848981487625241e-06, "loss": 0.8796, "step": 7932 }, { "epoch": 0.473725068673116, "grad_norm": 1.7709970474243164, "learning_rate": 5.8483179616481985e-06, "loss": 0.8365, "step": 7933 }, { "epoch": 0.4737847844261316, "grad_norm": 2.2087509632110596, "learning_rate": 5.8476544356711575e-06, "loss": 0.8332, "step": 7934 }, { "epoch": 0.47384450017914725, "grad_norm": 1.895995020866394, "learning_rate": 5.846990909694115e-06, "loss": 0.8358, "step": 7935 }, { "epoch": 0.4739042159321629, "grad_norm": 2.3078339099884033, "learning_rate": 5.846327383717073e-06, "loss": 0.8833, "step": 7936 }, { "epoch": 0.47396393168517853, "grad_norm": 2.285313367843628, "learning_rate": 5.845663857740032e-06, "loss": 0.8882, "step": 7937 }, { "epoch": 0.4740236474381942, "grad_norm": 6.22983980178833, "learning_rate": 5.845000331762989e-06, "loss": 0.8807, "step": 7938 }, { "epoch": 0.47408336319120986, "grad_norm": 2.218187093734741, "learning_rate": 5.844336805785947e-06, "loss": 0.889, "step": 7939 }, { "epoch": 0.47414307894422547, "grad_norm": 1.8145455121994019, "learning_rate": 5.843673279808904e-06, "loss": 0.8452, "step": 7940 }, { "epoch": 0.47420279469724114, "grad_norm": 2.2459535598754883, "learning_rate": 5.843009753831863e-06, "loss": 0.8419, "step": 7941 }, { "epoch": 0.4742625104502568, "grad_norm": 2.2601773738861084, "learning_rate": 5.8423462278548214e-06, "loss": 0.8792, "step": 7942 }, { "epoch": 0.4743222262032724, "grad_norm": 2.42758846282959, "learning_rate": 5.841682701877779e-06, "loss": 0.8686, "step": 7943 }, { "epoch": 0.4743819419562881, "grad_norm": 2.5985162258148193, "learning_rate": 5.841019175900738e-06, "loss": 0.8661, "step": 7944 }, { "epoch": 0.4744416577093037, "grad_norm": 4.2374267578125, "learning_rate": 5.840355649923695e-06, "loss": 0.8922, "step": 7945 }, { "epoch": 0.47450137346231935, "grad_norm": 2.3292033672332764, "learning_rate": 5.839692123946653e-06, "loss": 0.8712, "step": 7946 }, { "epoch": 0.474561089215335, "grad_norm": 1.9216095209121704, "learning_rate": 5.839028597969611e-06, "loss": 0.8765, "step": 7947 }, { "epoch": 0.47462080496835063, "grad_norm": 2.259767532348633, "learning_rate": 5.838365071992569e-06, "loss": 0.8186, "step": 7948 }, { "epoch": 0.4746805207213663, "grad_norm": 1.6524674892425537, "learning_rate": 5.837701546015527e-06, "loss": 0.8452, "step": 7949 }, { "epoch": 0.47474023647438196, "grad_norm": 2.380688190460205, "learning_rate": 5.8370380200384846e-06, "loss": 0.8457, "step": 7950 }, { "epoch": 0.47479995222739757, "grad_norm": 1.8149250745773315, "learning_rate": 5.836374494061443e-06, "loss": 0.8704, "step": 7951 }, { "epoch": 0.47485966798041324, "grad_norm": 3.0254852771759033, "learning_rate": 5.835710968084402e-06, "loss": 0.8633, "step": 7952 }, { "epoch": 0.4749193837334289, "grad_norm": 2.9502038955688477, "learning_rate": 5.835047442107359e-06, "loss": 0.8419, "step": 7953 }, { "epoch": 0.4749790994864445, "grad_norm": 2.695713996887207, "learning_rate": 5.834383916130317e-06, "loss": 0.8512, "step": 7954 }, { "epoch": 0.4750388152394602, "grad_norm": 1.9451206922531128, "learning_rate": 5.833720390153274e-06, "loss": 0.8642, "step": 7955 }, { "epoch": 0.47509853099247584, "grad_norm": 2.1904900074005127, "learning_rate": 5.833056864176233e-06, "loss": 0.8602, "step": 7956 }, { "epoch": 0.47515824674549145, "grad_norm": 1.8459429740905762, "learning_rate": 5.832393338199191e-06, "loss": 0.904, "step": 7957 }, { "epoch": 0.4752179624985071, "grad_norm": 2.395801305770874, "learning_rate": 5.8317298122221485e-06, "loss": 0.8626, "step": 7958 }, { "epoch": 0.47527767825152273, "grad_norm": 2.005404233932495, "learning_rate": 5.8310662862451075e-06, "loss": 0.8158, "step": 7959 }, { "epoch": 0.4753373940045384, "grad_norm": 2.5531651973724365, "learning_rate": 5.830402760268065e-06, "loss": 0.8967, "step": 7960 }, { "epoch": 0.47539710975755406, "grad_norm": 2.36476469039917, "learning_rate": 5.829739234291023e-06, "loss": 0.9018, "step": 7961 }, { "epoch": 0.47545682551056967, "grad_norm": 1.6822359561920166, "learning_rate": 5.829075708313982e-06, "loss": 0.8584, "step": 7962 }, { "epoch": 0.47551654126358534, "grad_norm": 1.945364236831665, "learning_rate": 5.828412182336939e-06, "loss": 0.8567, "step": 7963 }, { "epoch": 0.475576257016601, "grad_norm": 2.086080312728882, "learning_rate": 5.827748656359897e-06, "loss": 0.8325, "step": 7964 }, { "epoch": 0.4756359727696166, "grad_norm": 3.7822234630584717, "learning_rate": 5.827085130382854e-06, "loss": 0.862, "step": 7965 }, { "epoch": 0.4756956885226323, "grad_norm": 2.3284807205200195, "learning_rate": 5.826421604405813e-06, "loss": 0.8782, "step": 7966 }, { "epoch": 0.47575540427564794, "grad_norm": 1.841712474822998, "learning_rate": 5.825758078428771e-06, "loss": 0.8697, "step": 7967 }, { "epoch": 0.47581512002866355, "grad_norm": 2.993645191192627, "learning_rate": 5.825094552451729e-06, "loss": 0.8501, "step": 7968 }, { "epoch": 0.4758748357816792, "grad_norm": 2.359464406967163, "learning_rate": 5.824431026474688e-06, "loss": 0.9055, "step": 7969 }, { "epoch": 0.4759345515346948, "grad_norm": 1.9902544021606445, "learning_rate": 5.823767500497645e-06, "loss": 0.8161, "step": 7970 }, { "epoch": 0.4759942672877105, "grad_norm": 2.1919710636138916, "learning_rate": 5.823103974520603e-06, "loss": 0.8517, "step": 7971 }, { "epoch": 0.47605398304072616, "grad_norm": 1.6376185417175293, "learning_rate": 5.822440448543561e-06, "loss": 0.8534, "step": 7972 }, { "epoch": 0.47611369879374177, "grad_norm": 5.672286033630371, "learning_rate": 5.821776922566519e-06, "loss": 0.9166, "step": 7973 }, { "epoch": 0.47617341454675743, "grad_norm": 2.6872751712799072, "learning_rate": 5.821113396589477e-06, "loss": 0.8446, "step": 7974 }, { "epoch": 0.4762331302997731, "grad_norm": 2.4694595336914062, "learning_rate": 5.8204498706124345e-06, "loss": 0.8611, "step": 7975 }, { "epoch": 0.4762928460527887, "grad_norm": 2.6590123176574707, "learning_rate": 5.819786344635393e-06, "loss": 0.8577, "step": 7976 }, { "epoch": 0.4763525618058044, "grad_norm": 2.040705442428589, "learning_rate": 5.8191228186583516e-06, "loss": 0.8595, "step": 7977 }, { "epoch": 0.47641227755882004, "grad_norm": 1.6912221908569336, "learning_rate": 5.818459292681309e-06, "loss": 0.8209, "step": 7978 }, { "epoch": 0.47647199331183565, "grad_norm": 1.482411503791809, "learning_rate": 5.817795766704267e-06, "loss": 0.8594, "step": 7979 }, { "epoch": 0.4765317090648513, "grad_norm": 1.9583629369735718, "learning_rate": 5.817132240727224e-06, "loss": 0.8561, "step": 7980 }, { "epoch": 0.4765914248178669, "grad_norm": 3.206202983856201, "learning_rate": 5.816468714750183e-06, "loss": 0.8305, "step": 7981 }, { "epoch": 0.4766511405708826, "grad_norm": 2.1918222904205322, "learning_rate": 5.815805188773141e-06, "loss": 0.882, "step": 7982 }, { "epoch": 0.47671085632389826, "grad_norm": 2.1501874923706055, "learning_rate": 5.8151416627960985e-06, "loss": 0.8611, "step": 7983 }, { "epoch": 0.47677057207691387, "grad_norm": 2.6378631591796875, "learning_rate": 5.8144781368190574e-06, "loss": 0.87, "step": 7984 }, { "epoch": 0.47683028782992953, "grad_norm": 2.2350380420684814, "learning_rate": 5.813814610842015e-06, "loss": 0.8764, "step": 7985 }, { "epoch": 0.4768900035829452, "grad_norm": 2.1868810653686523, "learning_rate": 5.813151084864973e-06, "loss": 0.8795, "step": 7986 }, { "epoch": 0.4769497193359608, "grad_norm": 2.0074071884155273, "learning_rate": 5.812487558887932e-06, "loss": 0.8541, "step": 7987 }, { "epoch": 0.4770094350889765, "grad_norm": 2.0655550956726074, "learning_rate": 5.811824032910889e-06, "loss": 0.8648, "step": 7988 }, { "epoch": 0.47706915084199214, "grad_norm": 1.619466781616211, "learning_rate": 5.811160506933847e-06, "loss": 0.8288, "step": 7989 }, { "epoch": 0.47712886659500775, "grad_norm": 2.490501642227173, "learning_rate": 5.810496980956804e-06, "loss": 0.8225, "step": 7990 }, { "epoch": 0.4771885823480234, "grad_norm": 2.389615058898926, "learning_rate": 5.809833454979763e-06, "loss": 0.8608, "step": 7991 }, { "epoch": 0.4772482981010391, "grad_norm": 2.0762975215911865, "learning_rate": 5.809169929002721e-06, "loss": 0.8972, "step": 7992 }, { "epoch": 0.4773080138540547, "grad_norm": 1.9488582611083984, "learning_rate": 5.808506403025679e-06, "loss": 0.8537, "step": 7993 }, { "epoch": 0.47736772960707036, "grad_norm": 1.914475440979004, "learning_rate": 5.807842877048638e-06, "loss": 0.8776, "step": 7994 }, { "epoch": 0.47742744536008597, "grad_norm": 3.027836322784424, "learning_rate": 5.807179351071595e-06, "loss": 0.9065, "step": 7995 }, { "epoch": 0.47748716111310163, "grad_norm": 1.753490924835205, "learning_rate": 5.806515825094553e-06, "loss": 0.8527, "step": 7996 }, { "epoch": 0.4775468768661173, "grad_norm": 2.00355863571167, "learning_rate": 5.805852299117512e-06, "loss": 0.8908, "step": 7997 }, { "epoch": 0.4776065926191329, "grad_norm": 2.719393253326416, "learning_rate": 5.805188773140469e-06, "loss": 0.8767, "step": 7998 }, { "epoch": 0.4776663083721486, "grad_norm": 2.7887277603149414, "learning_rate": 5.804525247163427e-06, "loss": 0.8298, "step": 7999 }, { "epoch": 0.47772602412516424, "grad_norm": 5.399857997894287, "learning_rate": 5.8038617211863845e-06, "loss": 0.87, "step": 8000 }, { "epoch": 0.47772602412516424, "eval_text_loss": 0.9173658490180969, "eval_text_runtime": 15.2216, "eval_text_samples_per_second": 262.784, "eval_text_steps_per_second": 0.526, "step": 8000 }, { "epoch": 0.47772602412516424, "eval_image_loss": 0.6315731406211853, "eval_image_runtime": 4.9886, "eval_image_samples_per_second": 801.826, "eval_image_steps_per_second": 1.604, "step": 8000 }, { "epoch": 0.47772602412516424, "eval_video_loss": 1.0770761966705322, "eval_video_runtime": 76.6807, "eval_video_samples_per_second": 52.164, "eval_video_steps_per_second": 0.104, "step": 8000 }, { "epoch": 0.47778573987817985, "grad_norm": 2.311121702194214, "learning_rate": 5.803198195209343e-06, "loss": 0.8318, "step": 8001 }, { "epoch": 0.4778454556311955, "grad_norm": 1.867639183998108, "learning_rate": 5.8025346692323015e-06, "loss": 0.8349, "step": 8002 }, { "epoch": 0.4779051713842112, "grad_norm": 2.99684476852417, "learning_rate": 5.801871143255259e-06, "loss": 0.8417, "step": 8003 }, { "epoch": 0.4779648871372268, "grad_norm": 2.0955724716186523, "learning_rate": 5.801207617278217e-06, "loss": 0.8581, "step": 8004 }, { "epoch": 0.47802460289024246, "grad_norm": 1.862640619277954, "learning_rate": 5.800544091301174e-06, "loss": 0.9124, "step": 8005 }, { "epoch": 0.47808431864325807, "grad_norm": 2.4292967319488525, "learning_rate": 5.799880565324133e-06, "loss": 0.8621, "step": 8006 }, { "epoch": 0.47814403439627373, "grad_norm": 2.0773134231567383, "learning_rate": 5.799217039347091e-06, "loss": 0.8628, "step": 8007 }, { "epoch": 0.4782037501492894, "grad_norm": 2.4636964797973633, "learning_rate": 5.7985535133700485e-06, "loss": 0.8335, "step": 8008 }, { "epoch": 0.478263465902305, "grad_norm": 2.53839111328125, "learning_rate": 5.797889987393007e-06, "loss": 0.8714, "step": 8009 }, { "epoch": 0.4783231816553207, "grad_norm": 2.7300775051116943, "learning_rate": 5.797226461415965e-06, "loss": 0.8614, "step": 8010 }, { "epoch": 0.47838289740833634, "grad_norm": 1.5691125392913818, "learning_rate": 5.796562935438923e-06, "loss": 0.855, "step": 8011 }, { "epoch": 0.47844261316135195, "grad_norm": 8.418724060058594, "learning_rate": 5.795899409461882e-06, "loss": 0.9102, "step": 8012 }, { "epoch": 0.4785023289143676, "grad_norm": 2.3359267711639404, "learning_rate": 5.795235883484839e-06, "loss": 0.9002, "step": 8013 }, { "epoch": 0.4785620446673833, "grad_norm": 2.493208885192871, "learning_rate": 5.794572357507797e-06, "loss": 0.8656, "step": 8014 }, { "epoch": 0.4786217604203989, "grad_norm": 4.2308669090271, "learning_rate": 5.793908831530754e-06, "loss": 0.867, "step": 8015 }, { "epoch": 0.47868147617341456, "grad_norm": 3.0382344722747803, "learning_rate": 5.793245305553713e-06, "loss": 0.8449, "step": 8016 }, { "epoch": 0.47874119192643017, "grad_norm": 4.248488426208496, "learning_rate": 5.792581779576671e-06, "loss": 0.8714, "step": 8017 }, { "epoch": 0.47880090767944583, "grad_norm": 2.6642236709594727, "learning_rate": 5.791918253599629e-06, "loss": 0.8652, "step": 8018 }, { "epoch": 0.4788606234324615, "grad_norm": 4.4742841720581055, "learning_rate": 5.7912547276225876e-06, "loss": 0.8474, "step": 8019 }, { "epoch": 0.4789203391854771, "grad_norm": 2.8064041137695312, "learning_rate": 5.790591201645545e-06, "loss": 0.8598, "step": 8020 }, { "epoch": 0.47898005493849277, "grad_norm": 3.3015244007110596, "learning_rate": 5.789927675668503e-06, "loss": 0.8724, "step": 8021 }, { "epoch": 0.47903977069150844, "grad_norm": 2.0141611099243164, "learning_rate": 5.789264149691462e-06, "loss": 0.8778, "step": 8022 }, { "epoch": 0.47909948644452405, "grad_norm": 1.6813567876815796, "learning_rate": 5.788600623714419e-06, "loss": 0.8687, "step": 8023 }, { "epoch": 0.4791592021975397, "grad_norm": 2.858900785446167, "learning_rate": 5.787937097737377e-06, "loss": 0.8565, "step": 8024 }, { "epoch": 0.4792189179505554, "grad_norm": 2.0100483894348145, "learning_rate": 5.7872735717603345e-06, "loss": 0.8756, "step": 8025 }, { "epoch": 0.479278633703571, "grad_norm": 2.046232223510742, "learning_rate": 5.786610045783293e-06, "loss": 0.8403, "step": 8026 }, { "epoch": 0.47933834945658665, "grad_norm": 1.9255180358886719, "learning_rate": 5.7859465198062515e-06, "loss": 0.8398, "step": 8027 }, { "epoch": 0.4793980652096023, "grad_norm": 2.3425464630126953, "learning_rate": 5.785282993829209e-06, "loss": 0.8722, "step": 8028 }, { "epoch": 0.47945778096261793, "grad_norm": 2.7100419998168945, "learning_rate": 5.784619467852167e-06, "loss": 0.8715, "step": 8029 }, { "epoch": 0.4795174967156336, "grad_norm": 2.164332151412964, "learning_rate": 5.783955941875124e-06, "loss": 0.8533, "step": 8030 }, { "epoch": 0.4795772124686492, "grad_norm": 2.099386692047119, "learning_rate": 5.783292415898083e-06, "loss": 0.8412, "step": 8031 }, { "epoch": 0.47963692822166487, "grad_norm": 2.4473536014556885, "learning_rate": 5.782628889921041e-06, "loss": 0.8832, "step": 8032 }, { "epoch": 0.47969664397468054, "grad_norm": 1.6226425170898438, "learning_rate": 5.7819653639439984e-06, "loss": 0.852, "step": 8033 }, { "epoch": 0.47975635972769615, "grad_norm": 2.267683267593384, "learning_rate": 5.781301837966957e-06, "loss": 0.8669, "step": 8034 }, { "epoch": 0.4798160754807118, "grad_norm": 3.175917148590088, "learning_rate": 5.780638311989915e-06, "loss": 0.8336, "step": 8035 }, { "epoch": 0.4798757912337275, "grad_norm": 2.118208408355713, "learning_rate": 5.779974786012873e-06, "loss": 0.871, "step": 8036 }, { "epoch": 0.4799355069867431, "grad_norm": 1.7861312627792358, "learning_rate": 5.779311260035832e-06, "loss": 0.8606, "step": 8037 }, { "epoch": 0.47999522273975875, "grad_norm": 2.1748435497283936, "learning_rate": 5.778647734058789e-06, "loss": 0.8475, "step": 8038 }, { "epoch": 0.4800549384927744, "grad_norm": 2.021761894226074, "learning_rate": 5.777984208081747e-06, "loss": 0.861, "step": 8039 }, { "epoch": 0.48011465424579003, "grad_norm": 2.2492151260375977, "learning_rate": 5.777320682104704e-06, "loss": 0.8877, "step": 8040 }, { "epoch": 0.4801743699988057, "grad_norm": 3.62384295463562, "learning_rate": 5.776657156127663e-06, "loss": 0.8532, "step": 8041 }, { "epoch": 0.4802340857518213, "grad_norm": 2.464005708694458, "learning_rate": 5.775993630150621e-06, "loss": 0.8558, "step": 8042 }, { "epoch": 0.48029380150483697, "grad_norm": 4.3057475090026855, "learning_rate": 5.775330104173579e-06, "loss": 0.8866, "step": 8043 }, { "epoch": 0.48035351725785264, "grad_norm": 1.7494782209396362, "learning_rate": 5.7746665781965375e-06, "loss": 0.8294, "step": 8044 }, { "epoch": 0.48041323301086825, "grad_norm": 2.575873613357544, "learning_rate": 5.774003052219495e-06, "loss": 0.8662, "step": 8045 }, { "epoch": 0.4804729487638839, "grad_norm": 2.039466381072998, "learning_rate": 5.773339526242453e-06, "loss": 0.8571, "step": 8046 }, { "epoch": 0.4805326645168996, "grad_norm": 2.2961905002593994, "learning_rate": 5.772676000265412e-06, "loss": 0.8619, "step": 8047 }, { "epoch": 0.4805923802699152, "grad_norm": 2.1255459785461426, "learning_rate": 5.772012474288369e-06, "loss": 0.8623, "step": 8048 }, { "epoch": 0.48065209602293085, "grad_norm": 3.9835429191589355, "learning_rate": 5.771348948311327e-06, "loss": 0.8395, "step": 8049 }, { "epoch": 0.4807118117759465, "grad_norm": 2.39933705329895, "learning_rate": 5.7706854223342844e-06, "loss": 0.8885, "step": 8050 }, { "epoch": 0.48077152752896213, "grad_norm": 2.8461334705352783, "learning_rate": 5.770021896357243e-06, "loss": 0.8512, "step": 8051 }, { "epoch": 0.4808312432819778, "grad_norm": 2.1693973541259766, "learning_rate": 5.7693583703802015e-06, "loss": 0.8505, "step": 8052 }, { "epoch": 0.4808909590349934, "grad_norm": 2.007904052734375, "learning_rate": 5.768694844403159e-06, "loss": 0.8644, "step": 8053 }, { "epoch": 0.48095067478800907, "grad_norm": 2.2110519409179688, "learning_rate": 5.768031318426117e-06, "loss": 0.8446, "step": 8054 }, { "epoch": 0.48101039054102473, "grad_norm": 2.561474084854126, "learning_rate": 5.767367792449075e-06, "loss": 0.8666, "step": 8055 }, { "epoch": 0.48107010629404034, "grad_norm": 2.6202638149261475, "learning_rate": 5.766704266472033e-06, "loss": 0.8558, "step": 8056 }, { "epoch": 0.481129822047056, "grad_norm": 3.7846100330352783, "learning_rate": 5.76604074049499e-06, "loss": 0.8626, "step": 8057 }, { "epoch": 0.4811895378000717, "grad_norm": 1.7353177070617676, "learning_rate": 5.765377214517948e-06, "loss": 0.8408, "step": 8058 }, { "epoch": 0.4812492535530873, "grad_norm": 4.554354667663574, "learning_rate": 5.764713688540907e-06, "loss": 0.8412, "step": 8059 }, { "epoch": 0.48130896930610295, "grad_norm": 2.5694079399108887, "learning_rate": 5.764050162563865e-06, "loss": 0.8691, "step": 8060 }, { "epoch": 0.4813686850591186, "grad_norm": 2.1548829078674316, "learning_rate": 5.763386636586823e-06, "loss": 0.8642, "step": 8061 }, { "epoch": 0.4814284008121342, "grad_norm": 1.9835646152496338, "learning_rate": 5.76272311060978e-06, "loss": 0.8383, "step": 8062 }, { "epoch": 0.4814881165651499, "grad_norm": 2.2109625339508057, "learning_rate": 5.762059584632739e-06, "loss": 0.8641, "step": 8063 }, { "epoch": 0.48154783231816556, "grad_norm": 4.289899826049805, "learning_rate": 5.761396058655697e-06, "loss": 0.8373, "step": 8064 }, { "epoch": 0.48160754807118117, "grad_norm": 2.0937297344207764, "learning_rate": 5.760732532678654e-06, "loss": 0.8726, "step": 8065 }, { "epoch": 0.48166726382419683, "grad_norm": 2.702005386352539, "learning_rate": 5.760069006701613e-06, "loss": 0.8742, "step": 8066 }, { "epoch": 0.48172697957721244, "grad_norm": 1.7819195985794067, "learning_rate": 5.7594054807245705e-06, "loss": 0.8584, "step": 8067 }, { "epoch": 0.4817866953302281, "grad_norm": 1.923742651939392, "learning_rate": 5.7587419547475286e-06, "loss": 0.8438, "step": 8068 }, { "epoch": 0.4818464110832438, "grad_norm": 1.639920711517334, "learning_rate": 5.7580784287704875e-06, "loss": 0.876, "step": 8069 }, { "epoch": 0.4819061268362594, "grad_norm": 3.1244521141052246, "learning_rate": 5.757414902793445e-06, "loss": 0.8676, "step": 8070 }, { "epoch": 0.48196584258927505, "grad_norm": 3.7412595748901367, "learning_rate": 5.756751376816403e-06, "loss": 0.8675, "step": 8071 }, { "epoch": 0.4820255583422907, "grad_norm": 2.0096516609191895, "learning_rate": 5.75608785083936e-06, "loss": 0.8758, "step": 8072 }, { "epoch": 0.4820852740953063, "grad_norm": 2.330897331237793, "learning_rate": 5.755424324862319e-06, "loss": 0.808, "step": 8073 }, { "epoch": 0.482144989848322, "grad_norm": 2.222585678100586, "learning_rate": 5.754760798885277e-06, "loss": 0.8484, "step": 8074 }, { "epoch": 0.48220470560133766, "grad_norm": 2.7000250816345215, "learning_rate": 5.754097272908234e-06, "loss": 0.895, "step": 8075 }, { "epoch": 0.48226442135435327, "grad_norm": 2.869256019592285, "learning_rate": 5.753433746931193e-06, "loss": 0.9309, "step": 8076 }, { "epoch": 0.48232413710736893, "grad_norm": 1.72825026512146, "learning_rate": 5.752770220954151e-06, "loss": 0.8286, "step": 8077 }, { "epoch": 0.48238385286038454, "grad_norm": 2.0337746143341064, "learning_rate": 5.752106694977109e-06, "loss": 0.8955, "step": 8078 }, { "epoch": 0.4824435686134002, "grad_norm": 2.7802751064300537, "learning_rate": 5.751443169000067e-06, "loss": 0.8785, "step": 8079 }, { "epoch": 0.4825032843664159, "grad_norm": 1.688410758972168, "learning_rate": 5.750779643023025e-06, "loss": 0.839, "step": 8080 }, { "epoch": 0.4825630001194315, "grad_norm": 1.7643073797225952, "learning_rate": 5.750116117045983e-06, "loss": 0.8425, "step": 8081 }, { "epoch": 0.48262271587244715, "grad_norm": 1.8050223588943481, "learning_rate": 5.74945259106894e-06, "loss": 0.8332, "step": 8082 }, { "epoch": 0.4826824316254628, "grad_norm": 5.313632965087891, "learning_rate": 5.748789065091898e-06, "loss": 0.8485, "step": 8083 }, { "epoch": 0.4827421473784784, "grad_norm": 2.6913373470306396, "learning_rate": 5.748125539114857e-06, "loss": 0.8489, "step": 8084 }, { "epoch": 0.4828018631314941, "grad_norm": 3.3289999961853027, "learning_rate": 5.7474620131378146e-06, "loss": 0.8896, "step": 8085 }, { "epoch": 0.48286157888450976, "grad_norm": 1.9843086004257202, "learning_rate": 5.746798487160773e-06, "loss": 0.864, "step": 8086 }, { "epoch": 0.48292129463752537, "grad_norm": 2.5083014965057373, "learning_rate": 5.74613496118373e-06, "loss": 0.8783, "step": 8087 }, { "epoch": 0.48298101039054103, "grad_norm": 3.3872008323669434, "learning_rate": 5.745471435206689e-06, "loss": 0.9, "step": 8088 }, { "epoch": 0.4830407261435567, "grad_norm": 2.0035717487335205, "learning_rate": 5.744807909229647e-06, "loss": 0.8613, "step": 8089 }, { "epoch": 0.4831004418965723, "grad_norm": 2.1178159713745117, "learning_rate": 5.744144383252604e-06, "loss": 0.8559, "step": 8090 }, { "epoch": 0.483160157649588, "grad_norm": 2.8554584980010986, "learning_rate": 5.743480857275563e-06, "loss": 0.8431, "step": 8091 }, { "epoch": 0.4832198734026036, "grad_norm": 1.791074514389038, "learning_rate": 5.7428173312985204e-06, "loss": 0.8562, "step": 8092 }, { "epoch": 0.48327958915561925, "grad_norm": 3.0981764793395996, "learning_rate": 5.7421538053214785e-06, "loss": 0.9221, "step": 8093 }, { "epoch": 0.4833393049086349, "grad_norm": 2.181407928466797, "learning_rate": 5.7414902793444375e-06, "loss": 0.8522, "step": 8094 }, { "epoch": 0.4833990206616505, "grad_norm": 3.020956516265869, "learning_rate": 5.740826753367395e-06, "loss": 0.8676, "step": 8095 }, { "epoch": 0.4834587364146662, "grad_norm": 3.45751690864563, "learning_rate": 5.740163227390353e-06, "loss": 0.8169, "step": 8096 }, { "epoch": 0.48351845216768186, "grad_norm": 1.6207202672958374, "learning_rate": 5.73949970141331e-06, "loss": 0.8427, "step": 8097 }, { "epoch": 0.48357816792069747, "grad_norm": 2.882619857788086, "learning_rate": 5.738836175436269e-06, "loss": 0.888, "step": 8098 }, { "epoch": 0.48363788367371313, "grad_norm": 2.0668230056762695, "learning_rate": 5.738172649459227e-06, "loss": 0.92, "step": 8099 }, { "epoch": 0.4836975994267288, "grad_norm": 1.6025652885437012, "learning_rate": 5.737509123482184e-06, "loss": 0.8424, "step": 8100 }, { "epoch": 0.4837573151797444, "grad_norm": 2.536482572555542, "learning_rate": 5.736845597505143e-06, "loss": 0.8256, "step": 8101 }, { "epoch": 0.4838170309327601, "grad_norm": 2.4316883087158203, "learning_rate": 5.736182071528101e-06, "loss": 0.8639, "step": 8102 }, { "epoch": 0.4838767466857757, "grad_norm": 3.289393424987793, "learning_rate": 5.735518545551059e-06, "loss": 0.8775, "step": 8103 }, { "epoch": 0.48393646243879135, "grad_norm": 14.270493507385254, "learning_rate": 5.734855019574017e-06, "loss": 0.8734, "step": 8104 }, { "epoch": 0.483996178191807, "grad_norm": 3.145148754119873, "learning_rate": 5.734191493596975e-06, "loss": 0.8832, "step": 8105 }, { "epoch": 0.4840558939448226, "grad_norm": 2.6306192874908447, "learning_rate": 5.733527967619933e-06, "loss": 0.8385, "step": 8106 }, { "epoch": 0.4841156096978383, "grad_norm": 2.7809274196624756, "learning_rate": 5.73286444164289e-06, "loss": 0.8636, "step": 8107 }, { "epoch": 0.48417532545085396, "grad_norm": 3.405046224594116, "learning_rate": 5.732200915665848e-06, "loss": 0.9119, "step": 8108 }, { "epoch": 0.48423504120386957, "grad_norm": 1.9878957271575928, "learning_rate": 5.731537389688807e-06, "loss": 0.8204, "step": 8109 }, { "epoch": 0.48429475695688523, "grad_norm": 2.8036556243896484, "learning_rate": 5.7308738637117645e-06, "loss": 0.8735, "step": 8110 }, { "epoch": 0.4843544727099009, "grad_norm": 2.1585793495178223, "learning_rate": 5.730210337734723e-06, "loss": 0.8618, "step": 8111 }, { "epoch": 0.4844141884629165, "grad_norm": 2.339660406112671, "learning_rate": 5.72954681175768e-06, "loss": 0.8634, "step": 8112 }, { "epoch": 0.48447390421593217, "grad_norm": 2.7594046592712402, "learning_rate": 5.728883285780639e-06, "loss": 0.8619, "step": 8113 }, { "epoch": 0.4845336199689478, "grad_norm": 2.3645436763763428, "learning_rate": 5.728219759803597e-06, "loss": 0.8892, "step": 8114 }, { "epoch": 0.48459333572196345, "grad_norm": 2.496351480484009, "learning_rate": 5.727556233826554e-06, "loss": 0.8345, "step": 8115 }, { "epoch": 0.4846530514749791, "grad_norm": 3.4749908447265625, "learning_rate": 5.726892707849513e-06, "loss": 0.8764, "step": 8116 }, { "epoch": 0.4847127672279947, "grad_norm": 1.8779025077819824, "learning_rate": 5.72622918187247e-06, "loss": 0.8481, "step": 8117 }, { "epoch": 0.4847724829810104, "grad_norm": 2.5562832355499268, "learning_rate": 5.7255656558954285e-06, "loss": 0.8427, "step": 8118 }, { "epoch": 0.48483219873402605, "grad_norm": 3.5747628211975098, "learning_rate": 5.7249021299183874e-06, "loss": 0.8678, "step": 8119 }, { "epoch": 0.48489191448704166, "grad_norm": 2.1412651538848877, "learning_rate": 5.724238603941345e-06, "loss": 0.882, "step": 8120 }, { "epoch": 0.48495163024005733, "grad_norm": 1.6901613473892212, "learning_rate": 5.723575077964303e-06, "loss": 0.8799, "step": 8121 }, { "epoch": 0.485011345993073, "grad_norm": 3.7891101837158203, "learning_rate": 5.72291155198726e-06, "loss": 0.8302, "step": 8122 }, { "epoch": 0.4850710617460886, "grad_norm": 2.8618392944335938, "learning_rate": 5.722248026010219e-06, "loss": 0.9043, "step": 8123 }, { "epoch": 0.48513077749910427, "grad_norm": 3.5845303535461426, "learning_rate": 5.721584500033177e-06, "loss": 0.8398, "step": 8124 }, { "epoch": 0.48519049325211994, "grad_norm": 2.4701809883117676, "learning_rate": 5.720920974056134e-06, "loss": 0.8587, "step": 8125 }, { "epoch": 0.48525020900513555, "grad_norm": 4.2707953453063965, "learning_rate": 5.720257448079093e-06, "loss": 0.8697, "step": 8126 }, { "epoch": 0.4853099247581512, "grad_norm": 2.0165185928344727, "learning_rate": 5.7195939221020506e-06, "loss": 0.8689, "step": 8127 }, { "epoch": 0.4853696405111668, "grad_norm": 2.946882963180542, "learning_rate": 5.718930396125009e-06, "loss": 0.8401, "step": 8128 }, { "epoch": 0.4854293562641825, "grad_norm": 2.149517059326172, "learning_rate": 5.718266870147967e-06, "loss": 0.8338, "step": 8129 }, { "epoch": 0.48548907201719815, "grad_norm": 2.0716280937194824, "learning_rate": 5.717603344170925e-06, "loss": 0.8691, "step": 8130 }, { "epoch": 0.48554878777021376, "grad_norm": 3.1003801822662354, "learning_rate": 5.716939818193883e-06, "loss": 0.8626, "step": 8131 }, { "epoch": 0.48560850352322943, "grad_norm": 2.4144959449768066, "learning_rate": 5.71627629221684e-06, "loss": 0.8183, "step": 8132 }, { "epoch": 0.4856682192762451, "grad_norm": 2.0428287982940674, "learning_rate": 5.715612766239798e-06, "loss": 0.8651, "step": 8133 }, { "epoch": 0.4857279350292607, "grad_norm": 2.6420652866363525, "learning_rate": 5.714949240262757e-06, "loss": 0.8295, "step": 8134 }, { "epoch": 0.48578765078227637, "grad_norm": 2.054884910583496, "learning_rate": 5.7142857142857145e-06, "loss": 0.8343, "step": 8135 }, { "epoch": 0.48584736653529204, "grad_norm": 1.8666037321090698, "learning_rate": 5.713622188308673e-06, "loss": 0.8811, "step": 8136 }, { "epoch": 0.48590708228830765, "grad_norm": 3.125596046447754, "learning_rate": 5.71295866233163e-06, "loss": 0.852, "step": 8137 }, { "epoch": 0.4859667980413233, "grad_norm": 3.2645037174224854, "learning_rate": 5.712295136354589e-06, "loss": 0.8678, "step": 8138 }, { "epoch": 0.4860265137943389, "grad_norm": 2.2422146797180176, "learning_rate": 5.711631610377547e-06, "loss": 0.8624, "step": 8139 }, { "epoch": 0.4860862295473546, "grad_norm": 1.9633959531784058, "learning_rate": 5.710968084400504e-06, "loss": 0.8585, "step": 8140 }, { "epoch": 0.48614594530037025, "grad_norm": 2.4833648204803467, "learning_rate": 5.710304558423463e-06, "loss": 0.8653, "step": 8141 }, { "epoch": 0.48620566105338586, "grad_norm": 2.4615983963012695, "learning_rate": 5.70964103244642e-06, "loss": 0.9069, "step": 8142 }, { "epoch": 0.48626537680640153, "grad_norm": 2.173177719116211, "learning_rate": 5.7089775064693785e-06, "loss": 0.8642, "step": 8143 }, { "epoch": 0.4863250925594172, "grad_norm": 2.6175663471221924, "learning_rate": 5.708313980492337e-06, "loss": 0.848, "step": 8144 }, { "epoch": 0.4863848083124328, "grad_norm": 2.2244873046875, "learning_rate": 5.707650454515295e-06, "loss": 0.8592, "step": 8145 }, { "epoch": 0.48644452406544847, "grad_norm": 2.2613577842712402, "learning_rate": 5.706986928538253e-06, "loss": 0.8735, "step": 8146 }, { "epoch": 0.48650423981846413, "grad_norm": 2.2246901988983154, "learning_rate": 5.70632340256121e-06, "loss": 0.8807, "step": 8147 }, { "epoch": 0.48656395557147974, "grad_norm": 2.0399584770202637, "learning_rate": 5.705659876584169e-06, "loss": 0.8697, "step": 8148 }, { "epoch": 0.4866236713244954, "grad_norm": 1.6558001041412354, "learning_rate": 5.704996350607127e-06, "loss": 0.8252, "step": 8149 }, { "epoch": 0.486683387077511, "grad_norm": 2.7855894565582275, "learning_rate": 5.704332824630084e-06, "loss": 0.8921, "step": 8150 }, { "epoch": 0.4867431028305267, "grad_norm": 2.3916468620300293, "learning_rate": 5.703669298653043e-06, "loss": 0.8701, "step": 8151 }, { "epoch": 0.48680281858354235, "grad_norm": 2.4894793033599854, "learning_rate": 5.7030057726760005e-06, "loss": 0.8883, "step": 8152 }, { "epoch": 0.48686253433655796, "grad_norm": 2.273411989212036, "learning_rate": 5.702342246698959e-06, "loss": 0.8716, "step": 8153 }, { "epoch": 0.4869222500895736, "grad_norm": 1.9591412544250488, "learning_rate": 5.701678720721917e-06, "loss": 0.8564, "step": 8154 }, { "epoch": 0.4869819658425893, "grad_norm": 6.62717866897583, "learning_rate": 5.701015194744875e-06, "loss": 0.8759, "step": 8155 }, { "epoch": 0.4870416815956049, "grad_norm": 2.297975540161133, "learning_rate": 5.700351668767833e-06, "loss": 0.916, "step": 8156 }, { "epoch": 0.48710139734862057, "grad_norm": 2.1032943725585938, "learning_rate": 5.69968814279079e-06, "loss": 0.8694, "step": 8157 }, { "epoch": 0.48716111310163623, "grad_norm": 2.2974374294281006, "learning_rate": 5.699024616813748e-06, "loss": 0.8967, "step": 8158 }, { "epoch": 0.48722082885465184, "grad_norm": 3.413120746612549, "learning_rate": 5.698361090836707e-06, "loss": 0.8883, "step": 8159 }, { "epoch": 0.4872805446076675, "grad_norm": 2.217897653579712, "learning_rate": 5.6976975648596645e-06, "loss": 0.8396, "step": 8160 }, { "epoch": 0.4873402603606832, "grad_norm": 1.8435817956924438, "learning_rate": 5.697034038882623e-06, "loss": 0.8636, "step": 8161 }, { "epoch": 0.4873999761136988, "grad_norm": 1.655226707458496, "learning_rate": 5.69637051290558e-06, "loss": 0.8662, "step": 8162 }, { "epoch": 0.48745969186671445, "grad_norm": 2.5184926986694336, "learning_rate": 5.695706986928539e-06, "loss": 0.8357, "step": 8163 }, { "epoch": 0.48751940761973006, "grad_norm": 1.80681312084198, "learning_rate": 5.695043460951497e-06, "loss": 0.8509, "step": 8164 }, { "epoch": 0.4875791233727457, "grad_norm": 2.1870293617248535, "learning_rate": 5.694379934974454e-06, "loss": 0.8677, "step": 8165 }, { "epoch": 0.4876388391257614, "grad_norm": 2.1000216007232666, "learning_rate": 5.693716408997413e-06, "loss": 0.8478, "step": 8166 }, { "epoch": 0.487698554878777, "grad_norm": 2.3358514308929443, "learning_rate": 5.69305288302037e-06, "loss": 0.8631, "step": 8167 }, { "epoch": 0.48775827063179267, "grad_norm": 2.136333703994751, "learning_rate": 5.6923893570433284e-06, "loss": 0.8606, "step": 8168 }, { "epoch": 0.48781798638480833, "grad_norm": 2.61623477935791, "learning_rate": 5.691725831066287e-06, "loss": 0.8495, "step": 8169 }, { "epoch": 0.48787770213782394, "grad_norm": 2.587996006011963, "learning_rate": 5.691062305089245e-06, "loss": 0.8719, "step": 8170 }, { "epoch": 0.4879374178908396, "grad_norm": 2.173427104949951, "learning_rate": 5.690398779112203e-06, "loss": 0.8597, "step": 8171 }, { "epoch": 0.4879971336438553, "grad_norm": 2.4855425357818604, "learning_rate": 5.68973525313516e-06, "loss": 0.8672, "step": 8172 }, { "epoch": 0.4880568493968709, "grad_norm": 2.5959744453430176, "learning_rate": 5.689071727158119e-06, "loss": 0.8798, "step": 8173 }, { "epoch": 0.48811656514988655, "grad_norm": 1.7867037057876587, "learning_rate": 5.688408201181077e-06, "loss": 0.8925, "step": 8174 }, { "epoch": 0.48817628090290216, "grad_norm": 3.886784553527832, "learning_rate": 5.687744675204034e-06, "loss": 0.8771, "step": 8175 }, { "epoch": 0.4882359966559178, "grad_norm": 2.084149122238159, "learning_rate": 5.687081149226993e-06, "loss": 0.864, "step": 8176 }, { "epoch": 0.4882957124089335, "grad_norm": 4.335297584533691, "learning_rate": 5.6864176232499505e-06, "loss": 0.8933, "step": 8177 }, { "epoch": 0.4883554281619491, "grad_norm": 1.646130919456482, "learning_rate": 5.685754097272909e-06, "loss": 0.8395, "step": 8178 }, { "epoch": 0.48841514391496477, "grad_norm": 2.0525963306427, "learning_rate": 5.685090571295867e-06, "loss": 0.8419, "step": 8179 }, { "epoch": 0.48847485966798043, "grad_norm": 2.104400157928467, "learning_rate": 5.684427045318825e-06, "loss": 0.837, "step": 8180 }, { "epoch": 0.48853457542099604, "grad_norm": 2.399308443069458, "learning_rate": 5.683763519341783e-06, "loss": 0.9119, "step": 8181 }, { "epoch": 0.4885942911740117, "grad_norm": 3.4364707469940186, "learning_rate": 5.68309999336474e-06, "loss": 0.8906, "step": 8182 }, { "epoch": 0.4886540069270274, "grad_norm": 3.6872949600219727, "learning_rate": 5.682436467387698e-06, "loss": 0.8868, "step": 8183 }, { "epoch": 0.488713722680043, "grad_norm": 2.682379961013794, "learning_rate": 5.681772941410657e-06, "loss": 0.8613, "step": 8184 }, { "epoch": 0.48877343843305865, "grad_norm": 2.8696751594543457, "learning_rate": 5.6811094154336145e-06, "loss": 0.8831, "step": 8185 }, { "epoch": 0.48883315418607426, "grad_norm": 2.1189048290252686, "learning_rate": 5.6804458894565726e-06, "loss": 0.8597, "step": 8186 }, { "epoch": 0.4888928699390899, "grad_norm": 2.030134439468384, "learning_rate": 5.67978236347953e-06, "loss": 0.8281, "step": 8187 }, { "epoch": 0.4889525856921056, "grad_norm": 2.7237119674682617, "learning_rate": 5.679118837502489e-06, "loss": 0.8717, "step": 8188 }, { "epoch": 0.4890123014451212, "grad_norm": 3.363434314727783, "learning_rate": 5.678455311525447e-06, "loss": 0.8305, "step": 8189 }, { "epoch": 0.48907201719813687, "grad_norm": 2.077460765838623, "learning_rate": 5.677791785548404e-06, "loss": 0.8485, "step": 8190 }, { "epoch": 0.48913173295115253, "grad_norm": 1.9885976314544678, "learning_rate": 5.677128259571363e-06, "loss": 0.8924, "step": 8191 }, { "epoch": 0.48919144870416814, "grad_norm": 2.1603212356567383, "learning_rate": 5.67646473359432e-06, "loss": 0.8499, "step": 8192 }, { "epoch": 0.4892511644571838, "grad_norm": 2.2421436309814453, "learning_rate": 5.675801207617278e-06, "loss": 0.8232, "step": 8193 }, { "epoch": 0.4893108802101995, "grad_norm": 2.267641544342041, "learning_rate": 5.675137681640237e-06, "loss": 0.8768, "step": 8194 }, { "epoch": 0.4893705959632151, "grad_norm": 1.8068783283233643, "learning_rate": 5.674474155663195e-06, "loss": 0.8421, "step": 8195 }, { "epoch": 0.48943031171623075, "grad_norm": 2.007673501968384, "learning_rate": 5.673810629686153e-06, "loss": 0.8365, "step": 8196 }, { "epoch": 0.4894900274692464, "grad_norm": 1.7699259519577026, "learning_rate": 5.67314710370911e-06, "loss": 0.8547, "step": 8197 }, { "epoch": 0.489549743222262, "grad_norm": 1.673822045326233, "learning_rate": 5.672483577732069e-06, "loss": 0.875, "step": 8198 }, { "epoch": 0.4896094589752777, "grad_norm": 11.093353271484375, "learning_rate": 5.671820051755027e-06, "loss": 0.8291, "step": 8199 }, { "epoch": 0.4896691747282933, "grad_norm": 2.087191343307495, "learning_rate": 5.671156525777984e-06, "loss": 0.8821, "step": 8200 }, { "epoch": 0.4896691747282933, "eval_text_loss": 0.9161249995231628, "eval_text_runtime": 15.1894, "eval_text_samples_per_second": 263.342, "eval_text_steps_per_second": 0.527, "step": 8200 }, { "epoch": 0.4896691747282933, "eval_image_loss": 0.6301747560501099, "eval_image_runtime": 5.0044, "eval_image_samples_per_second": 799.297, "eval_image_steps_per_second": 1.599, "step": 8200 }, { "epoch": 0.4896691747282933, "eval_video_loss": 1.07363760471344, "eval_video_runtime": 76.3194, "eval_video_samples_per_second": 52.411, "eval_video_steps_per_second": 0.105, "step": 8200 }, { "epoch": 0.48972889048130896, "grad_norm": 1.9058347940444946, "learning_rate": 5.670492999800943e-06, "loss": 0.8735, "step": 8201 }, { "epoch": 0.48978860623432463, "grad_norm": 1.9562805891036987, "learning_rate": 5.6698294738239005e-06, "loss": 0.8649, "step": 8202 }, { "epoch": 0.48984832198734024, "grad_norm": 2.1004908084869385, "learning_rate": 5.6691659478468586e-06, "loss": 0.8558, "step": 8203 }, { "epoch": 0.4899080377403559, "grad_norm": 1.643771767616272, "learning_rate": 5.668502421869817e-06, "loss": 0.857, "step": 8204 }, { "epoch": 0.48996775349337157, "grad_norm": 2.3523035049438477, "learning_rate": 5.667838895892775e-06, "loss": 0.8872, "step": 8205 }, { "epoch": 0.4900274692463872, "grad_norm": 2.2153565883636475, "learning_rate": 5.667175369915733e-06, "loss": 0.8292, "step": 8206 }, { "epoch": 0.49008718499940285, "grad_norm": 3.052762508392334, "learning_rate": 5.66651184393869e-06, "loss": 0.8941, "step": 8207 }, { "epoch": 0.4901469007524185, "grad_norm": 2.749520778656006, "learning_rate": 5.665848317961648e-06, "loss": 0.8814, "step": 8208 }, { "epoch": 0.4902066165054341, "grad_norm": 2.179581642150879, "learning_rate": 5.665184791984607e-06, "loss": 0.8738, "step": 8209 }, { "epoch": 0.4902663322584498, "grad_norm": 2.065009117126465, "learning_rate": 5.6645212660075644e-06, "loss": 0.8825, "step": 8210 }, { "epoch": 0.4903260480114654, "grad_norm": 1.954010248184204, "learning_rate": 5.6638577400305225e-06, "loss": 0.8762, "step": 8211 }, { "epoch": 0.49038576376448106, "grad_norm": 1.9604146480560303, "learning_rate": 5.66319421405348e-06, "loss": 0.8637, "step": 8212 }, { "epoch": 0.49044547951749673, "grad_norm": 1.8145692348480225, "learning_rate": 5.662530688076439e-06, "loss": 0.8492, "step": 8213 }, { "epoch": 0.49050519527051234, "grad_norm": 2.9269423484802246, "learning_rate": 5.661867162099397e-06, "loss": 0.8598, "step": 8214 }, { "epoch": 0.490564911023528, "grad_norm": 1.7282333374023438, "learning_rate": 5.661203636122354e-06, "loss": 0.8278, "step": 8215 }, { "epoch": 0.49062462677654367, "grad_norm": 1.8751163482666016, "learning_rate": 5.660540110145313e-06, "loss": 0.851, "step": 8216 }, { "epoch": 0.4906843425295593, "grad_norm": 2.1834964752197266, "learning_rate": 5.65987658416827e-06, "loss": 0.8674, "step": 8217 }, { "epoch": 0.49074405828257495, "grad_norm": 1.8006106615066528, "learning_rate": 5.659213058191228e-06, "loss": 0.8744, "step": 8218 }, { "epoch": 0.4908037740355906, "grad_norm": 3.511692523956299, "learning_rate": 5.658549532214187e-06, "loss": 0.8826, "step": 8219 }, { "epoch": 0.4908634897886062, "grad_norm": 2.9643969535827637, "learning_rate": 5.657886006237145e-06, "loss": 0.8319, "step": 8220 }, { "epoch": 0.4909232055416219, "grad_norm": 1.8676152229309082, "learning_rate": 5.657222480260103e-06, "loss": 0.8368, "step": 8221 }, { "epoch": 0.4909829212946375, "grad_norm": 1.917784571647644, "learning_rate": 5.65655895428306e-06, "loss": 0.8832, "step": 8222 }, { "epoch": 0.49104263704765316, "grad_norm": 1.7165402173995972, "learning_rate": 5.655895428306019e-06, "loss": 0.8589, "step": 8223 }, { "epoch": 0.49110235280066883, "grad_norm": 2.9794888496398926, "learning_rate": 5.655231902328977e-06, "loss": 0.8536, "step": 8224 }, { "epoch": 0.49116206855368444, "grad_norm": 2.0357823371887207, "learning_rate": 5.654568376351934e-06, "loss": 0.8198, "step": 8225 }, { "epoch": 0.4912217843067001, "grad_norm": 2.2911217212677, "learning_rate": 5.653904850374893e-06, "loss": 0.8804, "step": 8226 }, { "epoch": 0.49128150005971577, "grad_norm": 1.971076250076294, "learning_rate": 5.6532413243978504e-06, "loss": 0.8201, "step": 8227 }, { "epoch": 0.4913412158127314, "grad_norm": 1.5658793449401855, "learning_rate": 5.6525777984208085e-06, "loss": 0.8348, "step": 8228 }, { "epoch": 0.49140093156574705, "grad_norm": 2.0633583068847656, "learning_rate": 5.651914272443767e-06, "loss": 0.8658, "step": 8229 }, { "epoch": 0.4914606473187627, "grad_norm": 2.726144552230835, "learning_rate": 5.651250746466725e-06, "loss": 0.9238, "step": 8230 }, { "epoch": 0.4915203630717783, "grad_norm": 5.912776947021484, "learning_rate": 5.650587220489683e-06, "loss": 0.8594, "step": 8231 }, { "epoch": 0.491580078824794, "grad_norm": 2.1898422241210938, "learning_rate": 5.64992369451264e-06, "loss": 0.8986, "step": 8232 }, { "epoch": 0.49163979457780965, "grad_norm": 1.7762506008148193, "learning_rate": 5.649260168535598e-06, "loss": 0.8569, "step": 8233 }, { "epoch": 0.49169951033082526, "grad_norm": 6.253693580627441, "learning_rate": 5.648596642558557e-06, "loss": 0.8867, "step": 8234 }, { "epoch": 0.49175922608384093, "grad_norm": 2.0908186435699463, "learning_rate": 5.647933116581514e-06, "loss": 0.8589, "step": 8235 }, { "epoch": 0.49181894183685654, "grad_norm": 2.5491700172424316, "learning_rate": 5.6472695906044725e-06, "loss": 0.8289, "step": 8236 }, { "epoch": 0.4918786575898722, "grad_norm": 1.8398106098175049, "learning_rate": 5.64660606462743e-06, "loss": 0.87, "step": 8237 }, { "epoch": 0.49193837334288787, "grad_norm": 2.2677581310272217, "learning_rate": 5.645942538650389e-06, "loss": 0.8129, "step": 8238 }, { "epoch": 0.4919980890959035, "grad_norm": 2.053985595703125, "learning_rate": 5.645279012673347e-06, "loss": 0.8732, "step": 8239 }, { "epoch": 0.49205780484891914, "grad_norm": 2.3463633060455322, "learning_rate": 5.644615486696304e-06, "loss": 0.8533, "step": 8240 }, { "epoch": 0.4921175206019348, "grad_norm": 5.0934648513793945, "learning_rate": 5.643951960719263e-06, "loss": 0.8185, "step": 8241 }, { "epoch": 0.4921772363549504, "grad_norm": 1.9110479354858398, "learning_rate": 5.64328843474222e-06, "loss": 0.865, "step": 8242 }, { "epoch": 0.4922369521079661, "grad_norm": 3.5167064666748047, "learning_rate": 5.642624908765178e-06, "loss": 0.8564, "step": 8243 }, { "epoch": 0.49229666786098175, "grad_norm": 2.198725700378418, "learning_rate": 5.641961382788137e-06, "loss": 0.8727, "step": 8244 }, { "epoch": 0.49235638361399736, "grad_norm": 2.6439199447631836, "learning_rate": 5.6412978568110946e-06, "loss": 0.8471, "step": 8245 }, { "epoch": 0.492416099367013, "grad_norm": 2.236469268798828, "learning_rate": 5.640634330834053e-06, "loss": 0.8662, "step": 8246 }, { "epoch": 0.49247581512002864, "grad_norm": 2.2681241035461426, "learning_rate": 5.63997080485701e-06, "loss": 0.8435, "step": 8247 }, { "epoch": 0.4925355308730443, "grad_norm": 1.9201678037643433, "learning_rate": 5.639307278879969e-06, "loss": 0.868, "step": 8248 }, { "epoch": 0.49259524662605997, "grad_norm": 2.1523053646087646, "learning_rate": 5.638643752902927e-06, "loss": 0.8884, "step": 8249 }, { "epoch": 0.4926549623790756, "grad_norm": 1.6521830558776855, "learning_rate": 5.637980226925884e-06, "loss": 0.863, "step": 8250 }, { "epoch": 0.49271467813209124, "grad_norm": 5.518657207489014, "learning_rate": 5.637316700948843e-06, "loss": 0.8754, "step": 8251 }, { "epoch": 0.4927743938851069, "grad_norm": 2.481339931488037, "learning_rate": 5.6366531749718e-06, "loss": 0.8958, "step": 8252 }, { "epoch": 0.4928341096381225, "grad_norm": 1.8050792217254639, "learning_rate": 5.6359896489947585e-06, "loss": 0.8692, "step": 8253 }, { "epoch": 0.4928938253911382, "grad_norm": 2.520979881286621, "learning_rate": 5.635326123017717e-06, "loss": 0.859, "step": 8254 }, { "epoch": 0.49295354114415385, "grad_norm": 2.2755260467529297, "learning_rate": 5.634662597040675e-06, "loss": 0.8614, "step": 8255 }, { "epoch": 0.49301325689716946, "grad_norm": 1.959062933921814, "learning_rate": 5.633999071063633e-06, "loss": 0.8693, "step": 8256 }, { "epoch": 0.4930729726501851, "grad_norm": 3.0367729663848877, "learning_rate": 5.63333554508659e-06, "loss": 0.8498, "step": 8257 }, { "epoch": 0.4931326884032008, "grad_norm": 1.9266444444656372, "learning_rate": 5.632672019109548e-06, "loss": 0.8697, "step": 8258 }, { "epoch": 0.4931924041562164, "grad_norm": 2.3443171977996826, "learning_rate": 5.632008493132507e-06, "loss": 0.8889, "step": 8259 }, { "epoch": 0.49325211990923207, "grad_norm": 1.8137246370315552, "learning_rate": 5.631344967155464e-06, "loss": 0.842, "step": 8260 }, { "epoch": 0.4933118356622477, "grad_norm": 3.281205892562866, "learning_rate": 5.6306814411784225e-06, "loss": 0.8672, "step": 8261 }, { "epoch": 0.49337155141526334, "grad_norm": 2.339216947555542, "learning_rate": 5.63001791520138e-06, "loss": 0.8524, "step": 8262 }, { "epoch": 0.493431267168279, "grad_norm": 9.307194709777832, "learning_rate": 5.629354389224339e-06, "loss": 0.8392, "step": 8263 }, { "epoch": 0.4934909829212946, "grad_norm": 1.7834978103637695, "learning_rate": 5.628690863247297e-06, "loss": 0.8571, "step": 8264 }, { "epoch": 0.4935506986743103, "grad_norm": 2.4200234413146973, "learning_rate": 5.628027337270254e-06, "loss": 0.8514, "step": 8265 }, { "epoch": 0.49361041442732595, "grad_norm": 2.5527408123016357, "learning_rate": 5.627363811293213e-06, "loss": 0.8154, "step": 8266 }, { "epoch": 0.49367013018034156, "grad_norm": 1.845957636833191, "learning_rate": 5.62670028531617e-06, "loss": 0.8525, "step": 8267 }, { "epoch": 0.4937298459333572, "grad_norm": 1.970093846321106, "learning_rate": 5.626036759339128e-06, "loss": 0.8296, "step": 8268 }, { "epoch": 0.4937895616863729, "grad_norm": 2.3883185386657715, "learning_rate": 5.625373233362087e-06, "loss": 0.877, "step": 8269 }, { "epoch": 0.4938492774393885, "grad_norm": 1.955181360244751, "learning_rate": 5.6247097073850445e-06, "loss": 0.8246, "step": 8270 }, { "epoch": 0.49390899319240417, "grad_norm": 2.105900287628174, "learning_rate": 5.624046181408003e-06, "loss": 0.8823, "step": 8271 }, { "epoch": 0.4939687089454198, "grad_norm": 1.8598816394805908, "learning_rate": 5.62338265543096e-06, "loss": 0.8554, "step": 8272 }, { "epoch": 0.49402842469843544, "grad_norm": 1.901106357574463, "learning_rate": 5.622719129453919e-06, "loss": 0.8725, "step": 8273 }, { "epoch": 0.4940881404514511, "grad_norm": 1.9156019687652588, "learning_rate": 5.622055603476877e-06, "loss": 0.8391, "step": 8274 }, { "epoch": 0.4941478562044667, "grad_norm": 2.635620355606079, "learning_rate": 5.621392077499834e-06, "loss": 0.859, "step": 8275 }, { "epoch": 0.4942075719574824, "grad_norm": 2.6172449588775635, "learning_rate": 5.620728551522793e-06, "loss": 0.8329, "step": 8276 }, { "epoch": 0.49426728771049805, "grad_norm": 2.4115395545959473, "learning_rate": 5.62006502554575e-06, "loss": 0.8324, "step": 8277 }, { "epoch": 0.49432700346351366, "grad_norm": 2.4879846572875977, "learning_rate": 5.6194014995687085e-06, "loss": 0.843, "step": 8278 }, { "epoch": 0.4943867192165293, "grad_norm": 3.0377697944641113, "learning_rate": 5.618737973591667e-06, "loss": 0.8793, "step": 8279 }, { "epoch": 0.494446434969545, "grad_norm": 1.7792848348617554, "learning_rate": 5.618074447614625e-06, "loss": 0.8214, "step": 8280 }, { "epoch": 0.4945061507225606, "grad_norm": 2.372002363204956, "learning_rate": 5.617410921637583e-06, "loss": 0.8926, "step": 8281 }, { "epoch": 0.49456586647557627, "grad_norm": 1.9001657962799072, "learning_rate": 5.61674739566054e-06, "loss": 0.8717, "step": 8282 }, { "epoch": 0.4946255822285919, "grad_norm": 2.8165245056152344, "learning_rate": 5.616083869683498e-06, "loss": 0.8831, "step": 8283 }, { "epoch": 0.49468529798160754, "grad_norm": 1.586600661277771, "learning_rate": 5.615420343706457e-06, "loss": 0.8503, "step": 8284 }, { "epoch": 0.4947450137346232, "grad_norm": 2.0944206714630127, "learning_rate": 5.614756817729414e-06, "loss": 0.8676, "step": 8285 }, { "epoch": 0.4948047294876388, "grad_norm": 1.6600788831710815, "learning_rate": 5.6140932917523724e-06, "loss": 0.8684, "step": 8286 }, { "epoch": 0.4948644452406545, "grad_norm": 1.7625621557235718, "learning_rate": 5.61342976577533e-06, "loss": 0.867, "step": 8287 }, { "epoch": 0.49492416099367015, "grad_norm": 1.5246267318725586, "learning_rate": 5.612766239798289e-06, "loss": 0.8193, "step": 8288 }, { "epoch": 0.49498387674668576, "grad_norm": 3.6695899963378906, "learning_rate": 5.612102713821247e-06, "loss": 0.873, "step": 8289 }, { "epoch": 0.4950435924997014, "grad_norm": 1.956023931503296, "learning_rate": 5.611439187844204e-06, "loss": 0.8515, "step": 8290 }, { "epoch": 0.4951033082527171, "grad_norm": 2.173794984817505, "learning_rate": 5.610775661867163e-06, "loss": 0.8583, "step": 8291 }, { "epoch": 0.4951630240057327, "grad_norm": 2.4680724143981934, "learning_rate": 5.61011213589012e-06, "loss": 0.8871, "step": 8292 }, { "epoch": 0.49522273975874836, "grad_norm": 2.6833901405334473, "learning_rate": 5.609448609913078e-06, "loss": 0.9195, "step": 8293 }, { "epoch": 0.49528245551176403, "grad_norm": 3.064760684967041, "learning_rate": 5.608785083936037e-06, "loss": 0.8429, "step": 8294 }, { "epoch": 0.49534217126477964, "grad_norm": 1.6709544658660889, "learning_rate": 5.6081215579589945e-06, "loss": 0.874, "step": 8295 }, { "epoch": 0.4954018870177953, "grad_norm": 1.8023290634155273, "learning_rate": 5.607458031981953e-06, "loss": 0.8382, "step": 8296 }, { "epoch": 0.4954616027708109, "grad_norm": 3.049975872039795, "learning_rate": 5.60679450600491e-06, "loss": 0.8823, "step": 8297 }, { "epoch": 0.4955213185238266, "grad_norm": 3.6378722190856934, "learning_rate": 5.606130980027869e-06, "loss": 0.8827, "step": 8298 }, { "epoch": 0.49558103427684225, "grad_norm": 9.992852210998535, "learning_rate": 5.605467454050827e-06, "loss": 0.8334, "step": 8299 }, { "epoch": 0.49564075002985786, "grad_norm": 1.9876912832260132, "learning_rate": 5.604803928073784e-06, "loss": 0.886, "step": 8300 }, { "epoch": 0.4957004657828735, "grad_norm": 1.651438593864441, "learning_rate": 5.604140402096743e-06, "loss": 0.8697, "step": 8301 }, { "epoch": 0.4957601815358892, "grad_norm": 2.33432936668396, "learning_rate": 5.6034768761197e-06, "loss": 0.9028, "step": 8302 }, { "epoch": 0.4958198972889048, "grad_norm": 2.608201742172241, "learning_rate": 5.6028133501426585e-06, "loss": 0.8351, "step": 8303 }, { "epoch": 0.49587961304192046, "grad_norm": 2.568932294845581, "learning_rate": 5.6021498241656166e-06, "loss": 0.8569, "step": 8304 }, { "epoch": 0.49593932879493613, "grad_norm": 2.393012762069702, "learning_rate": 5.601486298188575e-06, "loss": 0.857, "step": 8305 }, { "epoch": 0.49599904454795174, "grad_norm": 2.5894036293029785, "learning_rate": 5.600822772211533e-06, "loss": 0.8442, "step": 8306 }, { "epoch": 0.4960587603009674, "grad_norm": 2.3299121856689453, "learning_rate": 5.60015924623449e-06, "loss": 0.8417, "step": 8307 }, { "epoch": 0.496118476053983, "grad_norm": 2.482903003692627, "learning_rate": 5.599495720257448e-06, "loss": 0.8454, "step": 8308 }, { "epoch": 0.4961781918069987, "grad_norm": 1.9036028385162354, "learning_rate": 5.598832194280407e-06, "loss": 0.8418, "step": 8309 }, { "epoch": 0.49623790756001435, "grad_norm": 1.8905152082443237, "learning_rate": 5.598168668303364e-06, "loss": 0.863, "step": 8310 }, { "epoch": 0.49629762331302996, "grad_norm": 2.601384401321411, "learning_rate": 5.597505142326322e-06, "loss": 0.8363, "step": 8311 }, { "epoch": 0.4963573390660456, "grad_norm": 2.7158451080322266, "learning_rate": 5.59684161634928e-06, "loss": 0.8348, "step": 8312 }, { "epoch": 0.4964170548190613, "grad_norm": 2.085942268371582, "learning_rate": 5.596178090372239e-06, "loss": 0.8581, "step": 8313 }, { "epoch": 0.4964767705720769, "grad_norm": 3.185891628265381, "learning_rate": 5.595514564395197e-06, "loss": 0.8585, "step": 8314 }, { "epoch": 0.49653648632509256, "grad_norm": 2.7849831581115723, "learning_rate": 5.594851038418154e-06, "loss": 0.8556, "step": 8315 }, { "epoch": 0.49659620207810823, "grad_norm": 2.6902031898498535, "learning_rate": 5.594187512441113e-06, "loss": 0.8338, "step": 8316 }, { "epoch": 0.49665591783112384, "grad_norm": 2.144005298614502, "learning_rate": 5.59352398646407e-06, "loss": 0.8431, "step": 8317 }, { "epoch": 0.4967156335841395, "grad_norm": 1.592427134513855, "learning_rate": 5.592860460487028e-06, "loss": 0.8466, "step": 8318 }, { "epoch": 0.4967753493371551, "grad_norm": 1.8189526796340942, "learning_rate": 5.592196934509987e-06, "loss": 0.8821, "step": 8319 }, { "epoch": 0.4968350650901708, "grad_norm": 2.3260555267333984, "learning_rate": 5.5915334085329445e-06, "loss": 0.8595, "step": 8320 }, { "epoch": 0.49689478084318645, "grad_norm": 1.711814045906067, "learning_rate": 5.5908698825559026e-06, "loss": 0.8609, "step": 8321 }, { "epoch": 0.49695449659620206, "grad_norm": 1.6903260946273804, "learning_rate": 5.59020635657886e-06, "loss": 0.8537, "step": 8322 }, { "epoch": 0.4970142123492177, "grad_norm": 2.667259693145752, "learning_rate": 5.589542830601819e-06, "loss": 0.8994, "step": 8323 }, { "epoch": 0.4970739281022334, "grad_norm": 1.961807131767273, "learning_rate": 5.588879304624777e-06, "loss": 0.8415, "step": 8324 }, { "epoch": 0.497133643855249, "grad_norm": 3.4429733753204346, "learning_rate": 5.588215778647734e-06, "loss": 0.8455, "step": 8325 }, { "epoch": 0.49719335960826466, "grad_norm": 2.269023895263672, "learning_rate": 5.587552252670693e-06, "loss": 0.8228, "step": 8326 }, { "epoch": 0.49725307536128033, "grad_norm": 1.944472074508667, "learning_rate": 5.58688872669365e-06, "loss": 0.8464, "step": 8327 }, { "epoch": 0.49731279111429594, "grad_norm": 2.112326145172119, "learning_rate": 5.586225200716608e-06, "loss": 0.8713, "step": 8328 }, { "epoch": 0.4973725068673116, "grad_norm": 1.7689921855926514, "learning_rate": 5.5855616747395665e-06, "loss": 0.8486, "step": 8329 }, { "epoch": 0.49743222262032727, "grad_norm": 1.7919988632202148, "learning_rate": 5.584898148762525e-06, "loss": 0.8251, "step": 8330 }, { "epoch": 0.4974919383733429, "grad_norm": 5.31292200088501, "learning_rate": 5.584234622785483e-06, "loss": 0.8631, "step": 8331 }, { "epoch": 0.49755165412635854, "grad_norm": 2.605903148651123, "learning_rate": 5.58357109680844e-06, "loss": 0.8645, "step": 8332 }, { "epoch": 0.49761136987937415, "grad_norm": 2.6911208629608154, "learning_rate": 5.582907570831398e-06, "loss": 0.8642, "step": 8333 }, { "epoch": 0.4976710856323898, "grad_norm": 5.236682415008545, "learning_rate": 5.582244044854357e-06, "loss": 0.8284, "step": 8334 }, { "epoch": 0.4977308013854055, "grad_norm": 3.583146572113037, "learning_rate": 5.581580518877314e-06, "loss": 0.8765, "step": 8335 }, { "epoch": 0.4977905171384211, "grad_norm": 3.9908812046051025, "learning_rate": 5.580916992900272e-06, "loss": 0.8674, "step": 8336 }, { "epoch": 0.49785023289143676, "grad_norm": 2.396509885787964, "learning_rate": 5.58025346692323e-06, "loss": 0.878, "step": 8337 }, { "epoch": 0.4979099486444524, "grad_norm": 1.6262457370758057, "learning_rate": 5.579589940946189e-06, "loss": 0.8658, "step": 8338 }, { "epoch": 0.49796966439746804, "grad_norm": 1.9921427965164185, "learning_rate": 5.578926414969147e-06, "loss": 0.8523, "step": 8339 }, { "epoch": 0.4980293801504837, "grad_norm": 3.2074766159057617, "learning_rate": 5.578262888992104e-06, "loss": 0.8752, "step": 8340 }, { "epoch": 0.49808909590349937, "grad_norm": 2.522691249847412, "learning_rate": 5.577599363015063e-06, "loss": 0.9361, "step": 8341 }, { "epoch": 0.498148811656515, "grad_norm": 2.5284230709075928, "learning_rate": 5.57693583703802e-06, "loss": 0.9057, "step": 8342 }, { "epoch": 0.49820852740953064, "grad_norm": 1.975077748298645, "learning_rate": 5.576272311060978e-06, "loss": 0.8708, "step": 8343 }, { "epoch": 0.49826824316254625, "grad_norm": 4.735520362854004, "learning_rate": 5.575608785083937e-06, "loss": 0.8068, "step": 8344 }, { "epoch": 0.4983279589155619, "grad_norm": 1.637375831604004, "learning_rate": 5.5749452591068944e-06, "loss": 0.8484, "step": 8345 }, { "epoch": 0.4983876746685776, "grad_norm": 5.033210277557373, "learning_rate": 5.5742817331298525e-06, "loss": 0.8518, "step": 8346 }, { "epoch": 0.4984473904215932, "grad_norm": 2.031649112701416, "learning_rate": 5.57361820715281e-06, "loss": 0.8921, "step": 8347 }, { "epoch": 0.49850710617460886, "grad_norm": 1.642454743385315, "learning_rate": 5.572954681175769e-06, "loss": 0.8138, "step": 8348 }, { "epoch": 0.4985668219276245, "grad_norm": 1.7256810665130615, "learning_rate": 5.572291155198727e-06, "loss": 0.8655, "step": 8349 }, { "epoch": 0.49862653768064014, "grad_norm": 2.614596128463745, "learning_rate": 5.571627629221684e-06, "loss": 0.8716, "step": 8350 }, { "epoch": 0.4986862534336558, "grad_norm": 3.341231107711792, "learning_rate": 5.570964103244643e-06, "loss": 0.8715, "step": 8351 }, { "epoch": 0.49874596918667147, "grad_norm": 2.88093900680542, "learning_rate": 5.5703005772676e-06, "loss": 0.8656, "step": 8352 }, { "epoch": 0.4988056849396871, "grad_norm": 1.8252619504928589, "learning_rate": 5.569637051290558e-06, "loss": 0.8088, "step": 8353 }, { "epoch": 0.49886540069270274, "grad_norm": 1.683589220046997, "learning_rate": 5.5689735253135165e-06, "loss": 0.8665, "step": 8354 }, { "epoch": 0.49892511644571835, "grad_norm": 2.4561707973480225, "learning_rate": 5.568309999336475e-06, "loss": 0.8762, "step": 8355 }, { "epoch": 0.498984832198734, "grad_norm": 2.06054425239563, "learning_rate": 5.567646473359433e-06, "loss": 0.8465, "step": 8356 }, { "epoch": 0.4990445479517497, "grad_norm": 1.6533033847808838, "learning_rate": 5.56698294738239e-06, "loss": 0.8442, "step": 8357 }, { "epoch": 0.4991042637047653, "grad_norm": 4.229533672332764, "learning_rate": 5.566319421405348e-06, "loss": 0.8737, "step": 8358 }, { "epoch": 0.49916397945778096, "grad_norm": 2.121480941772461, "learning_rate": 5.565655895428307e-06, "loss": 0.8831, "step": 8359 }, { "epoch": 0.4992236952107966, "grad_norm": 2.9357030391693115, "learning_rate": 5.564992369451264e-06, "loss": 0.8614, "step": 8360 }, { "epoch": 0.49928341096381224, "grad_norm": 1.8719731569290161, "learning_rate": 5.564328843474222e-06, "loss": 0.8504, "step": 8361 }, { "epoch": 0.4993431267168279, "grad_norm": 2.9145584106445312, "learning_rate": 5.56366531749718e-06, "loss": 0.8549, "step": 8362 }, { "epoch": 0.49940284246984357, "grad_norm": 2.4542815685272217, "learning_rate": 5.5630017915201386e-06, "loss": 0.8657, "step": 8363 }, { "epoch": 0.4994625582228592, "grad_norm": 2.555535078048706, "learning_rate": 5.562338265543097e-06, "loss": 0.8698, "step": 8364 }, { "epoch": 0.49952227397587484, "grad_norm": 1.6479010581970215, "learning_rate": 5.561674739566054e-06, "loss": 0.8264, "step": 8365 }, { "epoch": 0.4995819897288905, "grad_norm": 1.8207294940948486, "learning_rate": 5.561011213589013e-06, "loss": 0.8594, "step": 8366 }, { "epoch": 0.4996417054819061, "grad_norm": 1.6410905122756958, "learning_rate": 5.56034768761197e-06, "loss": 0.8741, "step": 8367 }, { "epoch": 0.4997014212349218, "grad_norm": 2.5415146350860596, "learning_rate": 5.559684161634928e-06, "loss": 0.9007, "step": 8368 }, { "epoch": 0.4997611369879374, "grad_norm": 1.9343690872192383, "learning_rate": 5.559020635657887e-06, "loss": 0.867, "step": 8369 }, { "epoch": 0.49982085274095306, "grad_norm": 2.1744768619537354, "learning_rate": 5.558357109680844e-06, "loss": 0.819, "step": 8370 }, { "epoch": 0.4998805684939687, "grad_norm": 1.767181396484375, "learning_rate": 5.5576935837038025e-06, "loss": 0.854, "step": 8371 }, { "epoch": 0.49994028424698433, "grad_norm": 3.0436792373657227, "learning_rate": 5.55703005772676e-06, "loss": 0.8583, "step": 8372 }, { "epoch": 0.5, "grad_norm": 5.877686023712158, "learning_rate": 5.556366531749719e-06, "loss": 0.8922, "step": 8373 }, { "epoch": 0.5000597157530157, "grad_norm": 2.433722734451294, "learning_rate": 5.555703005772677e-06, "loss": 0.8434, "step": 8374 }, { "epoch": 0.5001194315060313, "grad_norm": 1.6744500398635864, "learning_rate": 5.555039479795634e-06, "loss": 0.8854, "step": 8375 }, { "epoch": 0.5001791472590469, "grad_norm": 2.0003502368927, "learning_rate": 5.554375953818593e-06, "loss": 0.828, "step": 8376 }, { "epoch": 0.5002388630120626, "grad_norm": 1.6573256254196167, "learning_rate": 5.55371242784155e-06, "loss": 0.8997, "step": 8377 }, { "epoch": 0.5002985787650782, "grad_norm": 2.0101022720336914, "learning_rate": 5.553048901864508e-06, "loss": 0.8766, "step": 8378 }, { "epoch": 0.5003582945180939, "grad_norm": 5.053191184997559, "learning_rate": 5.5523853758874665e-06, "loss": 0.8653, "step": 8379 }, { "epoch": 0.5004180102711095, "grad_norm": 2.480231523513794, "learning_rate": 5.5517218499104246e-06, "loss": 0.847, "step": 8380 }, { "epoch": 0.5004777260241252, "grad_norm": 2.131870985031128, "learning_rate": 5.551058323933383e-06, "loss": 0.8565, "step": 8381 }, { "epoch": 0.5005374417771408, "grad_norm": 1.8327221870422363, "learning_rate": 5.55039479795634e-06, "loss": 0.8732, "step": 8382 }, { "epoch": 0.5005971575301564, "grad_norm": 3.597815990447998, "learning_rate": 5.549731271979298e-06, "loss": 0.866, "step": 8383 }, { "epoch": 0.5006568732831721, "grad_norm": 1.9644535779953003, "learning_rate": 5.549067746002257e-06, "loss": 0.8794, "step": 8384 }, { "epoch": 0.5007165890361878, "grad_norm": 2.4032723903656006, "learning_rate": 5.548404220025214e-06, "loss": 0.8422, "step": 8385 }, { "epoch": 0.5007763047892034, "grad_norm": 3.7080495357513428, "learning_rate": 5.547740694048172e-06, "loss": 0.8739, "step": 8386 }, { "epoch": 0.500836020542219, "grad_norm": 5.92297887802124, "learning_rate": 5.5470771680711296e-06, "loss": 0.9016, "step": 8387 }, { "epoch": 0.5008957362952347, "grad_norm": 2.9558990001678467, "learning_rate": 5.5464136420940885e-06, "loss": 0.8686, "step": 8388 }, { "epoch": 0.5009554520482503, "grad_norm": 4.8320841789245605, "learning_rate": 5.545750116117047e-06, "loss": 0.8601, "step": 8389 }, { "epoch": 0.501015167801266, "grad_norm": 1.9591619968414307, "learning_rate": 5.545086590140004e-06, "loss": 0.8856, "step": 8390 }, { "epoch": 0.5010748835542816, "grad_norm": 4.272107124328613, "learning_rate": 5.544423064162963e-06, "loss": 0.8754, "step": 8391 }, { "epoch": 0.5011345993072973, "grad_norm": 2.0084481239318848, "learning_rate": 5.54375953818592e-06, "loss": 0.8663, "step": 8392 }, { "epoch": 0.5011943150603129, "grad_norm": 1.6914329528808594, "learning_rate": 5.543096012208878e-06, "loss": 0.8375, "step": 8393 }, { "epoch": 0.5012540308133285, "grad_norm": 1.784720778465271, "learning_rate": 5.542432486231837e-06, "loss": 0.8645, "step": 8394 }, { "epoch": 0.5013137465663442, "grad_norm": 2.0590195655822754, "learning_rate": 5.541768960254794e-06, "loss": 0.8741, "step": 8395 }, { "epoch": 0.5013734623193599, "grad_norm": 3.5018532276153564, "learning_rate": 5.5411054342777525e-06, "loss": 0.887, "step": 8396 }, { "epoch": 0.5014331780723755, "grad_norm": 2.934465169906616, "learning_rate": 5.54044190830071e-06, "loss": 0.8681, "step": 8397 }, { "epoch": 0.5014928938253911, "grad_norm": 1.7171214818954468, "learning_rate": 5.539778382323669e-06, "loss": 0.8387, "step": 8398 }, { "epoch": 0.5015526095784067, "grad_norm": 2.094825267791748, "learning_rate": 5.539114856346627e-06, "loss": 0.854, "step": 8399 }, { "epoch": 0.5016123253314224, "grad_norm": 1.8040626049041748, "learning_rate": 5.538451330369584e-06, "loss": 0.8364, "step": 8400 }, { "epoch": 0.5016123253314224, "eval_text_loss": 0.9143439531326294, "eval_text_runtime": 15.1884, "eval_text_samples_per_second": 263.359, "eval_text_steps_per_second": 0.527, "step": 8400 }, { "epoch": 0.5016123253314224, "eval_image_loss": 0.6275792717933655, "eval_image_runtime": 5.0129, "eval_image_samples_per_second": 797.935, "eval_image_steps_per_second": 1.596, "step": 8400 }, { "epoch": 0.5016123253314224, "eval_video_loss": 1.0733654499053955, "eval_video_runtime": 76.5328, "eval_video_samples_per_second": 52.265, "eval_video_steps_per_second": 0.105, "step": 8400 }, { "epoch": 0.5016720410844381, "grad_norm": 2.0380516052246094, "learning_rate": 5.537787804392543e-06, "loss": 0.8758, "step": 8401 }, { "epoch": 0.5017317568374537, "grad_norm": 1.5207040309906006, "learning_rate": 5.5371242784155e-06, "loss": 0.8713, "step": 8402 }, { "epoch": 0.5017914725904694, "grad_norm": 1.983609914779663, "learning_rate": 5.536460752438458e-06, "loss": 0.8473, "step": 8403 }, { "epoch": 0.501851188343485, "grad_norm": 2.3554418087005615, "learning_rate": 5.5357972264614164e-06, "loss": 0.866, "step": 8404 }, { "epoch": 0.5019109040965006, "grad_norm": 2.6834030151367188, "learning_rate": 5.5351337004843745e-06, "loss": 0.826, "step": 8405 }, { "epoch": 0.5019706198495163, "grad_norm": 2.165159225463867, "learning_rate": 5.534470174507333e-06, "loss": 0.8489, "step": 8406 }, { "epoch": 0.502030335602532, "grad_norm": 2.7564098834991455, "learning_rate": 5.53380664853029e-06, "loss": 0.8587, "step": 8407 }, { "epoch": 0.5020900513555476, "grad_norm": 1.7287225723266602, "learning_rate": 5.533143122553248e-06, "loss": 0.8619, "step": 8408 }, { "epoch": 0.5021497671085633, "grad_norm": 1.604691982269287, "learning_rate": 5.532479596576207e-06, "loss": 0.8502, "step": 8409 }, { "epoch": 0.5022094828615788, "grad_norm": 1.7430205345153809, "learning_rate": 5.531816070599164e-06, "loss": 0.8653, "step": 8410 }, { "epoch": 0.5022691986145945, "grad_norm": 1.5814379453659058, "learning_rate": 5.531152544622122e-06, "loss": 0.8496, "step": 8411 }, { "epoch": 0.5023289143676102, "grad_norm": 3.3486998081207275, "learning_rate": 5.5304890186450795e-06, "loss": 0.8641, "step": 8412 }, { "epoch": 0.5023886301206258, "grad_norm": 1.655290126800537, "learning_rate": 5.5298254926680385e-06, "loss": 0.8371, "step": 8413 }, { "epoch": 0.5024483458736415, "grad_norm": 1.994926929473877, "learning_rate": 5.529161966690997e-06, "loss": 0.8569, "step": 8414 }, { "epoch": 0.5025080616266571, "grad_norm": 3.3115875720977783, "learning_rate": 5.528498440713954e-06, "loss": 0.828, "step": 8415 }, { "epoch": 0.5025677773796727, "grad_norm": 2.020320177078247, "learning_rate": 5.527834914736913e-06, "loss": 0.8829, "step": 8416 }, { "epoch": 0.5026274931326884, "grad_norm": 4.184566974639893, "learning_rate": 5.52717138875987e-06, "loss": 0.837, "step": 8417 }, { "epoch": 0.5026872088857041, "grad_norm": 2.141547679901123, "learning_rate": 5.526507862782828e-06, "loss": 0.8629, "step": 8418 }, { "epoch": 0.5027469246387197, "grad_norm": 1.5870805978775024, "learning_rate": 5.525844336805787e-06, "loss": 0.8755, "step": 8419 }, { "epoch": 0.5028066403917354, "grad_norm": 1.875364065170288, "learning_rate": 5.525180810828744e-06, "loss": 0.8775, "step": 8420 }, { "epoch": 0.502866356144751, "grad_norm": 2.3160226345062256, "learning_rate": 5.5245172848517024e-06, "loss": 0.8304, "step": 8421 }, { "epoch": 0.5029260718977666, "grad_norm": 2.222022294998169, "learning_rate": 5.52385375887466e-06, "loss": 0.829, "step": 8422 }, { "epoch": 0.5029857876507823, "grad_norm": 2.051830768585205, "learning_rate": 5.523190232897619e-06, "loss": 0.8701, "step": 8423 }, { "epoch": 0.5030455034037979, "grad_norm": 1.5788997411727905, "learning_rate": 5.522526706920577e-06, "loss": 0.8565, "step": 8424 }, { "epoch": 0.5031052191568136, "grad_norm": 2.0326976776123047, "learning_rate": 5.521863180943534e-06, "loss": 0.8806, "step": 8425 }, { "epoch": 0.5031649349098292, "grad_norm": 2.2244839668273926, "learning_rate": 5.521199654966493e-06, "loss": 0.8708, "step": 8426 }, { "epoch": 0.5032246506628448, "grad_norm": 2.575111150741577, "learning_rate": 5.52053612898945e-06, "loss": 0.9004, "step": 8427 }, { "epoch": 0.5032843664158605, "grad_norm": 1.8242813348770142, "learning_rate": 5.519872603012408e-06, "loss": 0.8328, "step": 8428 }, { "epoch": 0.5033440821688762, "grad_norm": 1.6701289415359497, "learning_rate": 5.519209077035366e-06, "loss": 0.8694, "step": 8429 }, { "epoch": 0.5034037979218918, "grad_norm": 1.902298927307129, "learning_rate": 5.5185455510583245e-06, "loss": 0.882, "step": 8430 }, { "epoch": 0.5034635136749075, "grad_norm": 2.359549045562744, "learning_rate": 5.517882025081283e-06, "loss": 0.8515, "step": 8431 }, { "epoch": 0.503523229427923, "grad_norm": 3.190551519393921, "learning_rate": 5.51721849910424e-06, "loss": 0.8558, "step": 8432 }, { "epoch": 0.5035829451809387, "grad_norm": 1.9544644355773926, "learning_rate": 5.516554973127198e-06, "loss": 0.886, "step": 8433 }, { "epoch": 0.5036426609339544, "grad_norm": 1.961855411529541, "learning_rate": 5.515891447150157e-06, "loss": 0.8647, "step": 8434 }, { "epoch": 0.50370237668697, "grad_norm": 2.219290018081665, "learning_rate": 5.515227921173114e-06, "loss": 0.8801, "step": 8435 }, { "epoch": 0.5037620924399857, "grad_norm": 2.7304506301879883, "learning_rate": 5.514564395196072e-06, "loss": 0.8762, "step": 8436 }, { "epoch": 0.5038218081930013, "grad_norm": 3.1213529109954834, "learning_rate": 5.5139008692190295e-06, "loss": 0.8656, "step": 8437 }, { "epoch": 0.5038815239460169, "grad_norm": 3.9527556896209717, "learning_rate": 5.5132373432419885e-06, "loss": 0.8533, "step": 8438 }, { "epoch": 0.5039412396990326, "grad_norm": 2.2869560718536377, "learning_rate": 5.5125738172649466e-06, "loss": 0.8695, "step": 8439 }, { "epoch": 0.5040009554520483, "grad_norm": 3.524653196334839, "learning_rate": 5.511910291287904e-06, "loss": 0.8465, "step": 8440 }, { "epoch": 0.5040606712050639, "grad_norm": 1.9212990999221802, "learning_rate": 5.511246765310863e-06, "loss": 0.8666, "step": 8441 }, { "epoch": 0.5041203869580796, "grad_norm": 1.9124407768249512, "learning_rate": 5.51058323933382e-06, "loss": 0.8553, "step": 8442 }, { "epoch": 0.5041801027110951, "grad_norm": 2.00211763381958, "learning_rate": 5.509919713356778e-06, "loss": 0.8793, "step": 8443 }, { "epoch": 0.5042398184641108, "grad_norm": 3.2011005878448486, "learning_rate": 5.509256187379737e-06, "loss": 0.8662, "step": 8444 }, { "epoch": 0.5042995342171265, "grad_norm": 2.198434829711914, "learning_rate": 5.508592661402694e-06, "loss": 0.8602, "step": 8445 }, { "epoch": 0.5043592499701421, "grad_norm": 2.200819730758667, "learning_rate": 5.507929135425652e-06, "loss": 0.8663, "step": 8446 }, { "epoch": 0.5044189657231578, "grad_norm": 1.7589643001556396, "learning_rate": 5.50726560944861e-06, "loss": 0.8626, "step": 8447 }, { "epoch": 0.5044786814761734, "grad_norm": 1.8474719524383545, "learning_rate": 5.506602083471569e-06, "loss": 0.8673, "step": 8448 }, { "epoch": 0.504538397229189, "grad_norm": 2.691638708114624, "learning_rate": 5.505938557494527e-06, "loss": 0.8555, "step": 8449 }, { "epoch": 0.5045981129822047, "grad_norm": 2.3657944202423096, "learning_rate": 5.505275031517484e-06, "loss": 0.8542, "step": 8450 }, { "epoch": 0.5046578287352204, "grad_norm": 2.5512351989746094, "learning_rate": 5.504611505540443e-06, "loss": 0.8458, "step": 8451 }, { "epoch": 0.504717544488236, "grad_norm": 2.4629621505737305, "learning_rate": 5.5039479795634e-06, "loss": 0.8767, "step": 8452 }, { "epoch": 0.5047772602412517, "grad_norm": 2.78698992729187, "learning_rate": 5.503284453586358e-06, "loss": 0.81, "step": 8453 }, { "epoch": 0.5048369759942672, "grad_norm": 2.5350966453552246, "learning_rate": 5.502620927609316e-06, "loss": 0.8591, "step": 8454 }, { "epoch": 0.5048966917472829, "grad_norm": 1.5758795738220215, "learning_rate": 5.5019574016322745e-06, "loss": 0.8536, "step": 8455 }, { "epoch": 0.5049564075002986, "grad_norm": 3.3237147331237793, "learning_rate": 5.501293875655233e-06, "loss": 0.8343, "step": 8456 }, { "epoch": 0.5050161232533142, "grad_norm": 2.0849666595458984, "learning_rate": 5.50063034967819e-06, "loss": 0.8268, "step": 8457 }, { "epoch": 0.5050758390063299, "grad_norm": 2.012935161590576, "learning_rate": 5.499966823701148e-06, "loss": 0.8469, "step": 8458 }, { "epoch": 0.5051355547593455, "grad_norm": 3.360774040222168, "learning_rate": 5.499303297724107e-06, "loss": 0.8442, "step": 8459 }, { "epoch": 0.5051952705123611, "grad_norm": 1.902774691581726, "learning_rate": 5.498639771747064e-06, "loss": 0.8488, "step": 8460 }, { "epoch": 0.5052549862653768, "grad_norm": 2.252260684967041, "learning_rate": 5.497976245770022e-06, "loss": 0.8915, "step": 8461 }, { "epoch": 0.5053147020183925, "grad_norm": 3.239204168319702, "learning_rate": 5.4973127197929795e-06, "loss": 0.8784, "step": 8462 }, { "epoch": 0.5053744177714081, "grad_norm": 2.293137311935425, "learning_rate": 5.4966491938159384e-06, "loss": 0.8583, "step": 8463 }, { "epoch": 0.5054341335244238, "grad_norm": 3.0648653507232666, "learning_rate": 5.4959856678388965e-06, "loss": 0.8591, "step": 8464 }, { "epoch": 0.5054938492774393, "grad_norm": 2.336296796798706, "learning_rate": 5.495322141861854e-06, "loss": 0.8557, "step": 8465 }, { "epoch": 0.505553565030455, "grad_norm": 1.9906561374664307, "learning_rate": 5.494658615884813e-06, "loss": 0.828, "step": 8466 }, { "epoch": 0.5056132807834707, "grad_norm": 2.5224227905273438, "learning_rate": 5.49399508990777e-06, "loss": 0.863, "step": 8467 }, { "epoch": 0.5056729965364863, "grad_norm": 2.2112467288970947, "learning_rate": 5.493331563930728e-06, "loss": 0.8536, "step": 8468 }, { "epoch": 0.505732712289502, "grad_norm": 1.9811701774597168, "learning_rate": 5.492668037953687e-06, "loss": 0.8524, "step": 8469 }, { "epoch": 0.5057924280425176, "grad_norm": 2.635293960571289, "learning_rate": 5.492004511976644e-06, "loss": 0.8246, "step": 8470 }, { "epoch": 0.5058521437955332, "grad_norm": 1.8432114124298096, "learning_rate": 5.491340985999602e-06, "loss": 0.8868, "step": 8471 }, { "epoch": 0.5059118595485489, "grad_norm": 1.844138503074646, "learning_rate": 5.49067746002256e-06, "loss": 0.8166, "step": 8472 }, { "epoch": 0.5059715753015646, "grad_norm": 2.0941271781921387, "learning_rate": 5.490013934045519e-06, "loss": 0.8258, "step": 8473 }, { "epoch": 0.5060312910545802, "grad_norm": 2.068331003189087, "learning_rate": 5.489350408068477e-06, "loss": 0.8432, "step": 8474 }, { "epoch": 0.5060910068075959, "grad_norm": 2.319636583328247, "learning_rate": 5.488686882091434e-06, "loss": 0.8084, "step": 8475 }, { "epoch": 0.5061507225606114, "grad_norm": 2.2251663208007812, "learning_rate": 5.488023356114393e-06, "loss": 0.8298, "step": 8476 }, { "epoch": 0.5062104383136271, "grad_norm": 3.0838510990142822, "learning_rate": 5.48735983013735e-06, "loss": 0.8769, "step": 8477 }, { "epoch": 0.5062701540666428, "grad_norm": 2.8307180404663086, "learning_rate": 5.486696304160308e-06, "loss": 0.8263, "step": 8478 }, { "epoch": 0.5063298698196584, "grad_norm": 1.999464988708496, "learning_rate": 5.486032778183266e-06, "loss": 0.8516, "step": 8479 }, { "epoch": 0.5063895855726741, "grad_norm": 2.100994825363159, "learning_rate": 5.4853692522062244e-06, "loss": 0.8754, "step": 8480 }, { "epoch": 0.5064493013256898, "grad_norm": 1.6927685737609863, "learning_rate": 5.4847057262291825e-06, "loss": 0.8658, "step": 8481 }, { "epoch": 0.5065090170787053, "grad_norm": 2.1550397872924805, "learning_rate": 5.48404220025214e-06, "loss": 0.9041, "step": 8482 }, { "epoch": 0.506568732831721, "grad_norm": 3.939086675643921, "learning_rate": 5.483378674275098e-06, "loss": 0.8589, "step": 8483 }, { "epoch": 0.5066284485847367, "grad_norm": 1.966794490814209, "learning_rate": 5.482715148298057e-06, "loss": 0.8442, "step": 8484 }, { "epoch": 0.5066881643377523, "grad_norm": 1.9266550540924072, "learning_rate": 5.482051622321014e-06, "loss": 0.8928, "step": 8485 }, { "epoch": 0.506747880090768, "grad_norm": 2.019111394882202, "learning_rate": 5.481388096343972e-06, "loss": 0.8765, "step": 8486 }, { "epoch": 0.5068075958437835, "grad_norm": 2.258767604827881, "learning_rate": 5.4807245703669295e-06, "loss": 0.866, "step": 8487 }, { "epoch": 0.5068673115967992, "grad_norm": 2.052335023880005, "learning_rate": 5.480061044389888e-06, "loss": 0.8711, "step": 8488 }, { "epoch": 0.5069270273498149, "grad_norm": 2.818589687347412, "learning_rate": 5.4793975184128465e-06, "loss": 0.8411, "step": 8489 }, { "epoch": 0.5069867431028305, "grad_norm": 2.036832332611084, "learning_rate": 5.478733992435804e-06, "loss": 0.8223, "step": 8490 }, { "epoch": 0.5070464588558462, "grad_norm": 2.1311681270599365, "learning_rate": 5.478070466458763e-06, "loss": 0.8769, "step": 8491 }, { "epoch": 0.5071061746088619, "grad_norm": 2.5375232696533203, "learning_rate": 5.47740694048172e-06, "loss": 0.8515, "step": 8492 }, { "epoch": 0.5071658903618774, "grad_norm": 2.6901938915252686, "learning_rate": 5.476743414504678e-06, "loss": 0.9038, "step": 8493 }, { "epoch": 0.5072256061148931, "grad_norm": 1.7767410278320312, "learning_rate": 5.476079888527637e-06, "loss": 0.8763, "step": 8494 }, { "epoch": 0.5072853218679088, "grad_norm": 2.66028094291687, "learning_rate": 5.475416362550594e-06, "loss": 0.8649, "step": 8495 }, { "epoch": 0.5073450376209244, "grad_norm": 2.142815589904785, "learning_rate": 5.474752836573552e-06, "loss": 0.8608, "step": 8496 }, { "epoch": 0.5074047533739401, "grad_norm": 2.1515395641326904, "learning_rate": 5.47408931059651e-06, "loss": 0.8576, "step": 8497 }, { "epoch": 0.5074644691269556, "grad_norm": 9.702887535095215, "learning_rate": 5.4734257846194686e-06, "loss": 0.8716, "step": 8498 }, { "epoch": 0.5075241848799713, "grad_norm": 1.704371690750122, "learning_rate": 5.472762258642427e-06, "loss": 0.8538, "step": 8499 }, { "epoch": 0.507583900632987, "grad_norm": 3.5622599124908447, "learning_rate": 5.472098732665384e-06, "loss": 0.8813, "step": 8500 }, { "epoch": 0.5076436163860026, "grad_norm": 2.506279230117798, "learning_rate": 5.471435206688343e-06, "loss": 0.7831, "step": 8501 }, { "epoch": 0.5077033321390183, "grad_norm": 3.1782283782958984, "learning_rate": 5.4707716807113e-06, "loss": 0.8882, "step": 8502 }, { "epoch": 0.507763047892034, "grad_norm": 2.4819912910461426, "learning_rate": 5.470108154734258e-06, "loss": 0.8437, "step": 8503 }, { "epoch": 0.5078227636450495, "grad_norm": 3.443459987640381, "learning_rate": 5.469444628757216e-06, "loss": 0.8443, "step": 8504 }, { "epoch": 0.5078824793980652, "grad_norm": 2.3362410068511963, "learning_rate": 5.468781102780174e-06, "loss": 0.8653, "step": 8505 }, { "epoch": 0.5079421951510809, "grad_norm": 3.2186191082000732, "learning_rate": 5.4681175768031325e-06, "loss": 0.8217, "step": 8506 }, { "epoch": 0.5080019109040965, "grad_norm": 2.3843579292297363, "learning_rate": 5.46745405082609e-06, "loss": 0.8746, "step": 8507 }, { "epoch": 0.5080616266571122, "grad_norm": 3.0343005657196045, "learning_rate": 5.466790524849048e-06, "loss": 0.8547, "step": 8508 }, { "epoch": 0.5081213424101277, "grad_norm": 2.864849805831909, "learning_rate": 5.466126998872007e-06, "loss": 0.852, "step": 8509 }, { "epoch": 0.5081810581631434, "grad_norm": 1.554114580154419, "learning_rate": 5.465463472894964e-06, "loss": 0.8289, "step": 8510 }, { "epoch": 0.5082407739161591, "grad_norm": 1.9923861026763916, "learning_rate": 5.464799946917922e-06, "loss": 0.8544, "step": 8511 }, { "epoch": 0.5083004896691747, "grad_norm": 1.6187047958374023, "learning_rate": 5.4641364209408794e-06, "loss": 0.8559, "step": 8512 }, { "epoch": 0.5083602054221904, "grad_norm": 3.340502977371216, "learning_rate": 5.463472894963838e-06, "loss": 0.8554, "step": 8513 }, { "epoch": 0.5084199211752061, "grad_norm": 2.3102757930755615, "learning_rate": 5.4628093689867965e-06, "loss": 0.8564, "step": 8514 }, { "epoch": 0.5084796369282216, "grad_norm": 1.764221429824829, "learning_rate": 5.462145843009754e-06, "loss": 0.8263, "step": 8515 }, { "epoch": 0.5085393526812373, "grad_norm": 1.6061460971832275, "learning_rate": 5.461482317032713e-06, "loss": 0.833, "step": 8516 }, { "epoch": 0.508599068434253, "grad_norm": 2.093790054321289, "learning_rate": 5.46081879105567e-06, "loss": 0.847, "step": 8517 }, { "epoch": 0.5086587841872686, "grad_norm": 2.083467483520508, "learning_rate": 5.460155265078628e-06, "loss": 0.8167, "step": 8518 }, { "epoch": 0.5087184999402843, "grad_norm": 2.233597993850708, "learning_rate": 5.459491739101587e-06, "loss": 0.8304, "step": 8519 }, { "epoch": 0.5087782156932998, "grad_norm": 2.2272775173187256, "learning_rate": 5.458828213124544e-06, "loss": 0.8754, "step": 8520 }, { "epoch": 0.5088379314463155, "grad_norm": 1.754308819770813, "learning_rate": 5.458164687147502e-06, "loss": 0.8802, "step": 8521 }, { "epoch": 0.5088976471993312, "grad_norm": 2.671729803085327, "learning_rate": 5.45750116117046e-06, "loss": 0.8617, "step": 8522 }, { "epoch": 0.5089573629523468, "grad_norm": 1.6537779569625854, "learning_rate": 5.4568376351934185e-06, "loss": 0.8917, "step": 8523 }, { "epoch": 0.5090170787053625, "grad_norm": 2.2524173259735107, "learning_rate": 5.456174109216377e-06, "loss": 0.8508, "step": 8524 }, { "epoch": 0.5090767944583782, "grad_norm": 2.8563733100891113, "learning_rate": 5.455510583239334e-06, "loss": 0.8431, "step": 8525 }, { "epoch": 0.5091365102113937, "grad_norm": 1.7113232612609863, "learning_rate": 5.454847057262293e-06, "loss": 0.8435, "step": 8526 }, { "epoch": 0.5091962259644094, "grad_norm": 2.569748878479004, "learning_rate": 5.45418353128525e-06, "loss": 0.8378, "step": 8527 }, { "epoch": 0.509255941717425, "grad_norm": 2.217689037322998, "learning_rate": 5.453520005308208e-06, "loss": 0.8556, "step": 8528 }, { "epoch": 0.5093156574704407, "grad_norm": 2.0692317485809326, "learning_rate": 5.452856479331166e-06, "loss": 0.8481, "step": 8529 }, { "epoch": 0.5093753732234564, "grad_norm": 2.178574323654175, "learning_rate": 5.452192953354124e-06, "loss": 0.8826, "step": 8530 }, { "epoch": 0.5094350889764719, "grad_norm": 1.7531137466430664, "learning_rate": 5.4515294273770825e-06, "loss": 0.8644, "step": 8531 }, { "epoch": 0.5094948047294876, "grad_norm": 2.36087703704834, "learning_rate": 5.45086590140004e-06, "loss": 0.8335, "step": 8532 }, { "epoch": 0.5095545204825033, "grad_norm": 1.6442073583602905, "learning_rate": 5.450202375422998e-06, "loss": 0.8959, "step": 8533 }, { "epoch": 0.5096142362355189, "grad_norm": 1.8692964315414429, "learning_rate": 5.449538849445957e-06, "loss": 0.875, "step": 8534 }, { "epoch": 0.5096739519885346, "grad_norm": 2.080536365509033, "learning_rate": 5.448875323468914e-06, "loss": 0.8897, "step": 8535 }, { "epoch": 0.5097336677415503, "grad_norm": 2.170468330383301, "learning_rate": 5.448211797491872e-06, "loss": 0.8766, "step": 8536 }, { "epoch": 0.5097933834945658, "grad_norm": 1.4104640483856201, "learning_rate": 5.447548271514829e-06, "loss": 0.8621, "step": 8537 }, { "epoch": 0.5098530992475815, "grad_norm": 2.134516716003418, "learning_rate": 5.446884745537788e-06, "loss": 0.8812, "step": 8538 }, { "epoch": 0.5099128150005972, "grad_norm": 2.1678760051727295, "learning_rate": 5.4462212195607464e-06, "loss": 0.8332, "step": 8539 }, { "epoch": 0.5099725307536128, "grad_norm": 2.5634148120880127, "learning_rate": 5.445557693583704e-06, "loss": 0.8408, "step": 8540 }, { "epoch": 0.5100322465066285, "grad_norm": 2.383389472961426, "learning_rate": 5.444894167606663e-06, "loss": 0.8677, "step": 8541 }, { "epoch": 0.5100919622596441, "grad_norm": 1.8426578044891357, "learning_rate": 5.44423064162962e-06, "loss": 0.8911, "step": 8542 }, { "epoch": 0.5101516780126597, "grad_norm": 4.327221393585205, "learning_rate": 5.443567115652578e-06, "loss": 0.8387, "step": 8543 }, { "epoch": 0.5102113937656754, "grad_norm": 1.8092659711837769, "learning_rate": 5.442903589675537e-06, "loss": 0.8532, "step": 8544 }, { "epoch": 0.510271109518691, "grad_norm": 4.303023815155029, "learning_rate": 5.442240063698494e-06, "loss": 0.8442, "step": 8545 }, { "epoch": 0.5103308252717067, "grad_norm": 1.7321842908859253, "learning_rate": 5.441576537721452e-06, "loss": 0.8021, "step": 8546 }, { "epoch": 0.5103905410247224, "grad_norm": 2.651249647140503, "learning_rate": 5.4409130117444096e-06, "loss": 0.851, "step": 8547 }, { "epoch": 0.5104502567777379, "grad_norm": 2.1324386596679688, "learning_rate": 5.4402494857673685e-06, "loss": 0.8278, "step": 8548 }, { "epoch": 0.5105099725307536, "grad_norm": 1.9000566005706787, "learning_rate": 5.439585959790327e-06, "loss": 0.8462, "step": 8549 }, { "epoch": 0.5105696882837693, "grad_norm": 2.595154285430908, "learning_rate": 5.438922433813284e-06, "loss": 0.8438, "step": 8550 }, { "epoch": 0.5106294040367849, "grad_norm": 2.3364803791046143, "learning_rate": 5.438258907836243e-06, "loss": 0.8523, "step": 8551 }, { "epoch": 0.5106891197898006, "grad_norm": 2.368591785430908, "learning_rate": 5.4375953818592e-06, "loss": 0.8818, "step": 8552 }, { "epoch": 0.5107488355428162, "grad_norm": 1.87059485912323, "learning_rate": 5.436931855882158e-06, "loss": 0.8827, "step": 8553 }, { "epoch": 0.5108085512958318, "grad_norm": 2.6610054969787598, "learning_rate": 5.436268329905116e-06, "loss": 0.8236, "step": 8554 }, { "epoch": 0.5108682670488475, "grad_norm": 2.977675437927246, "learning_rate": 5.435604803928074e-06, "loss": 0.8937, "step": 8555 }, { "epoch": 0.5109279828018631, "grad_norm": 4.642186164855957, "learning_rate": 5.4349412779510325e-06, "loss": 0.8484, "step": 8556 }, { "epoch": 0.5109876985548788, "grad_norm": 2.2231171131134033, "learning_rate": 5.43427775197399e-06, "loss": 0.8171, "step": 8557 }, { "epoch": 0.5110474143078945, "grad_norm": 1.9614503383636475, "learning_rate": 5.433614225996948e-06, "loss": 0.8231, "step": 8558 }, { "epoch": 0.51110713006091, "grad_norm": 1.62299382686615, "learning_rate": 5.432950700019907e-06, "loss": 0.828, "step": 8559 }, { "epoch": 0.5111668458139257, "grad_norm": 2.247055768966675, "learning_rate": 5.432287174042864e-06, "loss": 0.8497, "step": 8560 }, { "epoch": 0.5112265615669414, "grad_norm": 2.157515525817871, "learning_rate": 5.431623648065822e-06, "loss": 0.8857, "step": 8561 }, { "epoch": 0.511286277319957, "grad_norm": 1.7753477096557617, "learning_rate": 5.430960122088779e-06, "loss": 0.8329, "step": 8562 }, { "epoch": 0.5113459930729727, "grad_norm": 1.9345837831497192, "learning_rate": 5.430296596111738e-06, "loss": 0.852, "step": 8563 }, { "epoch": 0.5114057088259883, "grad_norm": 2.347090005874634, "learning_rate": 5.429633070134696e-06, "loss": 0.8947, "step": 8564 }, { "epoch": 0.5114654245790039, "grad_norm": 1.8250160217285156, "learning_rate": 5.428969544157654e-06, "loss": 0.9145, "step": 8565 }, { "epoch": 0.5115251403320196, "grad_norm": 8.522843360900879, "learning_rate": 5.428306018180613e-06, "loss": 0.8125, "step": 8566 }, { "epoch": 0.5115848560850352, "grad_norm": 2.698871374130249, "learning_rate": 5.42764249220357e-06, "loss": 0.8579, "step": 8567 }, { "epoch": 0.5116445718380509, "grad_norm": 2.378779888153076, "learning_rate": 5.426978966226528e-06, "loss": 0.8679, "step": 8568 }, { "epoch": 0.5117042875910666, "grad_norm": 4.413297176361084, "learning_rate": 5.426315440249487e-06, "loss": 0.8722, "step": 8569 }, { "epoch": 0.5117640033440821, "grad_norm": 2.2973716259002686, "learning_rate": 5.425651914272444e-06, "loss": 0.8593, "step": 8570 }, { "epoch": 0.5118237190970978, "grad_norm": 2.2158255577087402, "learning_rate": 5.424988388295402e-06, "loss": 0.8276, "step": 8571 }, { "epoch": 0.5118834348501135, "grad_norm": 2.0099568367004395, "learning_rate": 5.4243248623183595e-06, "loss": 0.8706, "step": 8572 }, { "epoch": 0.5119431506031291, "grad_norm": 2.3530147075653076, "learning_rate": 5.4236613363413185e-06, "loss": 0.8276, "step": 8573 }, { "epoch": 0.5120028663561448, "grad_norm": 2.000300884246826, "learning_rate": 5.4229978103642766e-06, "loss": 0.8563, "step": 8574 }, { "epoch": 0.5120625821091604, "grad_norm": 1.9217017889022827, "learning_rate": 5.422334284387234e-06, "loss": 0.8515, "step": 8575 }, { "epoch": 0.512122297862176, "grad_norm": 1.9597567319869995, "learning_rate": 5.421670758410193e-06, "loss": 0.877, "step": 8576 }, { "epoch": 0.5121820136151917, "grad_norm": 2.148192882537842, "learning_rate": 5.42100723243315e-06, "loss": 0.9052, "step": 8577 }, { "epoch": 0.5122417293682073, "grad_norm": 1.8000482320785522, "learning_rate": 5.420343706456108e-06, "loss": 0.8194, "step": 8578 }, { "epoch": 0.512301445121223, "grad_norm": 1.7166812419891357, "learning_rate": 5.419680180479066e-06, "loss": 0.8315, "step": 8579 }, { "epoch": 0.5123611608742387, "grad_norm": 4.991137504577637, "learning_rate": 5.419016654502024e-06, "loss": 0.8721, "step": 8580 }, { "epoch": 0.5124208766272542, "grad_norm": 1.9910739660263062, "learning_rate": 5.4183531285249824e-06, "loss": 0.8391, "step": 8581 }, { "epoch": 0.5124805923802699, "grad_norm": 2.9768903255462646, "learning_rate": 5.41768960254794e-06, "loss": 0.8376, "step": 8582 }, { "epoch": 0.5125403081332855, "grad_norm": 1.6060214042663574, "learning_rate": 5.417026076570898e-06, "loss": 0.8471, "step": 8583 }, { "epoch": 0.5126000238863012, "grad_norm": 2.993356227874756, "learning_rate": 5.416362550593857e-06, "loss": 0.8652, "step": 8584 }, { "epoch": 0.5126597396393169, "grad_norm": 3.277665615081787, "learning_rate": 5.415699024616814e-06, "loss": 0.8783, "step": 8585 }, { "epoch": 0.5127194553923325, "grad_norm": 2.580528736114502, "learning_rate": 5.415035498639772e-06, "loss": 0.8282, "step": 8586 }, { "epoch": 0.5127791711453481, "grad_norm": 3.0480306148529053, "learning_rate": 5.414371972662729e-06, "loss": 0.8733, "step": 8587 }, { "epoch": 0.5128388868983638, "grad_norm": 3.8727364540100098, "learning_rate": 5.413708446685688e-06, "loss": 0.874, "step": 8588 }, { "epoch": 0.5128986026513794, "grad_norm": 3.4793241024017334, "learning_rate": 5.413044920708646e-06, "loss": 0.8915, "step": 8589 }, { "epoch": 0.5129583184043951, "grad_norm": 1.8383831977844238, "learning_rate": 5.412381394731604e-06, "loss": 0.9117, "step": 8590 }, { "epoch": 0.5130180341574108, "grad_norm": 2.3076331615448, "learning_rate": 5.411717868754563e-06, "loss": 0.8633, "step": 8591 }, { "epoch": 0.5130777499104263, "grad_norm": 2.1027095317840576, "learning_rate": 5.41105434277752e-06, "loss": 0.8594, "step": 8592 }, { "epoch": 0.513137465663442, "grad_norm": 2.295403480529785, "learning_rate": 5.410390816800478e-06, "loss": 0.8577, "step": 8593 }, { "epoch": 0.5131971814164576, "grad_norm": 4.719551086425781, "learning_rate": 5.409727290823437e-06, "loss": 0.8751, "step": 8594 }, { "epoch": 0.5132568971694733, "grad_norm": 2.2731292247772217, "learning_rate": 5.409063764846394e-06, "loss": 0.8017, "step": 8595 }, { "epoch": 0.513316612922489, "grad_norm": 2.2475852966308594, "learning_rate": 5.408400238869352e-06, "loss": 0.8753, "step": 8596 }, { "epoch": 0.5133763286755046, "grad_norm": 1.8335760831832886, "learning_rate": 5.4077367128923095e-06, "loss": 0.8175, "step": 8597 }, { "epoch": 0.5134360444285202, "grad_norm": 1.9429293870925903, "learning_rate": 5.4070731869152684e-06, "loss": 0.8536, "step": 8598 }, { "epoch": 0.5134957601815359, "grad_norm": 3.5300960540771484, "learning_rate": 5.4064096609382265e-06, "loss": 0.8989, "step": 8599 }, { "epoch": 0.5135554759345515, "grad_norm": 2.2894883155822754, "learning_rate": 5.405746134961184e-06, "loss": 0.8276, "step": 8600 }, { "epoch": 0.5135554759345515, "eval_text_loss": 0.9136365652084351, "eval_text_runtime": 15.2246, "eval_text_samples_per_second": 262.733, "eval_text_steps_per_second": 0.525, "step": 8600 }, { "epoch": 0.5135554759345515, "eval_image_loss": 0.6250418424606323, "eval_image_runtime": 5.0292, "eval_image_samples_per_second": 795.353, "eval_image_steps_per_second": 1.591, "step": 8600 }, { "epoch": 0.5135554759345515, "eval_video_loss": 1.0698187351226807, "eval_video_runtime": 76.6469, "eval_video_samples_per_second": 52.187, "eval_video_steps_per_second": 0.104, "step": 8600 }, { "epoch": 0.5136151916875672, "grad_norm": 6.631076335906982, "learning_rate": 5.405082608984143e-06, "loss": 0.8478, "step": 8601 }, { "epoch": 0.5136749074405829, "grad_norm": 1.9997903108596802, "learning_rate": 5.4044190830071e-06, "loss": 0.8817, "step": 8602 }, { "epoch": 0.5137346231935984, "grad_norm": 2.433051347732544, "learning_rate": 5.403755557030058e-06, "loss": 0.8748, "step": 8603 }, { "epoch": 0.5137943389466141, "grad_norm": 2.0755839347839355, "learning_rate": 5.403092031053016e-06, "loss": 0.9072, "step": 8604 }, { "epoch": 0.5138540546996297, "grad_norm": 2.373560667037964, "learning_rate": 5.402428505075974e-06, "loss": 0.8521, "step": 8605 }, { "epoch": 0.5139137704526454, "grad_norm": 1.770004153251648, "learning_rate": 5.401764979098932e-06, "loss": 0.842, "step": 8606 }, { "epoch": 0.5139734862056611, "grad_norm": 2.815354347229004, "learning_rate": 5.40110145312189e-06, "loss": 0.8322, "step": 8607 }, { "epoch": 0.5140332019586767, "grad_norm": 2.924250602722168, "learning_rate": 5.400437927144848e-06, "loss": 0.8517, "step": 8608 }, { "epoch": 0.5140929177116923, "grad_norm": 2.851867198944092, "learning_rate": 5.399774401167807e-06, "loss": 0.8588, "step": 8609 }, { "epoch": 0.514152633464708, "grad_norm": 2.836287498474121, "learning_rate": 5.399110875190764e-06, "loss": 0.8467, "step": 8610 }, { "epoch": 0.5142123492177236, "grad_norm": 2.1173484325408936, "learning_rate": 5.398447349213722e-06, "loss": 0.8817, "step": 8611 }, { "epoch": 0.5142720649707393, "grad_norm": 2.9865353107452393, "learning_rate": 5.397783823236679e-06, "loss": 0.8684, "step": 8612 }, { "epoch": 0.514331780723755, "grad_norm": 1.5475480556488037, "learning_rate": 5.397120297259638e-06, "loss": 0.8714, "step": 8613 }, { "epoch": 0.5143914964767706, "grad_norm": 2.030719757080078, "learning_rate": 5.396456771282596e-06, "loss": 0.8191, "step": 8614 }, { "epoch": 0.5144512122297862, "grad_norm": 2.9890944957733154, "learning_rate": 5.395793245305554e-06, "loss": 0.827, "step": 8615 }, { "epoch": 0.5145109279828018, "grad_norm": 2.3631176948547363, "learning_rate": 5.3951297193285126e-06, "loss": 0.8435, "step": 8616 }, { "epoch": 0.5145706437358175, "grad_norm": 3.8540940284729004, "learning_rate": 5.39446619335147e-06, "loss": 0.8647, "step": 8617 }, { "epoch": 0.5146303594888332, "grad_norm": 2.2715811729431152, "learning_rate": 5.393802667374428e-06, "loss": 0.8319, "step": 8618 }, { "epoch": 0.5146900752418488, "grad_norm": 1.8368784189224243, "learning_rate": 5.393139141397387e-06, "loss": 0.8748, "step": 8619 }, { "epoch": 0.5147497909948644, "grad_norm": 7.063708782196045, "learning_rate": 5.392475615420344e-06, "loss": 0.8806, "step": 8620 }, { "epoch": 0.5148095067478801, "grad_norm": 2.002890110015869, "learning_rate": 5.391812089443302e-06, "loss": 0.8675, "step": 8621 }, { "epoch": 0.5148692225008957, "grad_norm": 2.3037641048431396, "learning_rate": 5.3911485634662595e-06, "loss": 0.8541, "step": 8622 }, { "epoch": 0.5149289382539114, "grad_norm": 2.430056571960449, "learning_rate": 5.390485037489218e-06, "loss": 0.8492, "step": 8623 }, { "epoch": 0.5149886540069271, "grad_norm": 2.0903007984161377, "learning_rate": 5.3898215115121765e-06, "loss": 0.8514, "step": 8624 }, { "epoch": 0.5150483697599427, "grad_norm": 1.7686766386032104, "learning_rate": 5.389157985535134e-06, "loss": 0.8534, "step": 8625 }, { "epoch": 0.5151080855129583, "grad_norm": 3.339465618133545, "learning_rate": 5.388494459558093e-06, "loss": 0.8318, "step": 8626 }, { "epoch": 0.515167801265974, "grad_norm": 6.396732807159424, "learning_rate": 5.38783093358105e-06, "loss": 0.861, "step": 8627 }, { "epoch": 0.5152275170189896, "grad_norm": 1.7038016319274902, "learning_rate": 5.387167407604008e-06, "loss": 0.8845, "step": 8628 }, { "epoch": 0.5152872327720053, "grad_norm": 3.416792631149292, "learning_rate": 5.386503881626966e-06, "loss": 0.8313, "step": 8629 }, { "epoch": 0.5153469485250209, "grad_norm": 6.386898517608643, "learning_rate": 5.385840355649924e-06, "loss": 0.8891, "step": 8630 }, { "epoch": 0.5154066642780365, "grad_norm": 2.3384830951690674, "learning_rate": 5.385176829672882e-06, "loss": 0.876, "step": 8631 }, { "epoch": 0.5154663800310522, "grad_norm": 2.1597821712493896, "learning_rate": 5.38451330369584e-06, "loss": 0.8464, "step": 8632 }, { "epoch": 0.5155260957840678, "grad_norm": 2.0964555740356445, "learning_rate": 5.383849777718798e-06, "loss": 0.8646, "step": 8633 }, { "epoch": 0.5155858115370835, "grad_norm": 2.5558419227600098, "learning_rate": 5.383186251741757e-06, "loss": 0.8494, "step": 8634 }, { "epoch": 0.5156455272900992, "grad_norm": 2.9902713298797607, "learning_rate": 5.382522725764714e-06, "loss": 0.8646, "step": 8635 }, { "epoch": 0.5157052430431148, "grad_norm": 2.466015100479126, "learning_rate": 5.381859199787672e-06, "loss": 0.9036, "step": 8636 }, { "epoch": 0.5157649587961304, "grad_norm": 2.475829601287842, "learning_rate": 5.381195673810629e-06, "loss": 0.8612, "step": 8637 }, { "epoch": 0.515824674549146, "grad_norm": 3.9096460342407227, "learning_rate": 5.380532147833588e-06, "loss": 0.8406, "step": 8638 }, { "epoch": 0.5158843903021617, "grad_norm": 3.5992703437805176, "learning_rate": 5.379868621856546e-06, "loss": 0.8811, "step": 8639 }, { "epoch": 0.5159441060551774, "grad_norm": 3.14517879486084, "learning_rate": 5.379205095879504e-06, "loss": 0.8941, "step": 8640 }, { "epoch": 0.516003821808193, "grad_norm": 2.1075806617736816, "learning_rate": 5.3785415699024625e-06, "loss": 0.9224, "step": 8641 }, { "epoch": 0.5160635375612086, "grad_norm": 7.161324977874756, "learning_rate": 5.37787804392542e-06, "loss": 0.8905, "step": 8642 }, { "epoch": 0.5161232533142243, "grad_norm": 2.146465539932251, "learning_rate": 5.377214517948378e-06, "loss": 0.8619, "step": 8643 }, { "epoch": 0.5161829690672399, "grad_norm": 2.0442819595336914, "learning_rate": 5.376550991971337e-06, "loss": 0.835, "step": 8644 }, { "epoch": 0.5162426848202556, "grad_norm": 1.9632927179336548, "learning_rate": 5.375887465994294e-06, "loss": 0.8813, "step": 8645 }, { "epoch": 0.5163024005732713, "grad_norm": 3.4717533588409424, "learning_rate": 5.375223940017252e-06, "loss": 0.8899, "step": 8646 }, { "epoch": 0.5163621163262869, "grad_norm": 1.8769960403442383, "learning_rate": 5.3745604140402094e-06, "loss": 0.8239, "step": 8647 }, { "epoch": 0.5164218320793025, "grad_norm": 1.9329032897949219, "learning_rate": 5.373896888063168e-06, "loss": 0.8864, "step": 8648 }, { "epoch": 0.5164815478323181, "grad_norm": 3.9017865657806396, "learning_rate": 5.3732333620861265e-06, "loss": 0.8627, "step": 8649 }, { "epoch": 0.5165412635853338, "grad_norm": 2.0766072273254395, "learning_rate": 5.372569836109084e-06, "loss": 0.8282, "step": 8650 }, { "epoch": 0.5166009793383495, "grad_norm": 2.3351736068725586, "learning_rate": 5.371906310132043e-06, "loss": 0.8643, "step": 8651 }, { "epoch": 0.5166606950913651, "grad_norm": 2.155513286590576, "learning_rate": 5.371242784155e-06, "loss": 0.8824, "step": 8652 }, { "epoch": 0.5167204108443807, "grad_norm": 1.9739962816238403, "learning_rate": 5.370579258177958e-06, "loss": 0.8684, "step": 8653 }, { "epoch": 0.5167801265973964, "grad_norm": 1.750164270401001, "learning_rate": 5.369915732200916e-06, "loss": 0.8554, "step": 8654 }, { "epoch": 0.516839842350412, "grad_norm": 2.323514461517334, "learning_rate": 5.369252206223874e-06, "loss": 0.85, "step": 8655 }, { "epoch": 0.5168995581034277, "grad_norm": 2.1486542224884033, "learning_rate": 5.368588680246832e-06, "loss": 0.876, "step": 8656 }, { "epoch": 0.5169592738564434, "grad_norm": 1.7615915536880493, "learning_rate": 5.36792515426979e-06, "loss": 0.8762, "step": 8657 }, { "epoch": 0.517018989609459, "grad_norm": 1.854076862335205, "learning_rate": 5.367261628292748e-06, "loss": 0.8898, "step": 8658 }, { "epoch": 0.5170787053624746, "grad_norm": 1.7672828435897827, "learning_rate": 5.366598102315707e-06, "loss": 0.8824, "step": 8659 }, { "epoch": 0.5171384211154902, "grad_norm": 1.565794825553894, "learning_rate": 5.365934576338664e-06, "loss": 0.8157, "step": 8660 }, { "epoch": 0.5171981368685059, "grad_norm": 1.6008964776992798, "learning_rate": 5.365271050361622e-06, "loss": 0.8406, "step": 8661 }, { "epoch": 0.5172578526215216, "grad_norm": 2.1824088096618652, "learning_rate": 5.364607524384579e-06, "loss": 0.8652, "step": 8662 }, { "epoch": 0.5173175683745372, "grad_norm": 2.6221001148223877, "learning_rate": 5.363943998407538e-06, "loss": 0.8595, "step": 8663 }, { "epoch": 0.5173772841275528, "grad_norm": 3.3310165405273438, "learning_rate": 5.363280472430496e-06, "loss": 0.8768, "step": 8664 }, { "epoch": 0.5174369998805685, "grad_norm": 3.2748336791992188, "learning_rate": 5.3626169464534536e-06, "loss": 0.8987, "step": 8665 }, { "epoch": 0.5174967156335841, "grad_norm": 1.5641443729400635, "learning_rate": 5.3619534204764125e-06, "loss": 0.8374, "step": 8666 }, { "epoch": 0.5175564313865998, "grad_norm": 2.3049356937408447, "learning_rate": 5.36128989449937e-06, "loss": 0.8808, "step": 8667 }, { "epoch": 0.5176161471396155, "grad_norm": 2.739358425140381, "learning_rate": 5.360626368522328e-06, "loss": 0.8361, "step": 8668 }, { "epoch": 0.5176758628926311, "grad_norm": 2.47841215133667, "learning_rate": 5.359962842545287e-06, "loss": 0.8648, "step": 8669 }, { "epoch": 0.5177355786456467, "grad_norm": 2.2563369274139404, "learning_rate": 5.359299316568244e-06, "loss": 0.8152, "step": 8670 }, { "epoch": 0.5177952943986623, "grad_norm": 3.645287036895752, "learning_rate": 5.358635790591202e-06, "loss": 0.8486, "step": 8671 }, { "epoch": 0.517855010151678, "grad_norm": 3.6684815883636475, "learning_rate": 5.357972264614159e-06, "loss": 0.8756, "step": 8672 }, { "epoch": 0.5179147259046937, "grad_norm": 1.954551339149475, "learning_rate": 5.357308738637118e-06, "loss": 0.8501, "step": 8673 }, { "epoch": 0.5179744416577093, "grad_norm": 3.2055089473724365, "learning_rate": 5.3566452126600765e-06, "loss": 0.8488, "step": 8674 }, { "epoch": 0.518034157410725, "grad_norm": 2.6319570541381836, "learning_rate": 5.355981686683034e-06, "loss": 0.8806, "step": 8675 }, { "epoch": 0.5180938731637406, "grad_norm": 4.709954261779785, "learning_rate": 5.355318160705993e-06, "loss": 0.8482, "step": 8676 }, { "epoch": 0.5181535889167562, "grad_norm": 1.7927863597869873, "learning_rate": 5.35465463472895e-06, "loss": 0.832, "step": 8677 }, { "epoch": 0.5182133046697719, "grad_norm": 2.0946497917175293, "learning_rate": 5.353991108751908e-06, "loss": 0.8665, "step": 8678 }, { "epoch": 0.5182730204227876, "grad_norm": 2.1156845092773438, "learning_rate": 5.353327582774866e-06, "loss": 0.8304, "step": 8679 }, { "epoch": 0.5183327361758032, "grad_norm": 17.05388832092285, "learning_rate": 5.352664056797824e-06, "loss": 0.8373, "step": 8680 }, { "epoch": 0.5183924519288188, "grad_norm": 2.16877818107605, "learning_rate": 5.352000530820782e-06, "loss": 0.8352, "step": 8681 }, { "epoch": 0.5184521676818344, "grad_norm": 3.1732444763183594, "learning_rate": 5.3513370048437396e-06, "loss": 0.8586, "step": 8682 }, { "epoch": 0.5185118834348501, "grad_norm": 2.167640209197998, "learning_rate": 5.350673478866698e-06, "loss": 0.8634, "step": 8683 }, { "epoch": 0.5185715991878658, "grad_norm": 1.5513551235198975, "learning_rate": 5.350009952889657e-06, "loss": 0.8503, "step": 8684 }, { "epoch": 0.5186313149408814, "grad_norm": 2.1598212718963623, "learning_rate": 5.349346426912614e-06, "loss": 0.8436, "step": 8685 }, { "epoch": 0.5186910306938971, "grad_norm": 2.714162588119507, "learning_rate": 5.348682900935572e-06, "loss": 0.8689, "step": 8686 }, { "epoch": 0.5187507464469127, "grad_norm": 1.880755066871643, "learning_rate": 5.348019374958529e-06, "loss": 0.851, "step": 8687 }, { "epoch": 0.5188104621999283, "grad_norm": 1.5672240257263184, "learning_rate": 5.347355848981488e-06, "loss": 0.86, "step": 8688 }, { "epoch": 0.518870177952944, "grad_norm": 2.082427978515625, "learning_rate": 5.346692323004446e-06, "loss": 0.8553, "step": 8689 }, { "epoch": 0.5189298937059597, "grad_norm": 2.420382022857666, "learning_rate": 5.3460287970274035e-06, "loss": 0.8446, "step": 8690 }, { "epoch": 0.5189896094589753, "grad_norm": 2.1255288124084473, "learning_rate": 5.3453652710503625e-06, "loss": 0.8653, "step": 8691 }, { "epoch": 0.5190493252119909, "grad_norm": 6.2664079666137695, "learning_rate": 5.34470174507332e-06, "loss": 0.8536, "step": 8692 }, { "epoch": 0.5191090409650065, "grad_norm": 2.1030619144439697, "learning_rate": 5.344038219096278e-06, "loss": 0.8574, "step": 8693 }, { "epoch": 0.5191687567180222, "grad_norm": 2.209657669067383, "learning_rate": 5.343374693119237e-06, "loss": 0.8739, "step": 8694 }, { "epoch": 0.5192284724710379, "grad_norm": 2.5253610610961914, "learning_rate": 5.342711167142194e-06, "loss": 0.8295, "step": 8695 }, { "epoch": 0.5192881882240535, "grad_norm": 1.7016164064407349, "learning_rate": 5.342047641165152e-06, "loss": 0.8186, "step": 8696 }, { "epoch": 0.5193479039770692, "grad_norm": 2.3673596382141113, "learning_rate": 5.341384115188109e-06, "loss": 0.8404, "step": 8697 }, { "epoch": 0.5194076197300848, "grad_norm": 2.191267967224121, "learning_rate": 5.340720589211068e-06, "loss": 0.9021, "step": 8698 }, { "epoch": 0.5194673354831004, "grad_norm": 1.7942460775375366, "learning_rate": 5.3400570632340264e-06, "loss": 0.8239, "step": 8699 }, { "epoch": 0.5195270512361161, "grad_norm": 2.565711736679077, "learning_rate": 5.339393537256984e-06, "loss": 0.89, "step": 8700 }, { "epoch": 0.5195867669891318, "grad_norm": 2.841379165649414, "learning_rate": 5.338730011279943e-06, "loss": 0.8262, "step": 8701 }, { "epoch": 0.5196464827421474, "grad_norm": 1.9204602241516113, "learning_rate": 5.3380664853029e-06, "loss": 0.8597, "step": 8702 }, { "epoch": 0.519706198495163, "grad_norm": 1.8002947568893433, "learning_rate": 5.337402959325858e-06, "loss": 0.8404, "step": 8703 }, { "epoch": 0.5197659142481786, "grad_norm": 2.090357780456543, "learning_rate": 5.336739433348816e-06, "loss": 0.891, "step": 8704 }, { "epoch": 0.5198256300011943, "grad_norm": 1.8939448595046997, "learning_rate": 5.336075907371774e-06, "loss": 0.8695, "step": 8705 }, { "epoch": 0.51988534575421, "grad_norm": 2.138834238052368, "learning_rate": 5.335412381394732e-06, "loss": 0.8741, "step": 8706 }, { "epoch": 0.5199450615072256, "grad_norm": 3.0894246101379395, "learning_rate": 5.3347488554176895e-06, "loss": 0.9264, "step": 8707 }, { "epoch": 0.5200047772602413, "grad_norm": 1.8273706436157227, "learning_rate": 5.334085329440648e-06, "loss": 0.8097, "step": 8708 }, { "epoch": 0.5200644930132569, "grad_norm": 2.159694194793701, "learning_rate": 5.333421803463607e-06, "loss": 0.8408, "step": 8709 }, { "epoch": 0.5201242087662725, "grad_norm": 2.6352028846740723, "learning_rate": 5.332758277486564e-06, "loss": 0.8579, "step": 8710 }, { "epoch": 0.5201839245192882, "grad_norm": 3.139939069747925, "learning_rate": 5.332094751509522e-06, "loss": 0.8793, "step": 8711 }, { "epoch": 0.5202436402723039, "grad_norm": 2.466510534286499, "learning_rate": 5.331431225532479e-06, "loss": 0.884, "step": 8712 }, { "epoch": 0.5203033560253195, "grad_norm": 1.9630522727966309, "learning_rate": 5.330767699555438e-06, "loss": 0.8539, "step": 8713 }, { "epoch": 0.5203630717783351, "grad_norm": 1.9739185571670532, "learning_rate": 5.330104173578396e-06, "loss": 0.9112, "step": 8714 }, { "epoch": 0.5204227875313507, "grad_norm": 1.9614404439926147, "learning_rate": 5.3294406476013535e-06, "loss": 0.8638, "step": 8715 }, { "epoch": 0.5204825032843664, "grad_norm": 2.058264970779419, "learning_rate": 5.3287771216243124e-06, "loss": 0.8596, "step": 8716 }, { "epoch": 0.5205422190373821, "grad_norm": 1.7902923822402954, "learning_rate": 5.32811359564727e-06, "loss": 0.8807, "step": 8717 }, { "epoch": 0.5206019347903977, "grad_norm": 1.9115307331085205, "learning_rate": 5.327450069670228e-06, "loss": 0.8577, "step": 8718 }, { "epoch": 0.5206616505434134, "grad_norm": 2.861650228500366, "learning_rate": 5.326786543693187e-06, "loss": 0.8492, "step": 8719 }, { "epoch": 0.520721366296429, "grad_norm": 1.7482064962387085, "learning_rate": 5.326123017716144e-06, "loss": 0.8758, "step": 8720 }, { "epoch": 0.5207810820494446, "grad_norm": 2.1022069454193115, "learning_rate": 5.325459491739102e-06, "loss": 0.8202, "step": 8721 }, { "epoch": 0.5208407978024603, "grad_norm": 1.6083205938339233, "learning_rate": 5.324795965762059e-06, "loss": 0.8564, "step": 8722 }, { "epoch": 0.520900513555476, "grad_norm": 4.022153854370117, "learning_rate": 5.324132439785018e-06, "loss": 0.8772, "step": 8723 }, { "epoch": 0.5209602293084916, "grad_norm": 2.5722227096557617, "learning_rate": 5.323468913807976e-06, "loss": 0.8734, "step": 8724 }, { "epoch": 0.5210199450615072, "grad_norm": 2.1372783184051514, "learning_rate": 5.322805387830934e-06, "loss": 0.8603, "step": 8725 }, { "epoch": 0.5210796608145228, "grad_norm": 2.3410489559173584, "learning_rate": 5.322141861853893e-06, "loss": 0.8512, "step": 8726 }, { "epoch": 0.5211393765675385, "grad_norm": 7.794963359832764, "learning_rate": 5.32147833587685e-06, "loss": 0.8064, "step": 8727 }, { "epoch": 0.5211990923205542, "grad_norm": 1.664110541343689, "learning_rate": 5.320814809899808e-06, "loss": 0.8021, "step": 8728 }, { "epoch": 0.5212588080735698, "grad_norm": 1.742300033569336, "learning_rate": 5.320151283922766e-06, "loss": 0.8399, "step": 8729 }, { "epoch": 0.5213185238265855, "grad_norm": 2.2733287811279297, "learning_rate": 5.319487757945724e-06, "loss": 0.8385, "step": 8730 }, { "epoch": 0.521378239579601, "grad_norm": 1.9285982847213745, "learning_rate": 5.318824231968682e-06, "loss": 0.8451, "step": 8731 }, { "epoch": 0.5214379553326167, "grad_norm": 2.44926381111145, "learning_rate": 5.3181607059916395e-06, "loss": 0.868, "step": 8732 }, { "epoch": 0.5214976710856324, "grad_norm": 2.016873598098755, "learning_rate": 5.317497180014598e-06, "loss": 0.8977, "step": 8733 }, { "epoch": 0.521557386838648, "grad_norm": 2.1877694129943848, "learning_rate": 5.3168336540375566e-06, "loss": 0.8246, "step": 8734 }, { "epoch": 0.5216171025916637, "grad_norm": 3.247824192047119, "learning_rate": 5.316170128060514e-06, "loss": 0.8601, "step": 8735 }, { "epoch": 0.5216768183446793, "grad_norm": 2.1373214721679688, "learning_rate": 5.315506602083472e-06, "loss": 0.8584, "step": 8736 }, { "epoch": 0.5217365340976949, "grad_norm": 1.7943294048309326, "learning_rate": 5.314843076106429e-06, "loss": 0.8603, "step": 8737 }, { "epoch": 0.5217962498507106, "grad_norm": 2.7357072830200195, "learning_rate": 5.314179550129388e-06, "loss": 0.8495, "step": 8738 }, { "epoch": 0.5218559656037263, "grad_norm": 3.2343316078186035, "learning_rate": 5.313516024152346e-06, "loss": 0.8561, "step": 8739 }, { "epoch": 0.5219156813567419, "grad_norm": 2.2487802505493164, "learning_rate": 5.3128524981753035e-06, "loss": 0.8953, "step": 8740 }, { "epoch": 0.5219753971097576, "grad_norm": 2.4455502033233643, "learning_rate": 5.312188972198262e-06, "loss": 0.8452, "step": 8741 }, { "epoch": 0.5220351128627732, "grad_norm": 2.411153554916382, "learning_rate": 5.31152544622122e-06, "loss": 0.8647, "step": 8742 }, { "epoch": 0.5220948286157888, "grad_norm": 2.7649037837982178, "learning_rate": 5.310861920244178e-06, "loss": 0.8403, "step": 8743 }, { "epoch": 0.5221545443688045, "grad_norm": 2.2125144004821777, "learning_rate": 5.310198394267137e-06, "loss": 0.8501, "step": 8744 }, { "epoch": 0.5222142601218202, "grad_norm": 2.8537325859069824, "learning_rate": 5.309534868290094e-06, "loss": 0.834, "step": 8745 }, { "epoch": 0.5222739758748358, "grad_norm": 1.846602201461792, "learning_rate": 5.308871342313052e-06, "loss": 0.8266, "step": 8746 }, { "epoch": 0.5223336916278515, "grad_norm": 2.25780987739563, "learning_rate": 5.308207816336009e-06, "loss": 0.8774, "step": 8747 }, { "epoch": 0.522393407380867, "grad_norm": 1.784630537033081, "learning_rate": 5.307544290358968e-06, "loss": 0.8007, "step": 8748 }, { "epoch": 0.5224531231338827, "grad_norm": 1.667253851890564, "learning_rate": 5.306880764381926e-06, "loss": 0.8447, "step": 8749 }, { "epoch": 0.5225128388868984, "grad_norm": 2.216679811477661, "learning_rate": 5.306217238404884e-06, "loss": 0.8538, "step": 8750 }, { "epoch": 0.522572554639914, "grad_norm": 1.9210141897201538, "learning_rate": 5.3055537124278426e-06, "loss": 0.8909, "step": 8751 }, { "epoch": 0.5226322703929297, "grad_norm": 1.870072364807129, "learning_rate": 5.3048901864508e-06, "loss": 0.8395, "step": 8752 }, { "epoch": 0.5226919861459453, "grad_norm": 2.392728805541992, "learning_rate": 5.304226660473758e-06, "loss": 0.8501, "step": 8753 }, { "epoch": 0.5227517018989609, "grad_norm": 3.5824978351593018, "learning_rate": 5.303563134496716e-06, "loss": 0.8842, "step": 8754 }, { "epoch": 0.5228114176519766, "grad_norm": 1.7203948497772217, "learning_rate": 5.302899608519674e-06, "loss": 0.8493, "step": 8755 }, { "epoch": 0.5228711334049922, "grad_norm": 1.7155790328979492, "learning_rate": 5.302236082542632e-06, "loss": 0.8861, "step": 8756 }, { "epoch": 0.5229308491580079, "grad_norm": 2.1675939559936523, "learning_rate": 5.3015725565655895e-06, "loss": 0.874, "step": 8757 }, { "epoch": 0.5229905649110236, "grad_norm": 2.0825116634368896, "learning_rate": 5.300909030588548e-06, "loss": 0.8332, "step": 8758 }, { "epoch": 0.5230502806640391, "grad_norm": 1.6089423894882202, "learning_rate": 5.3002455046115065e-06, "loss": 0.8384, "step": 8759 }, { "epoch": 0.5231099964170548, "grad_norm": 2.3095827102661133, "learning_rate": 5.299581978634464e-06, "loss": 0.8773, "step": 8760 }, { "epoch": 0.5231697121700705, "grad_norm": 1.7518776655197144, "learning_rate": 5.298918452657422e-06, "loss": 0.8546, "step": 8761 }, { "epoch": 0.5232294279230861, "grad_norm": 2.2048635482788086, "learning_rate": 5.298254926680379e-06, "loss": 0.8733, "step": 8762 }, { "epoch": 0.5232891436761018, "grad_norm": 2.4966061115264893, "learning_rate": 5.297591400703338e-06, "loss": 0.8797, "step": 8763 }, { "epoch": 0.5233488594291174, "grad_norm": 2.3209919929504395, "learning_rate": 5.296927874726296e-06, "loss": 0.8431, "step": 8764 }, { "epoch": 0.523408575182133, "grad_norm": 2.1078052520751953, "learning_rate": 5.2962643487492534e-06, "loss": 0.8651, "step": 8765 }, { "epoch": 0.5234682909351487, "grad_norm": 2.713898181915283, "learning_rate": 5.295600822772212e-06, "loss": 0.8662, "step": 8766 }, { "epoch": 0.5235280066881643, "grad_norm": 2.8994133472442627, "learning_rate": 5.29493729679517e-06, "loss": 0.8679, "step": 8767 }, { "epoch": 0.52358772244118, "grad_norm": 2.0425243377685547, "learning_rate": 5.294273770818128e-06, "loss": 0.8563, "step": 8768 }, { "epoch": 0.5236474381941957, "grad_norm": 2.8109943866729736, "learning_rate": 5.293610244841087e-06, "loss": 0.8582, "step": 8769 }, { "epoch": 0.5237071539472112, "grad_norm": 2.630282402038574, "learning_rate": 5.292946718864044e-06, "loss": 0.8684, "step": 8770 }, { "epoch": 0.5237668697002269, "grad_norm": 2.088210344314575, "learning_rate": 5.292283192887002e-06, "loss": 0.889, "step": 8771 }, { "epoch": 0.5238265854532426, "grad_norm": 1.782000184059143, "learning_rate": 5.291619666909959e-06, "loss": 0.8359, "step": 8772 }, { "epoch": 0.5238863012062582, "grad_norm": 1.5914145708084106, "learning_rate": 5.290956140932918e-06, "loss": 0.8494, "step": 8773 }, { "epoch": 0.5239460169592739, "grad_norm": 2.018972873687744, "learning_rate": 5.290292614955876e-06, "loss": 0.8277, "step": 8774 }, { "epoch": 0.5240057327122895, "grad_norm": 2.0510776042938232, "learning_rate": 5.289629088978834e-06, "loss": 0.8308, "step": 8775 }, { "epoch": 0.5240654484653051, "grad_norm": 1.9178149700164795, "learning_rate": 5.2889655630017925e-06, "loss": 0.8234, "step": 8776 }, { "epoch": 0.5241251642183208, "grad_norm": 1.4615503549575806, "learning_rate": 5.28830203702475e-06, "loss": 0.8355, "step": 8777 }, { "epoch": 0.5241848799713364, "grad_norm": 2.1049997806549072, "learning_rate": 5.287638511047708e-06, "loss": 0.8473, "step": 8778 }, { "epoch": 0.5242445957243521, "grad_norm": 3.4083235263824463, "learning_rate": 5.286974985070667e-06, "loss": 0.8506, "step": 8779 }, { "epoch": 0.5243043114773678, "grad_norm": 1.8974193334579468, "learning_rate": 5.286311459093624e-06, "loss": 0.8782, "step": 8780 }, { "epoch": 0.5243640272303833, "grad_norm": 1.8571268320083618, "learning_rate": 5.285647933116582e-06, "loss": 0.8999, "step": 8781 }, { "epoch": 0.524423742983399, "grad_norm": 1.7317560911178589, "learning_rate": 5.2849844071395395e-06, "loss": 0.8476, "step": 8782 }, { "epoch": 0.5244834587364147, "grad_norm": 2.2326536178588867, "learning_rate": 5.2843208811624976e-06, "loss": 0.8647, "step": 8783 }, { "epoch": 0.5245431744894303, "grad_norm": 2.6985673904418945, "learning_rate": 5.2836573551854565e-06, "loss": 0.8555, "step": 8784 }, { "epoch": 0.524602890242446, "grad_norm": 4.194653511047363, "learning_rate": 5.282993829208414e-06, "loss": 0.8542, "step": 8785 }, { "epoch": 0.5246626059954616, "grad_norm": 2.455714464187622, "learning_rate": 5.282330303231372e-06, "loss": 0.882, "step": 8786 }, { "epoch": 0.5247223217484772, "grad_norm": 2.6320950984954834, "learning_rate": 5.281666777254329e-06, "loss": 0.8258, "step": 8787 }, { "epoch": 0.5247820375014929, "grad_norm": 2.0900521278381348, "learning_rate": 5.281003251277288e-06, "loss": 0.8183, "step": 8788 }, { "epoch": 0.5248417532545085, "grad_norm": 3.9364473819732666, "learning_rate": 5.280339725300246e-06, "loss": 0.8763, "step": 8789 }, { "epoch": 0.5249014690075242, "grad_norm": 2.8330297470092773, "learning_rate": 5.279676199323203e-06, "loss": 0.8383, "step": 8790 }, { "epoch": 0.5249611847605399, "grad_norm": 6.061817169189453, "learning_rate": 5.279012673346162e-06, "loss": 0.863, "step": 8791 }, { "epoch": 0.5250209005135554, "grad_norm": 2.1479594707489014, "learning_rate": 5.27834914736912e-06, "loss": 0.8628, "step": 8792 }, { "epoch": 0.5250806162665711, "grad_norm": 2.0784387588500977, "learning_rate": 5.277685621392078e-06, "loss": 0.8938, "step": 8793 }, { "epoch": 0.5251403320195868, "grad_norm": 2.6942737102508545, "learning_rate": 5.277022095415037e-06, "loss": 0.8495, "step": 8794 }, { "epoch": 0.5252000477726024, "grad_norm": 2.795339584350586, "learning_rate": 5.276358569437994e-06, "loss": 0.8803, "step": 8795 }, { "epoch": 0.5252597635256181, "grad_norm": 2.7824015617370605, "learning_rate": 5.275695043460952e-06, "loss": 0.8373, "step": 8796 }, { "epoch": 0.5253194792786336, "grad_norm": 2.522099733352661, "learning_rate": 5.275031517483909e-06, "loss": 0.848, "step": 8797 }, { "epoch": 0.5253791950316493, "grad_norm": 2.309136390686035, "learning_rate": 5.274367991506868e-06, "loss": 0.8337, "step": 8798 }, { "epoch": 0.525438910784665, "grad_norm": 1.98874032497406, "learning_rate": 5.273704465529826e-06, "loss": 0.8505, "step": 8799 }, { "epoch": 0.5254986265376806, "grad_norm": 2.253523826599121, "learning_rate": 5.2730409395527836e-06, "loss": 0.8526, "step": 8800 }, { "epoch": 0.5254986265376806, "eval_text_loss": 0.9133353233337402, "eval_text_runtime": 15.1916, "eval_text_samples_per_second": 263.303, "eval_text_steps_per_second": 0.527, "step": 8800 }, { "epoch": 0.5254986265376806, "eval_image_loss": 0.6231774091720581, "eval_image_runtime": 5.0442, "eval_image_samples_per_second": 792.994, "eval_image_steps_per_second": 1.586, "step": 8800 }, { "epoch": 0.5254986265376806, "eval_video_loss": 1.0676722526550293, "eval_video_runtime": 77.0239, "eval_video_samples_per_second": 51.932, "eval_video_steps_per_second": 0.104, "step": 8800 }, { "epoch": 0.5255583422906963, "grad_norm": 1.7740100622177124, "learning_rate": 5.2723774135757425e-06, "loss": 0.8673, "step": 8801 }, { "epoch": 0.525618058043712, "grad_norm": 1.9330506324768066, "learning_rate": 5.2717138875987e-06, "loss": 0.8435, "step": 8802 }, { "epoch": 0.5256777737967275, "grad_norm": 2.2988336086273193, "learning_rate": 5.271050361621658e-06, "loss": 0.8195, "step": 8803 }, { "epoch": 0.5257374895497432, "grad_norm": 1.7288635969161987, "learning_rate": 5.270386835644617e-06, "loss": 0.8711, "step": 8804 }, { "epoch": 0.5257972053027589, "grad_norm": 1.8317028284072876, "learning_rate": 5.269723309667574e-06, "loss": 0.8737, "step": 8805 }, { "epoch": 0.5258569210557745, "grad_norm": 1.8139212131500244, "learning_rate": 5.269059783690532e-06, "loss": 0.8465, "step": 8806 }, { "epoch": 0.5259166368087902, "grad_norm": 2.2008140087127686, "learning_rate": 5.268396257713489e-06, "loss": 0.8651, "step": 8807 }, { "epoch": 0.5259763525618057, "grad_norm": 2.355557441711426, "learning_rate": 5.2677327317364475e-06, "loss": 0.8252, "step": 8808 }, { "epoch": 0.5260360683148214, "grad_norm": 2.039146900177002, "learning_rate": 5.2670692057594065e-06, "loss": 0.9045, "step": 8809 }, { "epoch": 0.5260957840678371, "grad_norm": 2.1199986934661865, "learning_rate": 5.266405679782364e-06, "loss": 0.8342, "step": 8810 }, { "epoch": 0.5261554998208527, "grad_norm": 3.3915724754333496, "learning_rate": 5.265742153805322e-06, "loss": 0.8661, "step": 8811 }, { "epoch": 0.5262152155738684, "grad_norm": 2.372028112411499, "learning_rate": 5.265078627828279e-06, "loss": 0.8609, "step": 8812 }, { "epoch": 0.5262749313268841, "grad_norm": 2.5914292335510254, "learning_rate": 5.264415101851238e-06, "loss": 0.8474, "step": 8813 }, { "epoch": 0.5263346470798996, "grad_norm": 3.8714113235473633, "learning_rate": 5.263751575874196e-06, "loss": 0.8337, "step": 8814 }, { "epoch": 0.5263943628329153, "grad_norm": 1.5602662563323975, "learning_rate": 5.263088049897153e-06, "loss": 0.8665, "step": 8815 }, { "epoch": 0.526454078585931, "grad_norm": 1.8196194171905518, "learning_rate": 5.262424523920112e-06, "loss": 0.8566, "step": 8816 }, { "epoch": 0.5265137943389466, "grad_norm": 2.299698829650879, "learning_rate": 5.26176099794307e-06, "loss": 0.8559, "step": 8817 }, { "epoch": 0.5265735100919623, "grad_norm": 2.7695956230163574, "learning_rate": 5.261097471966028e-06, "loss": 0.816, "step": 8818 }, { "epoch": 0.526633225844978, "grad_norm": 4.611942291259766, "learning_rate": 5.260433945988987e-06, "loss": 0.8729, "step": 8819 }, { "epoch": 0.5266929415979935, "grad_norm": 2.349299192428589, "learning_rate": 5.259770420011944e-06, "loss": 0.8823, "step": 8820 }, { "epoch": 0.5267526573510092, "grad_norm": 1.9659488201141357, "learning_rate": 5.259106894034902e-06, "loss": 0.8582, "step": 8821 }, { "epoch": 0.5268123731040248, "grad_norm": 1.7667925357818604, "learning_rate": 5.258443368057859e-06, "loss": 0.8721, "step": 8822 }, { "epoch": 0.5268720888570405, "grad_norm": 2.5337295532226562, "learning_rate": 5.257779842080818e-06, "loss": 0.8316, "step": 8823 }, { "epoch": 0.5269318046100562, "grad_norm": 3.914405345916748, "learning_rate": 5.257116316103776e-06, "loss": 0.8592, "step": 8824 }, { "epoch": 0.5269915203630717, "grad_norm": 2.149148464202881, "learning_rate": 5.2564527901267335e-06, "loss": 0.8605, "step": 8825 }, { "epoch": 0.5270512361160874, "grad_norm": 2.5924956798553467, "learning_rate": 5.2557892641496925e-06, "loss": 0.8506, "step": 8826 }, { "epoch": 0.5271109518691031, "grad_norm": 2.002140522003174, "learning_rate": 5.25512573817265e-06, "loss": 0.8256, "step": 8827 }, { "epoch": 0.5271706676221187, "grad_norm": 2.273699998855591, "learning_rate": 5.254462212195608e-06, "loss": 0.8766, "step": 8828 }, { "epoch": 0.5272303833751344, "grad_norm": 2.0116052627563477, "learning_rate": 5.253798686218567e-06, "loss": 0.8539, "step": 8829 }, { "epoch": 0.5272900991281501, "grad_norm": 1.9890336990356445, "learning_rate": 5.253135160241524e-06, "loss": 0.8875, "step": 8830 }, { "epoch": 0.5273498148811656, "grad_norm": 2.2981743812561035, "learning_rate": 5.252471634264482e-06, "loss": 0.8778, "step": 8831 }, { "epoch": 0.5274095306341813, "grad_norm": 2.3104896545410156, "learning_rate": 5.251808108287439e-06, "loss": 0.8544, "step": 8832 }, { "epoch": 0.5274692463871969, "grad_norm": 2.6661956310272217, "learning_rate": 5.251144582310398e-06, "loss": 0.8515, "step": 8833 }, { "epoch": 0.5275289621402126, "grad_norm": 4.359808921813965, "learning_rate": 5.2504810563333564e-06, "loss": 0.8413, "step": 8834 }, { "epoch": 0.5275886778932283, "grad_norm": 2.5876948833465576, "learning_rate": 5.249817530356314e-06, "loss": 0.8286, "step": 8835 }, { "epoch": 0.5276483936462438, "grad_norm": 2.118032693862915, "learning_rate": 5.249154004379272e-06, "loss": 0.8604, "step": 8836 }, { "epoch": 0.5277081093992595, "grad_norm": 2.5551934242248535, "learning_rate": 5.248490478402229e-06, "loss": 0.8215, "step": 8837 }, { "epoch": 0.5277678251522752, "grad_norm": 1.7074649333953857, "learning_rate": 5.247826952425188e-06, "loss": 0.8395, "step": 8838 }, { "epoch": 0.5278275409052908, "grad_norm": 1.7174512147903442, "learning_rate": 5.247163426448146e-06, "loss": 0.8173, "step": 8839 }, { "epoch": 0.5278872566583065, "grad_norm": 1.7140578031539917, "learning_rate": 5.246499900471103e-06, "loss": 0.8229, "step": 8840 }, { "epoch": 0.5279469724113222, "grad_norm": 3.350917339324951, "learning_rate": 5.245836374494062e-06, "loss": 0.8636, "step": 8841 }, { "epoch": 0.5280066881643377, "grad_norm": 2.0741593837738037, "learning_rate": 5.2451728485170196e-06, "loss": 0.8738, "step": 8842 }, { "epoch": 0.5280664039173534, "grad_norm": 2.7549023628234863, "learning_rate": 5.244509322539978e-06, "loss": 0.8805, "step": 8843 }, { "epoch": 0.528126119670369, "grad_norm": 2.2544026374816895, "learning_rate": 5.243845796562937e-06, "loss": 0.8684, "step": 8844 }, { "epoch": 0.5281858354233847, "grad_norm": 1.8422324657440186, "learning_rate": 5.243182270585894e-06, "loss": 0.8613, "step": 8845 }, { "epoch": 0.5282455511764004, "grad_norm": 2.055689811706543, "learning_rate": 5.242518744608852e-06, "loss": 0.8702, "step": 8846 }, { "epoch": 0.5283052669294159, "grad_norm": 2.168426990509033, "learning_rate": 5.241855218631809e-06, "loss": 0.8226, "step": 8847 }, { "epoch": 0.5283649826824316, "grad_norm": 3.7132434844970703, "learning_rate": 5.241191692654768e-06, "loss": 0.8738, "step": 8848 }, { "epoch": 0.5284246984354473, "grad_norm": 2.1600067615509033, "learning_rate": 5.240528166677726e-06, "loss": 0.8272, "step": 8849 }, { "epoch": 0.5284844141884629, "grad_norm": 1.5954651832580566, "learning_rate": 5.2398646407006835e-06, "loss": 0.8685, "step": 8850 }, { "epoch": 0.5285441299414786, "grad_norm": 2.255922794342041, "learning_rate": 5.2392011147236425e-06, "loss": 0.8429, "step": 8851 }, { "epoch": 0.5286038456944943, "grad_norm": 1.985050916671753, "learning_rate": 5.2385375887466e-06, "loss": 0.8507, "step": 8852 }, { "epoch": 0.5286635614475098, "grad_norm": 3.245039939880371, "learning_rate": 5.237874062769558e-06, "loss": 0.8834, "step": 8853 }, { "epoch": 0.5287232772005255, "grad_norm": 2.779391288757324, "learning_rate": 5.237210536792517e-06, "loss": 0.8304, "step": 8854 }, { "epoch": 0.5287829929535411, "grad_norm": 1.967210292816162, "learning_rate": 5.236547010815474e-06, "loss": 0.8652, "step": 8855 }, { "epoch": 0.5288427087065568, "grad_norm": 2.075289726257324, "learning_rate": 5.235883484838432e-06, "loss": 0.8472, "step": 8856 }, { "epoch": 0.5289024244595725, "grad_norm": 6.044736385345459, "learning_rate": 5.235219958861389e-06, "loss": 0.8608, "step": 8857 }, { "epoch": 0.528962140212588, "grad_norm": 2.756903886795044, "learning_rate": 5.234556432884348e-06, "loss": 0.8716, "step": 8858 }, { "epoch": 0.5290218559656037, "grad_norm": 1.9509272575378418, "learning_rate": 5.233892906907306e-06, "loss": 0.8589, "step": 8859 }, { "epoch": 0.5290815717186194, "grad_norm": 1.812034249305725, "learning_rate": 5.233229380930264e-06, "loss": 0.8169, "step": 8860 }, { "epoch": 0.529141287471635, "grad_norm": 2.435108184814453, "learning_rate": 5.232565854953222e-06, "loss": 0.8649, "step": 8861 }, { "epoch": 0.5292010032246507, "grad_norm": 1.7081093788146973, "learning_rate": 5.23190232897618e-06, "loss": 0.8628, "step": 8862 }, { "epoch": 0.5292607189776664, "grad_norm": 2.6189565658569336, "learning_rate": 5.231238802999138e-06, "loss": 0.895, "step": 8863 }, { "epoch": 0.5293204347306819, "grad_norm": 2.5646207332611084, "learning_rate": 5.230575277022096e-06, "loss": 0.8187, "step": 8864 }, { "epoch": 0.5293801504836976, "grad_norm": 2.356229066848755, "learning_rate": 5.229911751045053e-06, "loss": 0.8582, "step": 8865 }, { "epoch": 0.5294398662367132, "grad_norm": 6.2691473960876465, "learning_rate": 5.229248225068012e-06, "loss": 0.8272, "step": 8866 }, { "epoch": 0.5294995819897289, "grad_norm": 2.497310161590576, "learning_rate": 5.2285846990909695e-06, "loss": 0.8654, "step": 8867 }, { "epoch": 0.5295592977427446, "grad_norm": 1.9312657117843628, "learning_rate": 5.227921173113928e-06, "loss": 0.8794, "step": 8868 }, { "epoch": 0.5296190134957601, "grad_norm": 2.3731839656829834, "learning_rate": 5.2272576471368866e-06, "loss": 0.8349, "step": 8869 }, { "epoch": 0.5296787292487758, "grad_norm": 1.7475652694702148, "learning_rate": 5.226594121159844e-06, "loss": 0.8019, "step": 8870 }, { "epoch": 0.5297384450017915, "grad_norm": 2.735548973083496, "learning_rate": 5.225930595182802e-06, "loss": 0.8812, "step": 8871 }, { "epoch": 0.5297981607548071, "grad_norm": 2.980342388153076, "learning_rate": 5.225267069205759e-06, "loss": 0.8623, "step": 8872 }, { "epoch": 0.5298578765078228, "grad_norm": 2.7508888244628906, "learning_rate": 5.224603543228718e-06, "loss": 0.8643, "step": 8873 }, { "epoch": 0.5299175922608385, "grad_norm": 5.39530611038208, "learning_rate": 5.223940017251676e-06, "loss": 0.8446, "step": 8874 }, { "epoch": 0.529977308013854, "grad_norm": 2.012721538543701, "learning_rate": 5.2232764912746335e-06, "loss": 0.8645, "step": 8875 }, { "epoch": 0.5300370237668697, "grad_norm": 1.6108182668685913, "learning_rate": 5.2226129652975924e-06, "loss": 0.8439, "step": 8876 }, { "epoch": 0.5300967395198853, "grad_norm": 2.418853282928467, "learning_rate": 5.22194943932055e-06, "loss": 0.8431, "step": 8877 }, { "epoch": 0.530156455272901, "grad_norm": 2.5490384101867676, "learning_rate": 5.221285913343508e-06, "loss": 0.8358, "step": 8878 }, { "epoch": 0.5302161710259167, "grad_norm": 3.0477075576782227, "learning_rate": 5.220622387366467e-06, "loss": 0.8833, "step": 8879 }, { "epoch": 0.5302758867789323, "grad_norm": 2.025912284851074, "learning_rate": 5.219958861389424e-06, "loss": 0.83, "step": 8880 }, { "epoch": 0.5303356025319479, "grad_norm": 2.130061149597168, "learning_rate": 5.219295335412382e-06, "loss": 0.8533, "step": 8881 }, { "epoch": 0.5303953182849636, "grad_norm": 2.348846673965454, "learning_rate": 5.218631809435339e-06, "loss": 0.897, "step": 8882 }, { "epoch": 0.5304550340379792, "grad_norm": 2.240776777267456, "learning_rate": 5.217968283458298e-06, "loss": 0.8681, "step": 8883 }, { "epoch": 0.5305147497909949, "grad_norm": 2.67380952835083, "learning_rate": 5.217304757481256e-06, "loss": 0.8651, "step": 8884 }, { "epoch": 0.5305744655440106, "grad_norm": 3.6807827949523926, "learning_rate": 5.216641231504214e-06, "loss": 0.8467, "step": 8885 }, { "epoch": 0.5306341812970261, "grad_norm": 3.4734866619110107, "learning_rate": 5.215977705527172e-06, "loss": 0.8613, "step": 8886 }, { "epoch": 0.5306938970500418, "grad_norm": 2.2179946899414062, "learning_rate": 5.21531417955013e-06, "loss": 0.8436, "step": 8887 }, { "epoch": 0.5307536128030574, "grad_norm": 1.7523833513259888, "learning_rate": 5.214650653573088e-06, "loss": 0.8584, "step": 8888 }, { "epoch": 0.5308133285560731, "grad_norm": 2.8657736778259277, "learning_rate": 5.213987127596046e-06, "loss": 0.8303, "step": 8889 }, { "epoch": 0.5308730443090888, "grad_norm": 2.1477913856506348, "learning_rate": 5.213323601619003e-06, "loss": 0.851, "step": 8890 }, { "epoch": 0.5309327600621044, "grad_norm": 2.043835401535034, "learning_rate": 5.212660075641962e-06, "loss": 0.8719, "step": 8891 }, { "epoch": 0.53099247581512, "grad_norm": 2.4616894721984863, "learning_rate": 5.2119965496649195e-06, "loss": 0.8394, "step": 8892 }, { "epoch": 0.5310521915681357, "grad_norm": 2.2581331729888916, "learning_rate": 5.211333023687878e-06, "loss": 0.8308, "step": 8893 }, { "epoch": 0.5311119073211513, "grad_norm": 2.702240228652954, "learning_rate": 5.2106694977108365e-06, "loss": 0.8892, "step": 8894 }, { "epoch": 0.531171623074167, "grad_norm": 2.040555477142334, "learning_rate": 5.210005971733794e-06, "loss": 0.8627, "step": 8895 }, { "epoch": 0.5312313388271827, "grad_norm": 2.807020425796509, "learning_rate": 5.209342445756752e-06, "loss": 0.8443, "step": 8896 }, { "epoch": 0.5312910545801982, "grad_norm": 1.9993739128112793, "learning_rate": 5.208678919779709e-06, "loss": 0.8375, "step": 8897 }, { "epoch": 0.5313507703332139, "grad_norm": 1.9187668561935425, "learning_rate": 5.208015393802668e-06, "loss": 0.8248, "step": 8898 }, { "epoch": 0.5314104860862295, "grad_norm": 2.4218947887420654, "learning_rate": 5.207351867825626e-06, "loss": 0.8527, "step": 8899 }, { "epoch": 0.5314702018392452, "grad_norm": 1.8285553455352783, "learning_rate": 5.2066883418485835e-06, "loss": 0.8653, "step": 8900 }, { "epoch": 0.5315299175922609, "grad_norm": 2.777775287628174, "learning_rate": 5.206024815871542e-06, "loss": 0.8123, "step": 8901 }, { "epoch": 0.5315896333452765, "grad_norm": 3.823373794555664, "learning_rate": 5.2053612898945e-06, "loss": 0.8525, "step": 8902 }, { "epoch": 0.5316493490982921, "grad_norm": 7.39255428314209, "learning_rate": 5.204697763917458e-06, "loss": 0.8424, "step": 8903 }, { "epoch": 0.5317090648513078, "grad_norm": 2.416267156600952, "learning_rate": 5.204034237940417e-06, "loss": 0.8676, "step": 8904 }, { "epoch": 0.5317687806043234, "grad_norm": 2.4804587364196777, "learning_rate": 5.203370711963374e-06, "loss": 0.813, "step": 8905 }, { "epoch": 0.5318284963573391, "grad_norm": 1.944875717163086, "learning_rate": 5.202707185986332e-06, "loss": 0.863, "step": 8906 }, { "epoch": 0.5318882121103548, "grad_norm": 1.9027721881866455, "learning_rate": 5.202043660009289e-06, "loss": 0.8586, "step": 8907 }, { "epoch": 0.5319479278633703, "grad_norm": 4.805450439453125, "learning_rate": 5.201380134032248e-06, "loss": 0.8969, "step": 8908 }, { "epoch": 0.532007643616386, "grad_norm": 2.064112663269043, "learning_rate": 5.200716608055206e-06, "loss": 0.8728, "step": 8909 }, { "epoch": 0.5320673593694016, "grad_norm": 2.3678581714630127, "learning_rate": 5.200053082078164e-06, "loss": 0.832, "step": 8910 }, { "epoch": 0.5321270751224173, "grad_norm": 2.0080552101135254, "learning_rate": 5.199389556101122e-06, "loss": 0.8513, "step": 8911 }, { "epoch": 0.532186790875433, "grad_norm": 2.3004167079925537, "learning_rate": 5.19872603012408e-06, "loss": 0.8371, "step": 8912 }, { "epoch": 0.5322465066284486, "grad_norm": 2.370105743408203, "learning_rate": 5.198062504147038e-06, "loss": 0.8607, "step": 8913 }, { "epoch": 0.5323062223814642, "grad_norm": 1.9126019477844238, "learning_rate": 5.197398978169996e-06, "loss": 0.845, "step": 8914 }, { "epoch": 0.5323659381344799, "grad_norm": 2.256948232650757, "learning_rate": 5.196735452192953e-06, "loss": 0.8243, "step": 8915 }, { "epoch": 0.5324256538874955, "grad_norm": 3.3036246299743652, "learning_rate": 5.196071926215912e-06, "loss": 0.8631, "step": 8916 }, { "epoch": 0.5324853696405112, "grad_norm": 2.4954586029052734, "learning_rate": 5.1954084002388695e-06, "loss": 0.8552, "step": 8917 }, { "epoch": 0.5325450853935269, "grad_norm": 1.8433992862701416, "learning_rate": 5.1947448742618276e-06, "loss": 0.8254, "step": 8918 }, { "epoch": 0.5326048011465424, "grad_norm": 3.6824357509613037, "learning_rate": 5.1940813482847865e-06, "loss": 0.8873, "step": 8919 }, { "epoch": 0.5326645168995581, "grad_norm": 1.8754607439041138, "learning_rate": 5.193417822307744e-06, "loss": 0.8241, "step": 8920 }, { "epoch": 0.5327242326525737, "grad_norm": 1.83454167842865, "learning_rate": 5.192754296330702e-06, "loss": 0.8104, "step": 8921 }, { "epoch": 0.5327839484055894, "grad_norm": 1.8407812118530273, "learning_rate": 5.192090770353659e-06, "loss": 0.8706, "step": 8922 }, { "epoch": 0.5328436641586051, "grad_norm": 1.830893874168396, "learning_rate": 5.191427244376618e-06, "loss": 0.8566, "step": 8923 }, { "epoch": 0.5329033799116207, "grad_norm": 1.6798382997512817, "learning_rate": 5.190763718399576e-06, "loss": 0.8232, "step": 8924 }, { "epoch": 0.5329630956646363, "grad_norm": 2.876401424407959, "learning_rate": 5.190100192422533e-06, "loss": 0.8664, "step": 8925 }, { "epoch": 0.533022811417652, "grad_norm": 2.1158344745635986, "learning_rate": 5.189436666445492e-06, "loss": 0.844, "step": 8926 }, { "epoch": 0.5330825271706676, "grad_norm": 2.315793514251709, "learning_rate": 5.18877314046845e-06, "loss": 0.9015, "step": 8927 }, { "epoch": 0.5331422429236833, "grad_norm": 2.1190009117126465, "learning_rate": 5.188109614491408e-06, "loss": 0.8438, "step": 8928 }, { "epoch": 0.533201958676699, "grad_norm": 1.7389576435089111, "learning_rate": 5.187446088514367e-06, "loss": 0.8292, "step": 8929 }, { "epoch": 0.5332616744297145, "grad_norm": 2.4845728874206543, "learning_rate": 5.186782562537324e-06, "loss": 0.859, "step": 8930 }, { "epoch": 0.5333213901827302, "grad_norm": 1.9939448833465576, "learning_rate": 5.186119036560282e-06, "loss": 0.8782, "step": 8931 }, { "epoch": 0.5333811059357458, "grad_norm": 2.7090563774108887, "learning_rate": 5.185455510583239e-06, "loss": 0.8728, "step": 8932 }, { "epoch": 0.5334408216887615, "grad_norm": 1.9538384675979614, "learning_rate": 5.184791984606198e-06, "loss": 0.852, "step": 8933 }, { "epoch": 0.5335005374417772, "grad_norm": 1.6931393146514893, "learning_rate": 5.184128458629156e-06, "loss": 0.8418, "step": 8934 }, { "epoch": 0.5335602531947928, "grad_norm": 2.0354602336883545, "learning_rate": 5.183464932652114e-06, "loss": 0.8328, "step": 8935 }, { "epoch": 0.5336199689478084, "grad_norm": 3.148592472076416, "learning_rate": 5.182801406675072e-06, "loss": 0.8547, "step": 8936 }, { "epoch": 0.533679684700824, "grad_norm": 1.876060962677002, "learning_rate": 5.18213788069803e-06, "loss": 0.8846, "step": 8937 }, { "epoch": 0.5337394004538397, "grad_norm": 1.758014440536499, "learning_rate": 5.181474354720988e-06, "loss": 0.8738, "step": 8938 }, { "epoch": 0.5337991162068554, "grad_norm": 2.674185276031494, "learning_rate": 5.180810828743946e-06, "loss": 0.8442, "step": 8939 }, { "epoch": 0.533858831959871, "grad_norm": 2.134769916534424, "learning_rate": 5.180147302766903e-06, "loss": 0.8851, "step": 8940 }, { "epoch": 0.5339185477128866, "grad_norm": 2.6431446075439453, "learning_rate": 5.179483776789862e-06, "loss": 0.8685, "step": 8941 }, { "epoch": 0.5339782634659023, "grad_norm": 1.9541537761688232, "learning_rate": 5.1788202508128194e-06, "loss": 0.8497, "step": 8942 }, { "epoch": 0.5340379792189179, "grad_norm": 2.9552242755889893, "learning_rate": 5.1781567248357775e-06, "loss": 0.8577, "step": 8943 }, { "epoch": 0.5340976949719336, "grad_norm": 2.6341331005096436, "learning_rate": 5.1774931988587365e-06, "loss": 0.8394, "step": 8944 }, { "epoch": 0.5341574107249493, "grad_norm": 1.8783613443374634, "learning_rate": 5.176829672881694e-06, "loss": 0.8469, "step": 8945 }, { "epoch": 0.5342171264779649, "grad_norm": 3.7104499340057373, "learning_rate": 5.176166146904652e-06, "loss": 0.8395, "step": 8946 }, { "epoch": 0.5342768422309805, "grad_norm": 2.1159005165100098, "learning_rate": 5.175502620927609e-06, "loss": 0.8749, "step": 8947 }, { "epoch": 0.5343365579839962, "grad_norm": 1.5285954475402832, "learning_rate": 5.174839094950568e-06, "loss": 0.8407, "step": 8948 }, { "epoch": 0.5343962737370118, "grad_norm": 1.9351787567138672, "learning_rate": 5.174175568973526e-06, "loss": 0.8392, "step": 8949 }, { "epoch": 0.5344559894900275, "grad_norm": 2.114673614501953, "learning_rate": 5.173512042996483e-06, "loss": 0.8624, "step": 8950 }, { "epoch": 0.5345157052430431, "grad_norm": 3.0895862579345703, "learning_rate": 5.172848517019442e-06, "loss": 0.9307, "step": 8951 }, { "epoch": 0.5345754209960588, "grad_norm": 3.7025463581085205, "learning_rate": 5.1721849910424e-06, "loss": 0.8484, "step": 8952 }, { "epoch": 0.5346351367490744, "grad_norm": 1.9515730142593384, "learning_rate": 5.171521465065358e-06, "loss": 0.8401, "step": 8953 }, { "epoch": 0.53469485250209, "grad_norm": 2.687537431716919, "learning_rate": 5.170857939088317e-06, "loss": 0.826, "step": 8954 }, { "epoch": 0.5347545682551057, "grad_norm": 1.8991475105285645, "learning_rate": 5.170194413111274e-06, "loss": 0.9012, "step": 8955 }, { "epoch": 0.5348142840081214, "grad_norm": 2.819563865661621, "learning_rate": 5.169530887134232e-06, "loss": 0.8653, "step": 8956 }, { "epoch": 0.534873999761137, "grad_norm": 1.9663490056991577, "learning_rate": 5.168867361157189e-06, "loss": 0.8326, "step": 8957 }, { "epoch": 0.5349337155141526, "grad_norm": 2.3465938568115234, "learning_rate": 5.168203835180148e-06, "loss": 0.8595, "step": 8958 }, { "epoch": 0.5349934312671683, "grad_norm": 5.926351070404053, "learning_rate": 5.167540309203106e-06, "loss": 0.8368, "step": 8959 }, { "epoch": 0.5350531470201839, "grad_norm": 3.106128692626953, "learning_rate": 5.1668767832260635e-06, "loss": 0.8433, "step": 8960 }, { "epoch": 0.5351128627731996, "grad_norm": 1.6665138006210327, "learning_rate": 5.166213257249022e-06, "loss": 0.8174, "step": 8961 }, { "epoch": 0.5351725785262152, "grad_norm": 2.8563613891601562, "learning_rate": 5.16554973127198e-06, "loss": 0.8667, "step": 8962 }, { "epoch": 0.5352322942792309, "grad_norm": 2.879591703414917, "learning_rate": 5.164886205294938e-06, "loss": 0.874, "step": 8963 }, { "epoch": 0.5352920100322465, "grad_norm": 2.360196828842163, "learning_rate": 5.164222679317896e-06, "loss": 0.8452, "step": 8964 }, { "epoch": 0.5353517257852621, "grad_norm": 2.527527332305908, "learning_rate": 5.163559153340853e-06, "loss": 0.8183, "step": 8965 }, { "epoch": 0.5354114415382778, "grad_norm": 2.407005786895752, "learning_rate": 5.162895627363812e-06, "loss": 0.8506, "step": 8966 }, { "epoch": 0.5354711572912935, "grad_norm": 2.745023012161255, "learning_rate": 5.162232101386769e-06, "loss": 0.8258, "step": 8967 }, { "epoch": 0.5355308730443091, "grad_norm": 5.176920413970947, "learning_rate": 5.1615685754097275e-06, "loss": 0.8978, "step": 8968 }, { "epoch": 0.5355905887973247, "grad_norm": 2.114858865737915, "learning_rate": 5.1609050494326865e-06, "loss": 0.8585, "step": 8969 }, { "epoch": 0.5356503045503404, "grad_norm": 2.053431749343872, "learning_rate": 5.160241523455644e-06, "loss": 0.824, "step": 8970 }, { "epoch": 0.535710020303356, "grad_norm": 2.1088340282440186, "learning_rate": 5.159577997478602e-06, "loss": 0.8591, "step": 8971 }, { "epoch": 0.5357697360563717, "grad_norm": 2.1095163822174072, "learning_rate": 5.158914471501559e-06, "loss": 0.8344, "step": 8972 }, { "epoch": 0.5358294518093873, "grad_norm": 2.379460096359253, "learning_rate": 5.158250945524518e-06, "loss": 0.8975, "step": 8973 }, { "epoch": 0.535889167562403, "grad_norm": 2.353372573852539, "learning_rate": 5.157587419547476e-06, "loss": 0.8691, "step": 8974 }, { "epoch": 0.5359488833154186, "grad_norm": 3.762387275695801, "learning_rate": 5.156923893570433e-06, "loss": 0.8936, "step": 8975 }, { "epoch": 0.5360085990684342, "grad_norm": 2.3920369148254395, "learning_rate": 5.156260367593392e-06, "loss": 0.8613, "step": 8976 }, { "epoch": 0.5360683148214499, "grad_norm": 2.5071918964385986, "learning_rate": 5.1555968416163496e-06, "loss": 0.9198, "step": 8977 }, { "epoch": 0.5361280305744656, "grad_norm": 1.858242392539978, "learning_rate": 5.154933315639308e-06, "loss": 0.8251, "step": 8978 }, { "epoch": 0.5361877463274812, "grad_norm": 3.5008068084716797, "learning_rate": 5.154269789662267e-06, "loss": 0.8545, "step": 8979 }, { "epoch": 0.5362474620804968, "grad_norm": 2.202136754989624, "learning_rate": 5.153606263685224e-06, "loss": 0.852, "step": 8980 }, { "epoch": 0.5363071778335124, "grad_norm": 2.652144432067871, "learning_rate": 5.152942737708182e-06, "loss": 0.8698, "step": 8981 }, { "epoch": 0.5363668935865281, "grad_norm": 2.2292401790618896, "learning_rate": 5.152279211731139e-06, "loss": 0.8349, "step": 8982 }, { "epoch": 0.5364266093395438, "grad_norm": 3.3415186405181885, "learning_rate": 5.151615685754098e-06, "loss": 0.8286, "step": 8983 }, { "epoch": 0.5364863250925594, "grad_norm": 2.389364242553711, "learning_rate": 5.150952159777056e-06, "loss": 0.8715, "step": 8984 }, { "epoch": 0.5365460408455751, "grad_norm": 1.8363916873931885, "learning_rate": 5.1502886338000135e-06, "loss": 0.8095, "step": 8985 }, { "epoch": 0.5366057565985907, "grad_norm": 2.4193997383117676, "learning_rate": 5.149625107822972e-06, "loss": 0.8963, "step": 8986 }, { "epoch": 0.5366654723516063, "grad_norm": 2.3148179054260254, "learning_rate": 5.14896158184593e-06, "loss": 0.8544, "step": 8987 }, { "epoch": 0.536725188104622, "grad_norm": 2.4324584007263184, "learning_rate": 5.148298055868888e-06, "loss": 0.8631, "step": 8988 }, { "epoch": 0.5367849038576377, "grad_norm": 2.1885666847229004, "learning_rate": 5.147634529891846e-06, "loss": 0.8695, "step": 8989 }, { "epoch": 0.5368446196106533, "grad_norm": 2.811354637145996, "learning_rate": 5.146971003914803e-06, "loss": 0.822, "step": 8990 }, { "epoch": 0.5369043353636689, "grad_norm": 2.198059320449829, "learning_rate": 5.146307477937762e-06, "loss": 0.8714, "step": 8991 }, { "epoch": 0.5369640511166845, "grad_norm": 2.0941808223724365, "learning_rate": 5.145643951960719e-06, "loss": 0.8513, "step": 8992 }, { "epoch": 0.5370237668697002, "grad_norm": 2.112718343734741, "learning_rate": 5.1449804259836775e-06, "loss": 0.8178, "step": 8993 }, { "epoch": 0.5370834826227159, "grad_norm": 2.243579387664795, "learning_rate": 5.1443169000066364e-06, "loss": 0.852, "step": 8994 }, { "epoch": 0.5371431983757315, "grad_norm": 3.5242810249328613, "learning_rate": 5.143653374029594e-06, "loss": 0.8824, "step": 8995 }, { "epoch": 0.5372029141287472, "grad_norm": 2.0442347526550293, "learning_rate": 5.142989848052552e-06, "loss": 0.8716, "step": 8996 }, { "epoch": 0.5372626298817628, "grad_norm": 4.2653422355651855, "learning_rate": 5.142326322075509e-06, "loss": 0.836, "step": 8997 }, { "epoch": 0.5373223456347784, "grad_norm": 2.3865554332733154, "learning_rate": 5.141662796098468e-06, "loss": 0.8775, "step": 8998 }, { "epoch": 0.5373820613877941, "grad_norm": 2.872251033782959, "learning_rate": 5.140999270121426e-06, "loss": 0.8601, "step": 8999 }, { "epoch": 0.5374417771408098, "grad_norm": 3.1837828159332275, "learning_rate": 5.140335744144383e-06, "loss": 0.8603, "step": 9000 }, { "epoch": 0.5374417771408098, "eval_text_loss": 0.9116338491439819, "eval_text_runtime": 15.2196, "eval_text_samples_per_second": 262.819, "eval_text_steps_per_second": 0.526, "step": 9000 }, { "epoch": 0.5374417771408098, "eval_image_loss": 0.6238017678260803, "eval_image_runtime": 5.8162, "eval_image_samples_per_second": 687.732, "eval_image_steps_per_second": 1.375, "step": 9000 }, { "epoch": 0.5374417771408098, "eval_video_loss": 1.0632741451263428, "eval_video_runtime": 77.1317, "eval_video_samples_per_second": 51.859, "eval_video_steps_per_second": 0.104, "step": 9000 }, { "epoch": 0.5375014928938254, "grad_norm": 1.6946237087249756, "learning_rate": 5.139672218167342e-06, "loss": 0.8335, "step": 9001 }, { "epoch": 0.537561208646841, "grad_norm": 2.456939458847046, "learning_rate": 5.1390086921902995e-06, "loss": 0.8436, "step": 9002 }, { "epoch": 0.5376209243998566, "grad_norm": 1.8993041515350342, "learning_rate": 5.138345166213258e-06, "loss": 0.8568, "step": 9003 }, { "epoch": 0.5376806401528723, "grad_norm": 1.7991091012954712, "learning_rate": 5.137681640236217e-06, "loss": 0.8604, "step": 9004 }, { "epoch": 0.537740355905888, "grad_norm": 2.01118803024292, "learning_rate": 5.137018114259174e-06, "loss": 0.8697, "step": 9005 }, { "epoch": 0.5378000716589036, "grad_norm": 2.5552194118499756, "learning_rate": 5.136354588282132e-06, "loss": 0.841, "step": 9006 }, { "epoch": 0.5378597874119193, "grad_norm": 2.640507459640503, "learning_rate": 5.135691062305089e-06, "loss": 0.9018, "step": 9007 }, { "epoch": 0.5379195031649349, "grad_norm": 2.3889005184173584, "learning_rate": 5.135027536328048e-06, "loss": 0.8193, "step": 9008 }, { "epoch": 0.5379792189179505, "grad_norm": 1.5971287488937378, "learning_rate": 5.134364010351006e-06, "loss": 0.8325, "step": 9009 }, { "epoch": 0.5380389346709662, "grad_norm": 1.9989140033721924, "learning_rate": 5.1337004843739635e-06, "loss": 0.8665, "step": 9010 }, { "epoch": 0.5380986504239819, "grad_norm": 1.9155181646347046, "learning_rate": 5.133036958396922e-06, "loss": 0.8389, "step": 9011 }, { "epoch": 0.5381583661769975, "grad_norm": 1.875170111656189, "learning_rate": 5.13237343241988e-06, "loss": 0.8523, "step": 9012 }, { "epoch": 0.5382180819300132, "grad_norm": 1.8788182735443115, "learning_rate": 5.131709906442838e-06, "loss": 0.8509, "step": 9013 }, { "epoch": 0.5382777976830287, "grad_norm": 2.6037261486053467, "learning_rate": 5.131046380465796e-06, "loss": 0.8769, "step": 9014 }, { "epoch": 0.5383375134360444, "grad_norm": 1.724057674407959, "learning_rate": 5.130382854488753e-06, "loss": 0.8548, "step": 9015 }, { "epoch": 0.5383972291890601, "grad_norm": 1.6236921548843384, "learning_rate": 5.129719328511712e-06, "loss": 0.8494, "step": 9016 }, { "epoch": 0.5384569449420757, "grad_norm": 8.278932571411133, "learning_rate": 5.129055802534669e-06, "loss": 0.8677, "step": 9017 }, { "epoch": 0.5385166606950914, "grad_norm": 1.950211524963379, "learning_rate": 5.1283922765576274e-06, "loss": 0.8588, "step": 9018 }, { "epoch": 0.538576376448107, "grad_norm": 1.9663106203079224, "learning_rate": 5.127728750580586e-06, "loss": 0.8365, "step": 9019 }, { "epoch": 0.5386360922011226, "grad_norm": 1.8364055156707764, "learning_rate": 5.127065224603544e-06, "loss": 0.8539, "step": 9020 }, { "epoch": 0.5386958079541383, "grad_norm": 2.0405399799346924, "learning_rate": 5.126401698626502e-06, "loss": 0.8475, "step": 9021 }, { "epoch": 0.538755523707154, "grad_norm": 8.41829776763916, "learning_rate": 5.125738172649459e-06, "loss": 0.861, "step": 9022 }, { "epoch": 0.5388152394601696, "grad_norm": 2.5324511528015137, "learning_rate": 5.125074646672418e-06, "loss": 0.8492, "step": 9023 }, { "epoch": 0.5388749552131853, "grad_norm": 2.3704421520233154, "learning_rate": 5.124411120695376e-06, "loss": 0.8578, "step": 9024 }, { "epoch": 0.5389346709662008, "grad_norm": 1.596846342086792, "learning_rate": 5.123747594718333e-06, "loss": 0.8305, "step": 9025 }, { "epoch": 0.5389943867192165, "grad_norm": 2.039032459259033, "learning_rate": 5.123084068741292e-06, "loss": 0.8559, "step": 9026 }, { "epoch": 0.5390541024722322, "grad_norm": 2.2124969959259033, "learning_rate": 5.1224205427642495e-06, "loss": 0.8633, "step": 9027 }, { "epoch": 0.5391138182252478, "grad_norm": 3.1480603218078613, "learning_rate": 5.121757016787208e-06, "loss": 0.8414, "step": 9028 }, { "epoch": 0.5391735339782635, "grad_norm": 1.8677763938903809, "learning_rate": 5.1210934908101666e-06, "loss": 0.8403, "step": 9029 }, { "epoch": 0.5392332497312791, "grad_norm": 2.3620293140411377, "learning_rate": 5.120429964833124e-06, "loss": 0.8386, "step": 9030 }, { "epoch": 0.5392929654842947, "grad_norm": 2.9733238220214844, "learning_rate": 5.119766438856082e-06, "loss": 0.8224, "step": 9031 }, { "epoch": 0.5393526812373104, "grad_norm": 2.8640241622924805, "learning_rate": 5.119102912879039e-06, "loss": 0.8796, "step": 9032 }, { "epoch": 0.5394123969903261, "grad_norm": 1.8041092157363892, "learning_rate": 5.118439386901998e-06, "loss": 0.8748, "step": 9033 }, { "epoch": 0.5394721127433417, "grad_norm": 2.0371599197387695, "learning_rate": 5.117775860924956e-06, "loss": 0.8481, "step": 9034 }, { "epoch": 0.5395318284963574, "grad_norm": 1.6778632402420044, "learning_rate": 5.1171123349479135e-06, "loss": 0.8634, "step": 9035 }, { "epoch": 0.539591544249373, "grad_norm": 1.8581466674804688, "learning_rate": 5.1164488089708716e-06, "loss": 0.8853, "step": 9036 }, { "epoch": 0.5396512600023886, "grad_norm": 2.1861515045166016, "learning_rate": 5.11578528299383e-06, "loss": 0.8246, "step": 9037 }, { "epoch": 0.5397109757554043, "grad_norm": 2.0689334869384766, "learning_rate": 5.115121757016788e-06, "loss": 0.8748, "step": 9038 }, { "epoch": 0.5397706915084199, "grad_norm": 2.9741320610046387, "learning_rate": 5.114458231039746e-06, "loss": 0.8542, "step": 9039 }, { "epoch": 0.5398304072614356, "grad_norm": 2.5669353008270264, "learning_rate": 5.113794705062703e-06, "loss": 0.8587, "step": 9040 }, { "epoch": 0.5398901230144512, "grad_norm": 2.12876558303833, "learning_rate": 5.113131179085662e-06, "loss": 0.8485, "step": 9041 }, { "epoch": 0.5399498387674668, "grad_norm": 3.3261666297912598, "learning_rate": 5.112467653108619e-06, "loss": 0.8609, "step": 9042 }, { "epoch": 0.5400095545204825, "grad_norm": 2.7585654258728027, "learning_rate": 5.111804127131577e-06, "loss": 0.8633, "step": 9043 }, { "epoch": 0.5400692702734982, "grad_norm": 2.6951773166656494, "learning_rate": 5.111140601154536e-06, "loss": 0.8302, "step": 9044 }, { "epoch": 0.5401289860265138, "grad_norm": 1.9874247312545776, "learning_rate": 5.110477075177494e-06, "loss": 0.833, "step": 9045 }, { "epoch": 0.5401887017795295, "grad_norm": 2.3801393508911133, "learning_rate": 5.109813549200452e-06, "loss": 0.8889, "step": 9046 }, { "epoch": 0.540248417532545, "grad_norm": 3.585944652557373, "learning_rate": 5.109150023223409e-06, "loss": 0.8854, "step": 9047 }, { "epoch": 0.5403081332855607, "grad_norm": 1.956824541091919, "learning_rate": 5.108486497246368e-06, "loss": 0.8323, "step": 9048 }, { "epoch": 0.5403678490385764, "grad_norm": 1.8513871431350708, "learning_rate": 5.107822971269326e-06, "loss": 0.8431, "step": 9049 }, { "epoch": 0.540427564791592, "grad_norm": 2.304840326309204, "learning_rate": 5.107159445292283e-06, "loss": 0.8638, "step": 9050 }, { "epoch": 0.5404872805446077, "grad_norm": 1.9607746601104736, "learning_rate": 5.106495919315242e-06, "loss": 0.8674, "step": 9051 }, { "epoch": 0.5405469962976233, "grad_norm": 1.9171063899993896, "learning_rate": 5.1058323933381995e-06, "loss": 0.8492, "step": 9052 }, { "epoch": 0.5406067120506389, "grad_norm": 1.8109965324401855, "learning_rate": 5.105168867361158e-06, "loss": 0.8483, "step": 9053 }, { "epoch": 0.5406664278036546, "grad_norm": 1.7793117761611938, "learning_rate": 5.1045053413841165e-06, "loss": 0.832, "step": 9054 }, { "epoch": 0.5407261435566703, "grad_norm": 2.386157751083374, "learning_rate": 5.103841815407074e-06, "loss": 0.8592, "step": 9055 }, { "epoch": 0.5407858593096859, "grad_norm": 2.1501314640045166, "learning_rate": 5.103178289430032e-06, "loss": 0.8559, "step": 9056 }, { "epoch": 0.5408455750627016, "grad_norm": 2.820357084274292, "learning_rate": 5.102514763452989e-06, "loss": 0.8864, "step": 9057 }, { "epoch": 0.5409052908157171, "grad_norm": 1.8023236989974976, "learning_rate": 5.101851237475948e-06, "loss": 0.838, "step": 9058 }, { "epoch": 0.5409650065687328, "grad_norm": 3.6648364067077637, "learning_rate": 5.101187711498906e-06, "loss": 0.8463, "step": 9059 }, { "epoch": 0.5410247223217485, "grad_norm": 1.8874462842941284, "learning_rate": 5.1005241855218634e-06, "loss": 0.8371, "step": 9060 }, { "epoch": 0.5410844380747641, "grad_norm": 2.3162271976470947, "learning_rate": 5.0998606595448215e-06, "loss": 0.8545, "step": 9061 }, { "epoch": 0.5411441538277798, "grad_norm": 1.6710522174835205, "learning_rate": 5.09919713356778e-06, "loss": 0.8651, "step": 9062 }, { "epoch": 0.5412038695807954, "grad_norm": 1.8446052074432373, "learning_rate": 5.098533607590738e-06, "loss": 0.8506, "step": 9063 }, { "epoch": 0.541263585333811, "grad_norm": 2.406444787979126, "learning_rate": 5.097870081613696e-06, "loss": 0.9255, "step": 9064 }, { "epoch": 0.5413233010868267, "grad_norm": 1.939086675643921, "learning_rate": 5.097206555636653e-06, "loss": 0.8341, "step": 9065 }, { "epoch": 0.5413830168398424, "grad_norm": 2.6088743209838867, "learning_rate": 5.096543029659612e-06, "loss": 0.8498, "step": 9066 }, { "epoch": 0.541442732592858, "grad_norm": 1.69175124168396, "learning_rate": 5.095879503682569e-06, "loss": 0.8267, "step": 9067 }, { "epoch": 0.5415024483458737, "grad_norm": 1.8722846508026123, "learning_rate": 5.095215977705527e-06, "loss": 0.8663, "step": 9068 }, { "epoch": 0.5415621640988892, "grad_norm": 2.9988231658935547, "learning_rate": 5.094552451728486e-06, "loss": 0.8379, "step": 9069 }, { "epoch": 0.5416218798519049, "grad_norm": 2.6581618785858154, "learning_rate": 5.093888925751444e-06, "loss": 0.8569, "step": 9070 }, { "epoch": 0.5416815956049206, "grad_norm": 1.6512131690979004, "learning_rate": 5.093225399774402e-06, "loss": 0.8332, "step": 9071 }, { "epoch": 0.5417413113579362, "grad_norm": 1.9413535594940186, "learning_rate": 5.092561873797359e-06, "loss": 0.8605, "step": 9072 }, { "epoch": 0.5418010271109519, "grad_norm": 2.2476511001586914, "learning_rate": 5.091898347820318e-06, "loss": 0.8691, "step": 9073 }, { "epoch": 0.5418607428639675, "grad_norm": 3.8844947814941406, "learning_rate": 5.091234821843276e-06, "loss": 0.8545, "step": 9074 }, { "epoch": 0.5419204586169831, "grad_norm": 2.628843307495117, "learning_rate": 5.090571295866233e-06, "loss": 0.8332, "step": 9075 }, { "epoch": 0.5419801743699988, "grad_norm": 2.248622179031372, "learning_rate": 5.089907769889192e-06, "loss": 0.8866, "step": 9076 }, { "epoch": 0.5420398901230145, "grad_norm": 3.7949562072753906, "learning_rate": 5.0892442439121494e-06, "loss": 0.8838, "step": 9077 }, { "epoch": 0.5420996058760301, "grad_norm": 1.6637645959854126, "learning_rate": 5.0885807179351075e-06, "loss": 0.8452, "step": 9078 }, { "epoch": 0.5421593216290458, "grad_norm": 5.923891544342041, "learning_rate": 5.0879171919580665e-06, "loss": 0.8474, "step": 9079 }, { "epoch": 0.5422190373820613, "grad_norm": 1.9129316806793213, "learning_rate": 5.087253665981024e-06, "loss": 0.884, "step": 9080 }, { "epoch": 0.542278753135077, "grad_norm": 8.300219535827637, "learning_rate": 5.086590140003982e-06, "loss": 0.8812, "step": 9081 }, { "epoch": 0.5423384688880927, "grad_norm": 2.470531463623047, "learning_rate": 5.085926614026939e-06, "loss": 0.8192, "step": 9082 }, { "epoch": 0.5423981846411083, "grad_norm": 3.393718957901001, "learning_rate": 5.085263088049898e-06, "loss": 0.8775, "step": 9083 }, { "epoch": 0.542457900394124, "grad_norm": 1.85191810131073, "learning_rate": 5.084599562072856e-06, "loss": 0.8539, "step": 9084 }, { "epoch": 0.5425176161471397, "grad_norm": 1.907235026359558, "learning_rate": 5.083936036095813e-06, "loss": 0.8509, "step": 9085 }, { "epoch": 0.5425773319001552, "grad_norm": 3.464164972305298, "learning_rate": 5.0832725101187715e-06, "loss": 0.8501, "step": 9086 }, { "epoch": 0.5426370476531709, "grad_norm": 2.2529456615448, "learning_rate": 5.08260898414173e-06, "loss": 0.8732, "step": 9087 }, { "epoch": 0.5426967634061866, "grad_norm": 3.2432217597961426, "learning_rate": 5.081945458164688e-06, "loss": 0.8677, "step": 9088 }, { "epoch": 0.5427564791592022, "grad_norm": 2.037837028503418, "learning_rate": 5.081281932187646e-06, "loss": 0.8388, "step": 9089 }, { "epoch": 0.5428161949122179, "grad_norm": 1.5710902214050293, "learning_rate": 5.080618406210603e-06, "loss": 0.8796, "step": 9090 }, { "epoch": 0.5428759106652334, "grad_norm": 3.283900260925293, "learning_rate": 5.079954880233562e-06, "loss": 0.8317, "step": 9091 }, { "epoch": 0.5429356264182491, "grad_norm": 2.9678597450256348, "learning_rate": 5.079291354256519e-06, "loss": 0.8885, "step": 9092 }, { "epoch": 0.5429953421712648, "grad_norm": 4.360109806060791, "learning_rate": 5.078627828279477e-06, "loss": 0.8575, "step": 9093 }, { "epoch": 0.5430550579242804, "grad_norm": 2.939852714538574, "learning_rate": 5.077964302302436e-06, "loss": 0.8315, "step": 9094 }, { "epoch": 0.5431147736772961, "grad_norm": 2.393043279647827, "learning_rate": 5.0773007763253936e-06, "loss": 0.8551, "step": 9095 }, { "epoch": 0.5431744894303118, "grad_norm": 2.0515494346618652, "learning_rate": 5.076637250348352e-06, "loss": 0.8406, "step": 9096 }, { "epoch": 0.5432342051833273, "grad_norm": 2.6976664066314697, "learning_rate": 5.075973724371309e-06, "loss": 0.8212, "step": 9097 }, { "epoch": 0.543293920936343, "grad_norm": 1.694870948791504, "learning_rate": 5.075310198394268e-06, "loss": 0.8472, "step": 9098 }, { "epoch": 0.5433536366893587, "grad_norm": 1.9288737773895264, "learning_rate": 5.074646672417226e-06, "loss": 0.8265, "step": 9099 }, { "epoch": 0.5434133524423743, "grad_norm": 1.5876356363296509, "learning_rate": 5.073983146440183e-06, "loss": 0.834, "step": 9100 }, { "epoch": 0.54347306819539, "grad_norm": 2.0868568420410156, "learning_rate": 5.073319620463142e-06, "loss": 0.8712, "step": 9101 }, { "epoch": 0.5435327839484055, "grad_norm": 2.0376882553100586, "learning_rate": 5.072656094486099e-06, "loss": 0.8449, "step": 9102 }, { "epoch": 0.5435924997014212, "grad_norm": 2.020177125930786, "learning_rate": 5.0719925685090575e-06, "loss": 0.7949, "step": 9103 }, { "epoch": 0.5436522154544369, "grad_norm": 1.6661659479141235, "learning_rate": 5.0713290425320165e-06, "loss": 0.8339, "step": 9104 }, { "epoch": 0.5437119312074525, "grad_norm": 11.062813758850098, "learning_rate": 5.070665516554974e-06, "loss": 0.8371, "step": 9105 }, { "epoch": 0.5437716469604682, "grad_norm": 3.199483633041382, "learning_rate": 5.070001990577932e-06, "loss": 0.835, "step": 9106 }, { "epoch": 0.5438313627134839, "grad_norm": 1.6000912189483643, "learning_rate": 5.069338464600889e-06, "loss": 0.8309, "step": 9107 }, { "epoch": 0.5438910784664994, "grad_norm": 1.8744314908981323, "learning_rate": 5.068674938623848e-06, "loss": 0.8475, "step": 9108 }, { "epoch": 0.5439507942195151, "grad_norm": 1.8587788343429565, "learning_rate": 5.068011412646806e-06, "loss": 0.8784, "step": 9109 }, { "epoch": 0.5440105099725308, "grad_norm": 1.88383948802948, "learning_rate": 5.067347886669763e-06, "loss": 0.8721, "step": 9110 }, { "epoch": 0.5440702257255464, "grad_norm": 2.946901321411133, "learning_rate": 5.0666843606927215e-06, "loss": 0.8544, "step": 9111 }, { "epoch": 0.5441299414785621, "grad_norm": 1.983384609222412, "learning_rate": 5.0660208347156796e-06, "loss": 0.8869, "step": 9112 }, { "epoch": 0.5441896572315776, "grad_norm": 1.788668155670166, "learning_rate": 5.065357308738638e-06, "loss": 0.8886, "step": 9113 }, { "epoch": 0.5442493729845933, "grad_norm": 2.5287394523620605, "learning_rate": 5.064693782761596e-06, "loss": 0.8655, "step": 9114 }, { "epoch": 0.544309088737609, "grad_norm": 2.177898406982422, "learning_rate": 5.064030256784553e-06, "loss": 0.842, "step": 9115 }, { "epoch": 0.5443688044906246, "grad_norm": 1.9314810037612915, "learning_rate": 5.063366730807512e-06, "loss": 0.868, "step": 9116 }, { "epoch": 0.5444285202436403, "grad_norm": 1.6149572134017944, "learning_rate": 5.062703204830469e-06, "loss": 0.8258, "step": 9117 }, { "epoch": 0.544488235996656, "grad_norm": 2.7211720943450928, "learning_rate": 5.062039678853427e-06, "loss": 0.8508, "step": 9118 }, { "epoch": 0.5445479517496715, "grad_norm": 2.8222432136535645, "learning_rate": 5.061376152876386e-06, "loss": 0.8372, "step": 9119 }, { "epoch": 0.5446076675026872, "grad_norm": 3.2459702491760254, "learning_rate": 5.0607126268993435e-06, "loss": 0.8737, "step": 9120 }, { "epoch": 0.5446673832557029, "grad_norm": 2.406978130340576, "learning_rate": 5.060049100922302e-06, "loss": 0.8737, "step": 9121 }, { "epoch": 0.5447270990087185, "grad_norm": 1.5762561559677124, "learning_rate": 5.059385574945259e-06, "loss": 0.7996, "step": 9122 }, { "epoch": 0.5447868147617342, "grad_norm": 2.865124464035034, "learning_rate": 5.058722048968218e-06, "loss": 0.8733, "step": 9123 }, { "epoch": 0.5448465305147497, "grad_norm": 1.6567537784576416, "learning_rate": 5.058058522991176e-06, "loss": 0.8283, "step": 9124 }, { "epoch": 0.5449062462677654, "grad_norm": 1.7503560781478882, "learning_rate": 5.057394997014133e-06, "loss": 0.8869, "step": 9125 }, { "epoch": 0.5449659620207811, "grad_norm": 2.2208993434906006, "learning_rate": 5.056731471037092e-06, "loss": 0.8728, "step": 9126 }, { "epoch": 0.5450256777737967, "grad_norm": 1.6727180480957031, "learning_rate": 5.056067945060049e-06, "loss": 0.8252, "step": 9127 }, { "epoch": 0.5450853935268124, "grad_norm": 1.958367109298706, "learning_rate": 5.0554044190830075e-06, "loss": 0.8979, "step": 9128 }, { "epoch": 0.5451451092798281, "grad_norm": 1.9297900199890137, "learning_rate": 5.0547408931059664e-06, "loss": 0.8285, "step": 9129 }, { "epoch": 0.5452048250328436, "grad_norm": 2.518754482269287, "learning_rate": 5.054077367128924e-06, "loss": 0.8331, "step": 9130 }, { "epoch": 0.5452645407858593, "grad_norm": 1.8835210800170898, "learning_rate": 5.053413841151882e-06, "loss": 0.8617, "step": 9131 }, { "epoch": 0.545324256538875, "grad_norm": 1.7224100828170776, "learning_rate": 5.052750315174839e-06, "loss": 0.8097, "step": 9132 }, { "epoch": 0.5453839722918906, "grad_norm": 2.4869937896728516, "learning_rate": 5.052086789197798e-06, "loss": 0.8744, "step": 9133 }, { "epoch": 0.5454436880449063, "grad_norm": 2.042559862136841, "learning_rate": 5.051423263220756e-06, "loss": 0.865, "step": 9134 }, { "epoch": 0.5455034037979218, "grad_norm": 2.0974056720733643, "learning_rate": 5.050759737243713e-06, "loss": 0.8709, "step": 9135 }, { "epoch": 0.5455631195509375, "grad_norm": 6.778656959533691, "learning_rate": 5.0500962112666714e-06, "loss": 0.8734, "step": 9136 }, { "epoch": 0.5456228353039532, "grad_norm": 2.1490590572357178, "learning_rate": 5.0494326852896295e-06, "loss": 0.8863, "step": 9137 }, { "epoch": 0.5456825510569688, "grad_norm": 3.323646068572998, "learning_rate": 5.048769159312588e-06, "loss": 0.8233, "step": 9138 }, { "epoch": 0.5457422668099845, "grad_norm": 2.7289037704467773, "learning_rate": 5.048105633335546e-06, "loss": 0.8237, "step": 9139 }, { "epoch": 0.5458019825630002, "grad_norm": 1.934188961982727, "learning_rate": 5.047442107358503e-06, "loss": 0.836, "step": 9140 }, { "epoch": 0.5458616983160157, "grad_norm": 3.8129231929779053, "learning_rate": 5.046778581381462e-06, "loss": 0.8551, "step": 9141 }, { "epoch": 0.5459214140690314, "grad_norm": 2.2989749908447266, "learning_rate": 5.046115055404419e-06, "loss": 0.838, "step": 9142 }, { "epoch": 0.545981129822047, "grad_norm": 2.3706371784210205, "learning_rate": 5.045451529427377e-06, "loss": 0.8754, "step": 9143 }, { "epoch": 0.5460408455750627, "grad_norm": 2.3208909034729004, "learning_rate": 5.044788003450336e-06, "loss": 0.8335, "step": 9144 }, { "epoch": 0.5461005613280784, "grad_norm": 2.2405362129211426, "learning_rate": 5.0441244774732935e-06, "loss": 0.8621, "step": 9145 }, { "epoch": 0.546160277081094, "grad_norm": 4.343633651733398, "learning_rate": 5.043460951496252e-06, "loss": 0.866, "step": 9146 }, { "epoch": 0.5462199928341096, "grad_norm": 2.5910582542419434, "learning_rate": 5.042797425519209e-06, "loss": 0.9057, "step": 9147 }, { "epoch": 0.5462797085871253, "grad_norm": 1.7920185327529907, "learning_rate": 5.042133899542168e-06, "loss": 0.846, "step": 9148 }, { "epoch": 0.5463394243401409, "grad_norm": 1.558172583580017, "learning_rate": 5.041470373565126e-06, "loss": 0.8141, "step": 9149 }, { "epoch": 0.5463991400931566, "grad_norm": 3.370205879211426, "learning_rate": 5.040806847588083e-06, "loss": 0.8728, "step": 9150 }, { "epoch": 0.5464588558461723, "grad_norm": 1.9105674028396606, "learning_rate": 5.040143321611042e-06, "loss": 0.871, "step": 9151 }, { "epoch": 0.5465185715991878, "grad_norm": 2.0977165699005127, "learning_rate": 5.039479795633999e-06, "loss": 0.8653, "step": 9152 }, { "epoch": 0.5465782873522035, "grad_norm": 2.081024646759033, "learning_rate": 5.0388162696569575e-06, "loss": 0.8397, "step": 9153 }, { "epoch": 0.5466380031052192, "grad_norm": 1.860177993774414, "learning_rate": 5.038152743679916e-06, "loss": 0.831, "step": 9154 }, { "epoch": 0.5466977188582348, "grad_norm": 1.9020673036575317, "learning_rate": 5.037489217702874e-06, "loss": 0.8247, "step": 9155 }, { "epoch": 0.5467574346112505, "grad_norm": 2.0132060050964355, "learning_rate": 5.036825691725832e-06, "loss": 0.8914, "step": 9156 }, { "epoch": 0.5468171503642661, "grad_norm": 6.424712181091309, "learning_rate": 5.036162165748789e-06, "loss": 0.832, "step": 9157 }, { "epoch": 0.5468768661172817, "grad_norm": 1.9815990924835205, "learning_rate": 5.035498639771748e-06, "loss": 0.86, "step": 9158 }, { "epoch": 0.5469365818702974, "grad_norm": 1.5830296277999878, "learning_rate": 5.034835113794706e-06, "loss": 0.8916, "step": 9159 }, { "epoch": 0.546996297623313, "grad_norm": 1.764244794845581, "learning_rate": 5.034171587817663e-06, "loss": 0.8709, "step": 9160 }, { "epoch": 0.5470560133763287, "grad_norm": 2.2052736282348633, "learning_rate": 5.033508061840621e-06, "loss": 0.8508, "step": 9161 }, { "epoch": 0.5471157291293444, "grad_norm": 1.9174340963363647, "learning_rate": 5.0328445358635795e-06, "loss": 0.8546, "step": 9162 }, { "epoch": 0.5471754448823599, "grad_norm": 2.069115161895752, "learning_rate": 5.032181009886538e-06, "loss": 0.863, "step": 9163 }, { "epoch": 0.5472351606353756, "grad_norm": 3.3676280975341797, "learning_rate": 5.031517483909496e-06, "loss": 0.8385, "step": 9164 }, { "epoch": 0.5472948763883912, "grad_norm": 2.042145252227783, "learning_rate": 5.030853957932453e-06, "loss": 0.8565, "step": 9165 }, { "epoch": 0.5473545921414069, "grad_norm": 2.34940767288208, "learning_rate": 5.030190431955412e-06, "loss": 0.8389, "step": 9166 }, { "epoch": 0.5474143078944226, "grad_norm": 1.7825961112976074, "learning_rate": 5.029526905978369e-06, "loss": 0.8602, "step": 9167 }, { "epoch": 0.5474740236474382, "grad_norm": 1.6449534893035889, "learning_rate": 5.028863380001327e-06, "loss": 0.8369, "step": 9168 }, { "epoch": 0.5475337394004538, "grad_norm": 2.121617078781128, "learning_rate": 5.028199854024286e-06, "loss": 0.8517, "step": 9169 }, { "epoch": 0.5475934551534695, "grad_norm": 3.1686604022979736, "learning_rate": 5.0275363280472435e-06, "loss": 0.8404, "step": 9170 }, { "epoch": 0.5476531709064851, "grad_norm": 2.2375638484954834, "learning_rate": 5.0268728020702016e-06, "loss": 0.8863, "step": 9171 }, { "epoch": 0.5477128866595008, "grad_norm": 3.0867714881896973, "learning_rate": 5.026209276093159e-06, "loss": 0.8606, "step": 9172 }, { "epoch": 0.5477726024125165, "grad_norm": 2.361952066421509, "learning_rate": 5.025545750116118e-06, "loss": 0.8545, "step": 9173 }, { "epoch": 0.547832318165532, "grad_norm": 2.2840726375579834, "learning_rate": 5.024882224139076e-06, "loss": 0.861, "step": 9174 }, { "epoch": 0.5478920339185477, "grad_norm": 3.2455365657806396, "learning_rate": 5.024218698162033e-06, "loss": 0.8471, "step": 9175 }, { "epoch": 0.5479517496715633, "grad_norm": 3.782978057861328, "learning_rate": 5.023555172184992e-06, "loss": 0.8509, "step": 9176 }, { "epoch": 0.548011465424579, "grad_norm": 2.5240464210510254, "learning_rate": 5.022891646207949e-06, "loss": 0.826, "step": 9177 }, { "epoch": 0.5480711811775947, "grad_norm": 1.9609155654907227, "learning_rate": 5.0222281202309074e-06, "loss": 0.874, "step": 9178 }, { "epoch": 0.5481308969306103, "grad_norm": 1.826284646987915, "learning_rate": 5.021564594253866e-06, "loss": 0.8536, "step": 9179 }, { "epoch": 0.5481906126836259, "grad_norm": 1.8462769985198975, "learning_rate": 5.020901068276824e-06, "loss": 0.8593, "step": 9180 }, { "epoch": 0.5482503284366416, "grad_norm": 2.8548998832702637, "learning_rate": 5.020237542299782e-06, "loss": 0.8306, "step": 9181 }, { "epoch": 0.5483100441896572, "grad_norm": 2.1558570861816406, "learning_rate": 5.019574016322739e-06, "loss": 0.8572, "step": 9182 }, { "epoch": 0.5483697599426729, "grad_norm": 2.2585701942443848, "learning_rate": 5.018910490345698e-06, "loss": 0.9, "step": 9183 }, { "epoch": 0.5484294756956886, "grad_norm": 2.157853126525879, "learning_rate": 5.018246964368656e-06, "loss": 0.8081, "step": 9184 }, { "epoch": 0.5484891914487041, "grad_norm": 1.9126566648483276, "learning_rate": 5.017583438391613e-06, "loss": 0.8704, "step": 9185 }, { "epoch": 0.5485489072017198, "grad_norm": 1.5941429138183594, "learning_rate": 5.016919912414571e-06, "loss": 0.8784, "step": 9186 }, { "epoch": 0.5486086229547354, "grad_norm": 2.25341796875, "learning_rate": 5.0162563864375295e-06, "loss": 0.8414, "step": 9187 }, { "epoch": 0.5486683387077511, "grad_norm": 3.900702953338623, "learning_rate": 5.015592860460488e-06, "loss": 0.8527, "step": 9188 }, { "epoch": 0.5487280544607668, "grad_norm": 2.0338635444641113, "learning_rate": 5.014929334483446e-06, "loss": 0.8642, "step": 9189 }, { "epoch": 0.5487877702137824, "grad_norm": 2.1041572093963623, "learning_rate": 5.014265808506403e-06, "loss": 0.8969, "step": 9190 }, { "epoch": 0.548847485966798, "grad_norm": 2.868593454360962, "learning_rate": 5.013602282529362e-06, "loss": 0.8779, "step": 9191 }, { "epoch": 0.5489072017198137, "grad_norm": 2.3550736904144287, "learning_rate": 5.012938756552319e-06, "loss": 0.87, "step": 9192 }, { "epoch": 0.5489669174728293, "grad_norm": 2.826714277267456, "learning_rate": 5.012275230575277e-06, "loss": 0.8533, "step": 9193 }, { "epoch": 0.549026633225845, "grad_norm": 2.287710189819336, "learning_rate": 5.011611704598236e-06, "loss": 0.8711, "step": 9194 }, { "epoch": 0.5490863489788607, "grad_norm": 1.9743244647979736, "learning_rate": 5.0109481786211934e-06, "loss": 0.8488, "step": 9195 }, { "epoch": 0.5491460647318762, "grad_norm": 4.435773849487305, "learning_rate": 5.0102846526441515e-06, "loss": 0.8744, "step": 9196 }, { "epoch": 0.5492057804848919, "grad_norm": 4.121305465698242, "learning_rate": 5.009621126667109e-06, "loss": 0.8715, "step": 9197 }, { "epoch": 0.5492654962379075, "grad_norm": 4.275781631469727, "learning_rate": 5.008957600690068e-06, "loss": 0.8616, "step": 9198 }, { "epoch": 0.5493252119909232, "grad_norm": 2.111546277999878, "learning_rate": 5.008294074713026e-06, "loss": 0.8468, "step": 9199 }, { "epoch": 0.5493849277439389, "grad_norm": 2.367408514022827, "learning_rate": 5.007630548735983e-06, "loss": 0.8778, "step": 9200 }, { "epoch": 0.5493849277439389, "eval_text_loss": 0.9101853370666504, "eval_text_runtime": 15.1989, "eval_text_samples_per_second": 263.177, "eval_text_steps_per_second": 0.526, "step": 9200 }, { "epoch": 0.5493849277439389, "eval_image_loss": 0.6232504844665527, "eval_image_runtime": 5.007, "eval_image_samples_per_second": 798.875, "eval_image_steps_per_second": 1.598, "step": 9200 }, { "epoch": 0.5493849277439389, "eval_video_loss": 1.0632827281951904, "eval_video_runtime": 77.0331, "eval_video_samples_per_second": 51.926, "eval_video_steps_per_second": 0.104, "step": 9200 }, { "epoch": 0.5494446434969545, "grad_norm": 4.297647953033447, "learning_rate": 5.006967022758942e-06, "loss": 0.853, "step": 9201 }, { "epoch": 0.5495043592499701, "grad_norm": 1.81730055809021, "learning_rate": 5.006303496781899e-06, "loss": 0.852, "step": 9202 }, { "epoch": 0.5495640750029858, "grad_norm": 3.0133047103881836, "learning_rate": 5.005639970804857e-06, "loss": 0.8133, "step": 9203 }, { "epoch": 0.5496237907560014, "grad_norm": 2.9118764400482178, "learning_rate": 5.004976444827816e-06, "loss": 0.8277, "step": 9204 }, { "epoch": 0.5496835065090171, "grad_norm": 2.751751184463501, "learning_rate": 5.004312918850774e-06, "loss": 0.8724, "step": 9205 }, { "epoch": 0.5497432222620328, "grad_norm": 2.8482232093811035, "learning_rate": 5.003649392873732e-06, "loss": 0.8683, "step": 9206 }, { "epoch": 0.5498029380150483, "grad_norm": 1.8905199766159058, "learning_rate": 5.002985866896689e-06, "loss": 0.8126, "step": 9207 }, { "epoch": 0.549862653768064, "grad_norm": 2.5888359546661377, "learning_rate": 5.002322340919648e-06, "loss": 0.8354, "step": 9208 }, { "epoch": 0.5499223695210796, "grad_norm": 2.3697853088378906, "learning_rate": 5.001658814942606e-06, "loss": 0.8233, "step": 9209 }, { "epoch": 0.5499820852740953, "grad_norm": 1.8667333126068115, "learning_rate": 5.000995288965563e-06, "loss": 0.8431, "step": 9210 }, { "epoch": 0.550041801027111, "grad_norm": 1.73068106174469, "learning_rate": 5.000331762988521e-06, "loss": 0.8188, "step": 9211 }, { "epoch": 0.5501015167801266, "grad_norm": 1.8632351160049438, "learning_rate": 4.9996682370114795e-06, "loss": 0.8442, "step": 9212 }, { "epoch": 0.5501612325331422, "grad_norm": 2.502072334289551, "learning_rate": 4.9990047110344376e-06, "loss": 0.8333, "step": 9213 }, { "epoch": 0.5502209482861579, "grad_norm": 3.673621654510498, "learning_rate": 4.998341185057396e-06, "loss": 0.8516, "step": 9214 }, { "epoch": 0.5502806640391735, "grad_norm": 1.8922284841537476, "learning_rate": 4.997677659080353e-06, "loss": 0.8689, "step": 9215 }, { "epoch": 0.5503403797921892, "grad_norm": 2.4315671920776367, "learning_rate": 4.997014133103311e-06, "loss": 0.8632, "step": 9216 }, { "epoch": 0.5504000955452049, "grad_norm": 2.346613645553589, "learning_rate": 4.99635060712627e-06, "loss": 0.7927, "step": 9217 }, { "epoch": 0.5504598112982205, "grad_norm": 2.302711248397827, "learning_rate": 4.995687081149227e-06, "loss": 0.8535, "step": 9218 }, { "epoch": 0.5505195270512361, "grad_norm": 3.0445902347564697, "learning_rate": 4.995023555172185e-06, "loss": 0.882, "step": 9219 }, { "epoch": 0.5505792428042517, "grad_norm": 2.244356393814087, "learning_rate": 4.994360029195143e-06, "loss": 0.8716, "step": 9220 }, { "epoch": 0.5506389585572674, "grad_norm": 2.2148780822753906, "learning_rate": 4.9936965032181015e-06, "loss": 0.8123, "step": 9221 }, { "epoch": 0.5506986743102831, "grad_norm": 1.9006965160369873, "learning_rate": 4.99303297724106e-06, "loss": 0.8432, "step": 9222 }, { "epoch": 0.5507583900632987, "grad_norm": 1.4122978448867798, "learning_rate": 4.992369451264018e-06, "loss": 0.8811, "step": 9223 }, { "epoch": 0.5508181058163143, "grad_norm": 2.128605365753174, "learning_rate": 4.991705925286976e-06, "loss": 0.8676, "step": 9224 }, { "epoch": 0.55087782156933, "grad_norm": 1.9248768091201782, "learning_rate": 4.991042399309933e-06, "loss": 0.8599, "step": 9225 }, { "epoch": 0.5509375373223456, "grad_norm": 1.7133408784866333, "learning_rate": 4.990378873332891e-06, "loss": 0.848, "step": 9226 }, { "epoch": 0.5509972530753613, "grad_norm": 2.4125759601593018, "learning_rate": 4.989715347355849e-06, "loss": 0.8196, "step": 9227 }, { "epoch": 0.551056968828377, "grad_norm": 1.951097011566162, "learning_rate": 4.989051821378807e-06, "loss": 0.846, "step": 9228 }, { "epoch": 0.5511166845813926, "grad_norm": 1.7069165706634521, "learning_rate": 4.9883882954017655e-06, "loss": 0.8869, "step": 9229 }, { "epoch": 0.5511764003344082, "grad_norm": 4.581113815307617, "learning_rate": 4.9877247694247236e-06, "loss": 0.8247, "step": 9230 }, { "epoch": 0.5512361160874238, "grad_norm": 2.0570714473724365, "learning_rate": 4.987061243447681e-06, "loss": 0.8349, "step": 9231 }, { "epoch": 0.5512958318404395, "grad_norm": 2.066584348678589, "learning_rate": 4.98639771747064e-06, "loss": 0.8889, "step": 9232 }, { "epoch": 0.5513555475934552, "grad_norm": 2.367664337158203, "learning_rate": 4.985734191493598e-06, "loss": 0.8406, "step": 9233 }, { "epoch": 0.5514152633464708, "grad_norm": 1.9276968240737915, "learning_rate": 4.985070665516555e-06, "loss": 0.8666, "step": 9234 }, { "epoch": 0.5514749790994864, "grad_norm": 4.948637962341309, "learning_rate": 4.984407139539513e-06, "loss": 0.8353, "step": 9235 }, { "epoch": 0.5515346948525021, "grad_norm": 2.5001020431518555, "learning_rate": 4.983743613562471e-06, "loss": 0.8542, "step": 9236 }, { "epoch": 0.5515944106055177, "grad_norm": 1.968751311302185, "learning_rate": 4.9830800875854294e-06, "loss": 0.8507, "step": 9237 }, { "epoch": 0.5516541263585334, "grad_norm": 1.7000656127929688, "learning_rate": 4.9824165616083875e-06, "loss": 0.8543, "step": 9238 }, { "epoch": 0.5517138421115491, "grad_norm": 1.9855936765670776, "learning_rate": 4.981753035631346e-06, "loss": 0.866, "step": 9239 }, { "epoch": 0.5517735578645647, "grad_norm": 2.5948009490966797, "learning_rate": 4.981089509654303e-06, "loss": 0.8311, "step": 9240 }, { "epoch": 0.5518332736175803, "grad_norm": 2.2497637271881104, "learning_rate": 4.980425983677261e-06, "loss": 0.8793, "step": 9241 }, { "epoch": 0.5518929893705959, "grad_norm": 1.965531349182129, "learning_rate": 4.97976245770022e-06, "loss": 0.8772, "step": 9242 }, { "epoch": 0.5519527051236116, "grad_norm": 2.581502676010132, "learning_rate": 4.979098931723177e-06, "loss": 0.8496, "step": 9243 }, { "epoch": 0.5520124208766273, "grad_norm": 1.7678990364074707, "learning_rate": 4.978435405746135e-06, "loss": 0.8722, "step": 9244 }, { "epoch": 0.5520721366296429, "grad_norm": 2.6116926670074463, "learning_rate": 4.977771879769093e-06, "loss": 0.8463, "step": 9245 }, { "epoch": 0.5521318523826585, "grad_norm": 2.224931001663208, "learning_rate": 4.9771083537920515e-06, "loss": 0.8905, "step": 9246 }, { "epoch": 0.5521915681356742, "grad_norm": 3.3558785915374756, "learning_rate": 4.97644482781501e-06, "loss": 0.8911, "step": 9247 }, { "epoch": 0.5522512838886898, "grad_norm": 7.4635910987854, "learning_rate": 4.975781301837968e-06, "loss": 0.8836, "step": 9248 }, { "epoch": 0.5523109996417055, "grad_norm": 1.6169614791870117, "learning_rate": 4.975117775860926e-06, "loss": 0.8423, "step": 9249 }, { "epoch": 0.5523707153947212, "grad_norm": 1.8853155374526978, "learning_rate": 4.974454249883883e-06, "loss": 0.8411, "step": 9250 }, { "epoch": 0.5524304311477368, "grad_norm": 1.7219122648239136, "learning_rate": 4.973790723906841e-06, "loss": 0.8275, "step": 9251 }, { "epoch": 0.5524901469007524, "grad_norm": 2.1283185482025146, "learning_rate": 4.973127197929799e-06, "loss": 0.8633, "step": 9252 }, { "epoch": 0.552549862653768, "grad_norm": 1.9649304151535034, "learning_rate": 4.972463671952757e-06, "loss": 0.8456, "step": 9253 }, { "epoch": 0.5526095784067837, "grad_norm": 5.841310501098633, "learning_rate": 4.9718001459757154e-06, "loss": 0.8476, "step": 9254 }, { "epoch": 0.5526692941597994, "grad_norm": 3.637848377227783, "learning_rate": 4.9711366199986735e-06, "loss": 0.8397, "step": 9255 }, { "epoch": 0.552729009912815, "grad_norm": 1.7189624309539795, "learning_rate": 4.970473094021631e-06, "loss": 0.8403, "step": 9256 }, { "epoch": 0.5527887256658306, "grad_norm": 2.606192111968994, "learning_rate": 4.96980956804459e-06, "loss": 0.8498, "step": 9257 }, { "epoch": 0.5528484414188463, "grad_norm": 2.5634994506835938, "learning_rate": 4.969146042067548e-06, "loss": 0.8583, "step": 9258 }, { "epoch": 0.5529081571718619, "grad_norm": 1.6385297775268555, "learning_rate": 4.968482516090505e-06, "loss": 0.8576, "step": 9259 }, { "epoch": 0.5529678729248776, "grad_norm": 2.202479600906372, "learning_rate": 4.967818990113463e-06, "loss": 0.8692, "step": 9260 }, { "epoch": 0.5530275886778933, "grad_norm": 2.5503387451171875, "learning_rate": 4.967155464136421e-06, "loss": 0.8452, "step": 9261 }, { "epoch": 0.5530873044309089, "grad_norm": 2.3299152851104736, "learning_rate": 4.966491938159379e-06, "loss": 0.9056, "step": 9262 }, { "epoch": 0.5531470201839245, "grad_norm": 1.602089285850525, "learning_rate": 4.9658284121823375e-06, "loss": 0.8535, "step": 9263 }, { "epoch": 0.5532067359369401, "grad_norm": 2.8841185569763184, "learning_rate": 4.965164886205296e-06, "loss": 0.8695, "step": 9264 }, { "epoch": 0.5532664516899558, "grad_norm": 1.9664514064788818, "learning_rate": 4.964501360228253e-06, "loss": 0.8794, "step": 9265 }, { "epoch": 0.5533261674429715, "grad_norm": 6.762874603271484, "learning_rate": 4.963837834251211e-06, "loss": 0.8677, "step": 9266 }, { "epoch": 0.5533858831959871, "grad_norm": 2.2536604404449463, "learning_rate": 4.96317430827417e-06, "loss": 0.8425, "step": 9267 }, { "epoch": 0.5534455989490027, "grad_norm": 1.7865486145019531, "learning_rate": 4.962510782297127e-06, "loss": 0.8241, "step": 9268 }, { "epoch": 0.5535053147020184, "grad_norm": 1.8383883237838745, "learning_rate": 4.961847256320085e-06, "loss": 0.8243, "step": 9269 }, { "epoch": 0.553565030455034, "grad_norm": 2.4229087829589844, "learning_rate": 4.961183730343043e-06, "loss": 0.8076, "step": 9270 }, { "epoch": 0.5536247462080497, "grad_norm": 1.5972352027893066, "learning_rate": 4.9605202043660015e-06, "loss": 0.8507, "step": 9271 }, { "epoch": 0.5536844619610654, "grad_norm": 3.6834959983825684, "learning_rate": 4.9598566783889596e-06, "loss": 0.821, "step": 9272 }, { "epoch": 0.553744177714081, "grad_norm": 2.5712480545043945, "learning_rate": 4.959193152411918e-06, "loss": 0.8793, "step": 9273 }, { "epoch": 0.5538038934670966, "grad_norm": 1.8913602828979492, "learning_rate": 4.958529626434876e-06, "loss": 0.8288, "step": 9274 }, { "epoch": 0.5538636092201122, "grad_norm": 1.7952500581741333, "learning_rate": 4.957866100457833e-06, "loss": 0.8461, "step": 9275 }, { "epoch": 0.5539233249731279, "grad_norm": 1.9122949838638306, "learning_rate": 4.957202574480791e-06, "loss": 0.8842, "step": 9276 }, { "epoch": 0.5539830407261436, "grad_norm": 2.386556386947632, "learning_rate": 4.956539048503749e-06, "loss": 0.8072, "step": 9277 }, { "epoch": 0.5540427564791592, "grad_norm": 2.1650545597076416, "learning_rate": 4.955875522526707e-06, "loss": 0.833, "step": 9278 }, { "epoch": 0.5541024722321749, "grad_norm": 1.4910399913787842, "learning_rate": 4.955211996549665e-06, "loss": 0.835, "step": 9279 }, { "epoch": 0.5541621879851905, "grad_norm": 2.4062323570251465, "learning_rate": 4.9545484705726235e-06, "loss": 0.8569, "step": 9280 }, { "epoch": 0.5542219037382061, "grad_norm": 3.3787713050842285, "learning_rate": 4.953884944595581e-06, "loss": 0.8358, "step": 9281 }, { "epoch": 0.5542816194912218, "grad_norm": 2.447791337966919, "learning_rate": 4.95322141861854e-06, "loss": 0.857, "step": 9282 }, { "epoch": 0.5543413352442375, "grad_norm": 2.2946956157684326, "learning_rate": 4.952557892641498e-06, "loss": 0.8337, "step": 9283 }, { "epoch": 0.5544010509972531, "grad_norm": 2.4920027256011963, "learning_rate": 4.951894366664455e-06, "loss": 0.8281, "step": 9284 }, { "epoch": 0.5544607667502687, "grad_norm": 2.2880373001098633, "learning_rate": 4.951230840687413e-06, "loss": 0.833, "step": 9285 }, { "epoch": 0.5545204825032843, "grad_norm": 1.6757943630218506, "learning_rate": 4.950567314710371e-06, "loss": 0.8175, "step": 9286 }, { "epoch": 0.5545801982563, "grad_norm": 2.469369649887085, "learning_rate": 4.949903788733329e-06, "loss": 0.8443, "step": 9287 }, { "epoch": 0.5546399140093157, "grad_norm": 2.0748841762542725, "learning_rate": 4.9492402627562875e-06, "loss": 0.8336, "step": 9288 }, { "epoch": 0.5546996297623313, "grad_norm": 1.8467005491256714, "learning_rate": 4.9485767367792456e-06, "loss": 0.8327, "step": 9289 }, { "epoch": 0.554759345515347, "grad_norm": 2.157773017883301, "learning_rate": 4.947913210802203e-06, "loss": 0.8551, "step": 9290 }, { "epoch": 0.5548190612683626, "grad_norm": 2.2747228145599365, "learning_rate": 4.947249684825161e-06, "loss": 0.8535, "step": 9291 }, { "epoch": 0.5548787770213782, "grad_norm": 2.0298290252685547, "learning_rate": 4.94658615884812e-06, "loss": 0.8463, "step": 9292 }, { "epoch": 0.5549384927743939, "grad_norm": 2.1794967651367188, "learning_rate": 4.945922632871077e-06, "loss": 0.8637, "step": 9293 }, { "epoch": 0.5549982085274096, "grad_norm": 2.8843071460723877, "learning_rate": 4.945259106894035e-06, "loss": 0.8557, "step": 9294 }, { "epoch": 0.5550579242804252, "grad_norm": 2.542541742324829, "learning_rate": 4.944595580916993e-06, "loss": 0.8812, "step": 9295 }, { "epoch": 0.5551176400334408, "grad_norm": 1.859993577003479, "learning_rate": 4.9439320549399514e-06, "loss": 0.8952, "step": 9296 }, { "epoch": 0.5551773557864564, "grad_norm": 2.57358717918396, "learning_rate": 4.9432685289629095e-06, "loss": 0.8326, "step": 9297 }, { "epoch": 0.5552370715394721, "grad_norm": 2.648235559463501, "learning_rate": 4.942605002985868e-06, "loss": 0.8631, "step": 9298 }, { "epoch": 0.5552967872924878, "grad_norm": 2.2890939712524414, "learning_rate": 4.941941477008826e-06, "loss": 0.8884, "step": 9299 }, { "epoch": 0.5553565030455034, "grad_norm": 1.7472096681594849, "learning_rate": 4.941277951031783e-06, "loss": 0.8464, "step": 9300 }, { "epoch": 0.5554162187985191, "grad_norm": 1.9962271451950073, "learning_rate": 4.940614425054741e-06, "loss": 0.8626, "step": 9301 }, { "epoch": 0.5554759345515347, "grad_norm": 1.9010159969329834, "learning_rate": 4.939950899077699e-06, "loss": 0.8281, "step": 9302 }, { "epoch": 0.5555356503045503, "grad_norm": 1.9880011081695557, "learning_rate": 4.939287373100657e-06, "loss": 0.8575, "step": 9303 }, { "epoch": 0.555595366057566, "grad_norm": 1.8324203491210938, "learning_rate": 4.938623847123615e-06, "loss": 0.827, "step": 9304 }, { "epoch": 0.5556550818105817, "grad_norm": 1.8110419511795044, "learning_rate": 4.9379603211465735e-06, "loss": 0.8411, "step": 9305 }, { "epoch": 0.5557147975635973, "grad_norm": 1.6713532209396362, "learning_rate": 4.937296795169531e-06, "loss": 0.8517, "step": 9306 }, { "epoch": 0.5557745133166129, "grad_norm": 2.398726463317871, "learning_rate": 4.93663326919249e-06, "loss": 0.8249, "step": 9307 }, { "epoch": 0.5558342290696285, "grad_norm": 2.6185507774353027, "learning_rate": 4.935969743215448e-06, "loss": 0.8279, "step": 9308 }, { "epoch": 0.5558939448226442, "grad_norm": 2.533576011657715, "learning_rate": 4.935306217238405e-06, "loss": 0.8239, "step": 9309 }, { "epoch": 0.5559536605756599, "grad_norm": 2.107611894607544, "learning_rate": 4.934642691261363e-06, "loss": 0.8311, "step": 9310 }, { "epoch": 0.5560133763286755, "grad_norm": 1.6307371854782104, "learning_rate": 4.933979165284321e-06, "loss": 0.8543, "step": 9311 }, { "epoch": 0.5560730920816912, "grad_norm": 2.4354395866394043, "learning_rate": 4.933315639307279e-06, "loss": 0.8675, "step": 9312 }, { "epoch": 0.5561328078347068, "grad_norm": 2.1606979370117188, "learning_rate": 4.9326521133302374e-06, "loss": 0.8408, "step": 9313 }, { "epoch": 0.5561925235877224, "grad_norm": 1.618348479270935, "learning_rate": 4.9319885873531955e-06, "loss": 0.8606, "step": 9314 }, { "epoch": 0.5562522393407381, "grad_norm": 2.1475160121917725, "learning_rate": 4.931325061376153e-06, "loss": 0.8316, "step": 9315 }, { "epoch": 0.5563119550937538, "grad_norm": 1.8828701972961426, "learning_rate": 4.930661535399111e-06, "loss": 0.8532, "step": 9316 }, { "epoch": 0.5563716708467694, "grad_norm": 1.820050597190857, "learning_rate": 4.92999800942207e-06, "loss": 0.867, "step": 9317 }, { "epoch": 0.556431386599785, "grad_norm": 2.051363706588745, "learning_rate": 4.929334483445027e-06, "loss": 0.8661, "step": 9318 }, { "epoch": 0.5564911023528006, "grad_norm": 2.204097270965576, "learning_rate": 4.928670957467985e-06, "loss": 0.8419, "step": 9319 }, { "epoch": 0.5565508181058163, "grad_norm": 1.9077376127243042, "learning_rate": 4.928007431490943e-06, "loss": 0.8513, "step": 9320 }, { "epoch": 0.556610533858832, "grad_norm": 3.114016056060791, "learning_rate": 4.927343905513901e-06, "loss": 0.8489, "step": 9321 }, { "epoch": 0.5566702496118476, "grad_norm": 2.639021635055542, "learning_rate": 4.9266803795368595e-06, "loss": 0.8085, "step": 9322 }, { "epoch": 0.5567299653648633, "grad_norm": 2.4143829345703125, "learning_rate": 4.926016853559818e-06, "loss": 0.862, "step": 9323 }, { "epoch": 0.5567896811178789, "grad_norm": 2.0678927898406982, "learning_rate": 4.925353327582776e-06, "loss": 0.8498, "step": 9324 }, { "epoch": 0.5568493968708945, "grad_norm": 3.53218150138855, "learning_rate": 4.924689801605733e-06, "loss": 0.8585, "step": 9325 }, { "epoch": 0.5569091126239102, "grad_norm": 4.120894908905029, "learning_rate": 4.924026275628691e-06, "loss": 0.8726, "step": 9326 }, { "epoch": 0.5569688283769259, "grad_norm": 2.1239593029022217, "learning_rate": 4.923362749651649e-06, "loss": 0.8244, "step": 9327 }, { "epoch": 0.5570285441299415, "grad_norm": 2.563098669052124, "learning_rate": 4.922699223674607e-06, "loss": 0.8683, "step": 9328 }, { "epoch": 0.5570882598829571, "grad_norm": 2.463892698287964, "learning_rate": 4.922035697697565e-06, "loss": 0.8391, "step": 9329 }, { "epoch": 0.5571479756359727, "grad_norm": 2.480191230773926, "learning_rate": 4.9213721717205235e-06, "loss": 0.8723, "step": 9330 }, { "epoch": 0.5572076913889884, "grad_norm": 1.958497166633606, "learning_rate": 4.920708645743481e-06, "loss": 0.8513, "step": 9331 }, { "epoch": 0.5572674071420041, "grad_norm": 8.396885871887207, "learning_rate": 4.92004511976644e-06, "loss": 0.867, "step": 9332 }, { "epoch": 0.5573271228950197, "grad_norm": 1.644284963607788, "learning_rate": 4.919381593789398e-06, "loss": 0.8704, "step": 9333 }, { "epoch": 0.5573868386480354, "grad_norm": 1.8533803224563599, "learning_rate": 4.918718067812355e-06, "loss": 0.8496, "step": 9334 }, { "epoch": 0.557446554401051, "grad_norm": 1.8162211179733276, "learning_rate": 4.918054541835313e-06, "loss": 0.8349, "step": 9335 }, { "epoch": 0.5575062701540666, "grad_norm": 1.8720266819000244, "learning_rate": 4.917391015858271e-06, "loss": 0.834, "step": 9336 }, { "epoch": 0.5575659859070823, "grad_norm": 2.26828932762146, "learning_rate": 4.916727489881229e-06, "loss": 0.8574, "step": 9337 }, { "epoch": 0.557625701660098, "grad_norm": 2.1349964141845703, "learning_rate": 4.916063963904187e-06, "loss": 0.8798, "step": 9338 }, { "epoch": 0.5576854174131136, "grad_norm": 2.1345479488372803, "learning_rate": 4.9154004379271455e-06, "loss": 0.858, "step": 9339 }, { "epoch": 0.5577451331661292, "grad_norm": 1.8004460334777832, "learning_rate": 4.914736911950103e-06, "loss": 0.8441, "step": 9340 }, { "epoch": 0.5578048489191448, "grad_norm": 2.5880019664764404, "learning_rate": 4.914073385973061e-06, "loss": 0.8745, "step": 9341 }, { "epoch": 0.5578645646721605, "grad_norm": 1.6943809986114502, "learning_rate": 4.91340985999602e-06, "loss": 0.8681, "step": 9342 }, { "epoch": 0.5579242804251762, "grad_norm": 1.681137204170227, "learning_rate": 4.912746334018977e-06, "loss": 0.8373, "step": 9343 }, { "epoch": 0.5579839961781918, "grad_norm": 1.9713505506515503, "learning_rate": 4.912082808041935e-06, "loss": 0.8406, "step": 9344 }, { "epoch": 0.5580437119312075, "grad_norm": 4.797002792358398, "learning_rate": 4.911419282064893e-06, "loss": 0.8662, "step": 9345 }, { "epoch": 0.558103427684223, "grad_norm": 2.154583692550659, "learning_rate": 4.910755756087851e-06, "loss": 0.8519, "step": 9346 }, { "epoch": 0.5581631434372387, "grad_norm": 2.547848701477051, "learning_rate": 4.9100922301108095e-06, "loss": 0.8268, "step": 9347 }, { "epoch": 0.5582228591902544, "grad_norm": 1.9667000770568848, "learning_rate": 4.9094287041337676e-06, "loss": 0.8883, "step": 9348 }, { "epoch": 0.55828257494327, "grad_norm": 1.8168456554412842, "learning_rate": 4.908765178156726e-06, "loss": 0.8097, "step": 9349 }, { "epoch": 0.5583422906962857, "grad_norm": 10.361024856567383, "learning_rate": 4.908101652179683e-06, "loss": 0.853, "step": 9350 }, { "epoch": 0.5584020064493014, "grad_norm": 2.525848865509033, "learning_rate": 4.907438126202641e-06, "loss": 0.8367, "step": 9351 }, { "epoch": 0.5584617222023169, "grad_norm": 1.9713252782821655, "learning_rate": 4.906774600225599e-06, "loss": 0.8701, "step": 9352 }, { "epoch": 0.5585214379553326, "grad_norm": 3.264378070831299, "learning_rate": 4.906111074248557e-06, "loss": 0.8608, "step": 9353 }, { "epoch": 0.5585811537083483, "grad_norm": 3.041085958480835, "learning_rate": 4.905447548271515e-06, "loss": 0.8815, "step": 9354 }, { "epoch": 0.5586408694613639, "grad_norm": 1.9564602375030518, "learning_rate": 4.9047840222944734e-06, "loss": 0.869, "step": 9355 }, { "epoch": 0.5587005852143796, "grad_norm": 2.303126573562622, "learning_rate": 4.904120496317431e-06, "loss": 0.8554, "step": 9356 }, { "epoch": 0.5587603009673952, "grad_norm": 2.1462907791137695, "learning_rate": 4.90345697034039e-06, "loss": 0.8532, "step": 9357 }, { "epoch": 0.5588200167204108, "grad_norm": 2.9487380981445312, "learning_rate": 4.902793444363348e-06, "loss": 0.8453, "step": 9358 }, { "epoch": 0.5588797324734265, "grad_norm": 2.14615535736084, "learning_rate": 4.902129918386305e-06, "loss": 0.844, "step": 9359 }, { "epoch": 0.5589394482264421, "grad_norm": 2.527378797531128, "learning_rate": 4.901466392409263e-06, "loss": 0.8606, "step": 9360 }, { "epoch": 0.5589991639794578, "grad_norm": 2.7557950019836426, "learning_rate": 4.900802866432221e-06, "loss": 0.8619, "step": 9361 }, { "epoch": 0.5590588797324735, "grad_norm": 2.185209035873413, "learning_rate": 4.900139340455179e-06, "loss": 0.8756, "step": 9362 }, { "epoch": 0.559118595485489, "grad_norm": 2.8787684440612793, "learning_rate": 4.899475814478137e-06, "loss": 0.8392, "step": 9363 }, { "epoch": 0.5591783112385047, "grad_norm": 3.142160415649414, "learning_rate": 4.8988122885010955e-06, "loss": 0.8522, "step": 9364 }, { "epoch": 0.5592380269915204, "grad_norm": 2.0606977939605713, "learning_rate": 4.898148762524053e-06, "loss": 0.8533, "step": 9365 }, { "epoch": 0.559297742744536, "grad_norm": 2.1173856258392334, "learning_rate": 4.897485236547011e-06, "loss": 0.8785, "step": 9366 }, { "epoch": 0.5593574584975517, "grad_norm": 2.2156765460968018, "learning_rate": 4.89682171056997e-06, "loss": 0.8407, "step": 9367 }, { "epoch": 0.5594171742505673, "grad_norm": 1.741743803024292, "learning_rate": 4.896158184592927e-06, "loss": 0.8184, "step": 9368 }, { "epoch": 0.5594768900035829, "grad_norm": 1.7020634412765503, "learning_rate": 4.895494658615885e-06, "loss": 0.8454, "step": 9369 }, { "epoch": 0.5595366057565986, "grad_norm": 3.408515691757202, "learning_rate": 4.894831132638843e-06, "loss": 0.8532, "step": 9370 }, { "epoch": 0.5595963215096142, "grad_norm": 1.8601503372192383, "learning_rate": 4.894167606661801e-06, "loss": 0.8398, "step": 9371 }, { "epoch": 0.5596560372626299, "grad_norm": 1.886938214302063, "learning_rate": 4.8935040806847594e-06, "loss": 0.8509, "step": 9372 }, { "epoch": 0.5597157530156456, "grad_norm": 1.7556296586990356, "learning_rate": 4.8928405547077175e-06, "loss": 0.8086, "step": 9373 }, { "epoch": 0.5597754687686611, "grad_norm": 2.617140769958496, "learning_rate": 4.892177028730676e-06, "loss": 0.8438, "step": 9374 }, { "epoch": 0.5598351845216768, "grad_norm": 2.351933240890503, "learning_rate": 4.891513502753633e-06, "loss": 0.8481, "step": 9375 }, { "epoch": 0.5598949002746925, "grad_norm": 2.1843056678771973, "learning_rate": 4.890849976776591e-06, "loss": 0.8529, "step": 9376 }, { "epoch": 0.5599546160277081, "grad_norm": 2.0374069213867188, "learning_rate": 4.890186450799549e-06, "loss": 0.8816, "step": 9377 }, { "epoch": 0.5600143317807238, "grad_norm": 2.540607213973999, "learning_rate": 4.889522924822507e-06, "loss": 0.8348, "step": 9378 }, { "epoch": 0.5600740475337393, "grad_norm": 2.2517993450164795, "learning_rate": 4.888859398845465e-06, "loss": 0.8468, "step": 9379 }, { "epoch": 0.560133763286755, "grad_norm": 2.510033369064331, "learning_rate": 4.888195872868423e-06, "loss": 0.8401, "step": 9380 }, { "epoch": 0.5601934790397707, "grad_norm": 2.6616322994232178, "learning_rate": 4.887532346891381e-06, "loss": 0.8781, "step": 9381 }, { "epoch": 0.5602531947927863, "grad_norm": 2.495131492614746, "learning_rate": 4.88686882091434e-06, "loss": 0.8459, "step": 9382 }, { "epoch": 0.560312910545802, "grad_norm": 3.2737205028533936, "learning_rate": 4.886205294937298e-06, "loss": 0.8655, "step": 9383 }, { "epoch": 0.5603726262988177, "grad_norm": 2.0473151206970215, "learning_rate": 4.885541768960255e-06, "loss": 0.8532, "step": 9384 }, { "epoch": 0.5604323420518332, "grad_norm": 2.251718044281006, "learning_rate": 4.884878242983213e-06, "loss": 0.8607, "step": 9385 }, { "epoch": 0.5604920578048489, "grad_norm": 1.7436143159866333, "learning_rate": 4.884214717006171e-06, "loss": 0.8712, "step": 9386 }, { "epoch": 0.5605517735578646, "grad_norm": 1.9463517665863037, "learning_rate": 4.883551191029129e-06, "loss": 0.8247, "step": 9387 }, { "epoch": 0.5606114893108802, "grad_norm": 2.255500078201294, "learning_rate": 4.882887665052087e-06, "loss": 0.8495, "step": 9388 }, { "epoch": 0.5606712050638959, "grad_norm": 2.1752138137817383, "learning_rate": 4.8822241390750455e-06, "loss": 0.8449, "step": 9389 }, { "epoch": 0.5607309208169114, "grad_norm": 2.8081233501434326, "learning_rate": 4.881560613098003e-06, "loss": 0.8658, "step": 9390 }, { "epoch": 0.5607906365699271, "grad_norm": 2.8917438983917236, "learning_rate": 4.880897087120961e-06, "loss": 0.8456, "step": 9391 }, { "epoch": 0.5608503523229428, "grad_norm": 2.2312846183776855, "learning_rate": 4.88023356114392e-06, "loss": 0.8776, "step": 9392 }, { "epoch": 0.5609100680759584, "grad_norm": 1.9276058673858643, "learning_rate": 4.879570035166877e-06, "loss": 0.8585, "step": 9393 }, { "epoch": 0.5609697838289741, "grad_norm": 1.9357632398605347, "learning_rate": 4.878906509189835e-06, "loss": 0.8832, "step": 9394 }, { "epoch": 0.5610294995819898, "grad_norm": 1.8044747114181519, "learning_rate": 4.878242983212793e-06, "loss": 0.8783, "step": 9395 }, { "epoch": 0.5610892153350053, "grad_norm": 2.0499274730682373, "learning_rate": 4.877579457235751e-06, "loss": 0.8735, "step": 9396 }, { "epoch": 0.561148931088021, "grad_norm": 2.4165663719177246, "learning_rate": 4.876915931258709e-06, "loss": 0.8433, "step": 9397 }, { "epoch": 0.5612086468410367, "grad_norm": 2.1562609672546387, "learning_rate": 4.8762524052816675e-06, "loss": 0.8693, "step": 9398 }, { "epoch": 0.5612683625940523, "grad_norm": 2.2224152088165283, "learning_rate": 4.875588879304626e-06, "loss": 0.8404, "step": 9399 }, { "epoch": 0.561328078347068, "grad_norm": 2.2329444885253906, "learning_rate": 4.874925353327583e-06, "loss": 0.8694, "step": 9400 }, { "epoch": 0.561328078347068, "eval_text_loss": 0.9094271659851074, "eval_text_runtime": 15.1967, "eval_text_samples_per_second": 263.215, "eval_text_steps_per_second": 0.526, "step": 9400 }, { "epoch": 0.561328078347068, "eval_image_loss": 0.6206604242324829, "eval_image_runtime": 5.0111, "eval_image_samples_per_second": 798.222, "eval_image_steps_per_second": 1.596, "step": 9400 }, { "epoch": 0.561328078347068, "eval_video_loss": 1.0609561204910278, "eval_video_runtime": 76.6776, "eval_video_samples_per_second": 52.166, "eval_video_steps_per_second": 0.104, "step": 9400 }, { "epoch": 0.5613877941000835, "grad_norm": 7.651241302490234, "learning_rate": 4.874261827350541e-06, "loss": 0.8443, "step": 9401 }, { "epoch": 0.5614475098530992, "grad_norm": 2.291935920715332, "learning_rate": 4.873598301373499e-06, "loss": 0.8264, "step": 9402 }, { "epoch": 0.5615072256061149, "grad_norm": 2.1015784740448, "learning_rate": 4.872934775396457e-06, "loss": 0.8785, "step": 9403 }, { "epoch": 0.5615669413591305, "grad_norm": 2.3269362449645996, "learning_rate": 4.872271249419415e-06, "loss": 0.8667, "step": 9404 }, { "epoch": 0.5616266571121462, "grad_norm": 2.5287184715270996, "learning_rate": 4.871607723442373e-06, "loss": 0.8758, "step": 9405 }, { "epoch": 0.5616863728651619, "grad_norm": 2.2085914611816406, "learning_rate": 4.870944197465331e-06, "loss": 0.8303, "step": 9406 }, { "epoch": 0.5617460886181774, "grad_norm": 1.8868378400802612, "learning_rate": 4.8702806714882896e-06, "loss": 0.8418, "step": 9407 }, { "epoch": 0.5618058043711931, "grad_norm": 2.647808790206909, "learning_rate": 4.869617145511248e-06, "loss": 0.8315, "step": 9408 }, { "epoch": 0.5618655201242088, "grad_norm": 2.6353557109832764, "learning_rate": 4.868953619534205e-06, "loss": 0.8502, "step": 9409 }, { "epoch": 0.5619252358772244, "grad_norm": 1.5030444860458374, "learning_rate": 4.868290093557163e-06, "loss": 0.8597, "step": 9410 }, { "epoch": 0.5619849516302401, "grad_norm": 2.7103307247161865, "learning_rate": 4.867626567580121e-06, "loss": 0.9123, "step": 9411 }, { "epoch": 0.5620446673832558, "grad_norm": 3.8277900218963623, "learning_rate": 4.866963041603079e-06, "loss": 0.882, "step": 9412 }, { "epoch": 0.5621043831362713, "grad_norm": 2.0411300659179688, "learning_rate": 4.866299515626037e-06, "loss": 0.8394, "step": 9413 }, { "epoch": 0.562164098889287, "grad_norm": 1.8524329662322998, "learning_rate": 4.8656359896489954e-06, "loss": 0.8744, "step": 9414 }, { "epoch": 0.5622238146423026, "grad_norm": 1.8580454587936401, "learning_rate": 4.864972463671953e-06, "loss": 0.8491, "step": 9415 }, { "epoch": 0.5622835303953183, "grad_norm": 2.0715343952178955, "learning_rate": 4.864308937694911e-06, "loss": 0.8381, "step": 9416 }, { "epoch": 0.562343246148334, "grad_norm": 1.965662956237793, "learning_rate": 4.86364541171787e-06, "loss": 0.8682, "step": 9417 }, { "epoch": 0.5624029619013495, "grad_norm": 3.6378848552703857, "learning_rate": 4.862981885740827e-06, "loss": 0.8619, "step": 9418 }, { "epoch": 0.5624626776543652, "grad_norm": 2.421529769897461, "learning_rate": 4.862318359763785e-06, "loss": 0.8694, "step": 9419 }, { "epoch": 0.5625223934073809, "grad_norm": 2.518432855606079, "learning_rate": 4.861654833786743e-06, "loss": 0.8333, "step": 9420 }, { "epoch": 0.5625821091603965, "grad_norm": 1.9135892391204834, "learning_rate": 4.860991307809701e-06, "loss": 0.8242, "step": 9421 }, { "epoch": 0.5626418249134122, "grad_norm": 3.370161294937134, "learning_rate": 4.860327781832659e-06, "loss": 0.8326, "step": 9422 }, { "epoch": 0.5627015406664279, "grad_norm": 2.274350881576538, "learning_rate": 4.8596642558556175e-06, "loss": 0.884, "step": 9423 }, { "epoch": 0.5627612564194434, "grad_norm": 1.7926138639450073, "learning_rate": 4.859000729878576e-06, "loss": 0.8189, "step": 9424 }, { "epoch": 0.5628209721724591, "grad_norm": 3.591963529586792, "learning_rate": 4.858337203901533e-06, "loss": 0.8707, "step": 9425 }, { "epoch": 0.5628806879254747, "grad_norm": 1.831335425376892, "learning_rate": 4.857673677924491e-06, "loss": 0.8356, "step": 9426 }, { "epoch": 0.5629404036784904, "grad_norm": 2.121485710144043, "learning_rate": 4.857010151947449e-06, "loss": 0.8506, "step": 9427 }, { "epoch": 0.5630001194315061, "grad_norm": 2.262817621231079, "learning_rate": 4.856346625970407e-06, "loss": 0.8837, "step": 9428 }, { "epoch": 0.5630598351845216, "grad_norm": 2.415257215499878, "learning_rate": 4.855683099993365e-06, "loss": 0.8757, "step": 9429 }, { "epoch": 0.5631195509375373, "grad_norm": 2.246344566345215, "learning_rate": 4.855019574016323e-06, "loss": 0.8506, "step": 9430 }, { "epoch": 0.563179266690553, "grad_norm": 2.492931842803955, "learning_rate": 4.854356048039281e-06, "loss": 0.8498, "step": 9431 }, { "epoch": 0.5632389824435686, "grad_norm": 2.031683921813965, "learning_rate": 4.8536925220622395e-06, "loss": 0.8467, "step": 9432 }, { "epoch": 0.5632986981965843, "grad_norm": 2.213690996170044, "learning_rate": 4.853028996085198e-06, "loss": 0.8617, "step": 9433 }, { "epoch": 0.5633584139496, "grad_norm": 2.7360990047454834, "learning_rate": 4.852365470108155e-06, "loss": 0.8569, "step": 9434 }, { "epoch": 0.5634181297026155, "grad_norm": 4.002805709838867, "learning_rate": 4.851701944131113e-06, "loss": 0.8334, "step": 9435 }, { "epoch": 0.5634778454556312, "grad_norm": 2.0668323040008545, "learning_rate": 4.851038418154071e-06, "loss": 0.8402, "step": 9436 }, { "epoch": 0.5635375612086468, "grad_norm": 1.8800450563430786, "learning_rate": 4.850374892177029e-06, "loss": 0.8114, "step": 9437 }, { "epoch": 0.5635972769616625, "grad_norm": 1.56464684009552, "learning_rate": 4.849711366199987e-06, "loss": 0.8516, "step": 9438 }, { "epoch": 0.5636569927146782, "grad_norm": 1.6906366348266602, "learning_rate": 4.849047840222945e-06, "loss": 0.8348, "step": 9439 }, { "epoch": 0.5637167084676937, "grad_norm": 2.0625650882720947, "learning_rate": 4.848384314245903e-06, "loss": 0.8404, "step": 9440 }, { "epoch": 0.5637764242207094, "grad_norm": 2.363064765930176, "learning_rate": 4.847720788268861e-06, "loss": 0.8498, "step": 9441 }, { "epoch": 0.5638361399737251, "grad_norm": 2.3021323680877686, "learning_rate": 4.84705726229182e-06, "loss": 0.858, "step": 9442 }, { "epoch": 0.5638958557267407, "grad_norm": 2.876246929168701, "learning_rate": 4.846393736314777e-06, "loss": 0.854, "step": 9443 }, { "epoch": 0.5639555714797564, "grad_norm": 2.0088069438934326, "learning_rate": 4.845730210337735e-06, "loss": 0.85, "step": 9444 }, { "epoch": 0.5640152872327721, "grad_norm": 1.7266863584518433, "learning_rate": 4.845066684360693e-06, "loss": 0.8638, "step": 9445 }, { "epoch": 0.5640750029857876, "grad_norm": 1.8943623304367065, "learning_rate": 4.844403158383651e-06, "loss": 0.8458, "step": 9446 }, { "epoch": 0.5641347187388033, "grad_norm": 1.6675843000411987, "learning_rate": 4.843739632406609e-06, "loss": 0.8594, "step": 9447 }, { "epoch": 0.5641944344918189, "grad_norm": 2.3520350456237793, "learning_rate": 4.8430761064295675e-06, "loss": 0.8251, "step": 9448 }, { "epoch": 0.5642541502448346, "grad_norm": 1.823771595954895, "learning_rate": 4.8424125804525256e-06, "loss": 0.8297, "step": 9449 }, { "epoch": 0.5643138659978503, "grad_norm": 3.9271881580352783, "learning_rate": 4.841749054475483e-06, "loss": 0.8393, "step": 9450 }, { "epoch": 0.5643735817508658, "grad_norm": 3.893141031265259, "learning_rate": 4.841085528498441e-06, "loss": 0.8948, "step": 9451 }, { "epoch": 0.5644332975038815, "grad_norm": 2.7446279525756836, "learning_rate": 4.840422002521399e-06, "loss": 0.8585, "step": 9452 }, { "epoch": 0.5644930132568972, "grad_norm": 2.3454842567443848, "learning_rate": 4.839758476544357e-06, "loss": 0.839, "step": 9453 }, { "epoch": 0.5645527290099128, "grad_norm": 1.8753981590270996, "learning_rate": 4.839094950567315e-06, "loss": 0.7992, "step": 9454 }, { "epoch": 0.5646124447629285, "grad_norm": 2.4044768810272217, "learning_rate": 4.838431424590273e-06, "loss": 0.8443, "step": 9455 }, { "epoch": 0.5646721605159442, "grad_norm": 2.14040207862854, "learning_rate": 4.8377678986132306e-06, "loss": 0.8377, "step": 9456 }, { "epoch": 0.5647318762689597, "grad_norm": 2.2182188034057617, "learning_rate": 4.8371043726361895e-06, "loss": 0.8631, "step": 9457 }, { "epoch": 0.5647915920219754, "grad_norm": 3.1205129623413086, "learning_rate": 4.836440846659148e-06, "loss": 0.8578, "step": 9458 }, { "epoch": 0.564851307774991, "grad_norm": 1.6797815561294556, "learning_rate": 4.835777320682105e-06, "loss": 0.8368, "step": 9459 }, { "epoch": 0.5649110235280067, "grad_norm": 4.707890510559082, "learning_rate": 4.835113794705063e-06, "loss": 0.8638, "step": 9460 }, { "epoch": 0.5649707392810224, "grad_norm": 1.9673614501953125, "learning_rate": 4.834450268728021e-06, "loss": 0.8213, "step": 9461 }, { "epoch": 0.5650304550340379, "grad_norm": 2.2021539211273193, "learning_rate": 4.833786742750979e-06, "loss": 0.8382, "step": 9462 }, { "epoch": 0.5650901707870536, "grad_norm": 2.333106279373169, "learning_rate": 4.833123216773937e-06, "loss": 0.8317, "step": 9463 }, { "epoch": 0.5651498865400693, "grad_norm": 2.358933448791504, "learning_rate": 4.832459690796895e-06, "loss": 0.8449, "step": 9464 }, { "epoch": 0.5652096022930849, "grad_norm": 1.86551034450531, "learning_rate": 4.831796164819853e-06, "loss": 0.8199, "step": 9465 }, { "epoch": 0.5652693180461006, "grad_norm": 2.4204330444335938, "learning_rate": 4.831132638842811e-06, "loss": 0.8641, "step": 9466 }, { "epoch": 0.5653290337991163, "grad_norm": 1.6426868438720703, "learning_rate": 4.83046911286577e-06, "loss": 0.8491, "step": 9467 }, { "epoch": 0.5653887495521318, "grad_norm": 2.5279488563537598, "learning_rate": 4.829805586888727e-06, "loss": 0.8454, "step": 9468 }, { "epoch": 0.5654484653051475, "grad_norm": 1.970816731452942, "learning_rate": 4.829142060911685e-06, "loss": 0.8364, "step": 9469 }, { "epoch": 0.5655081810581631, "grad_norm": 2.630810499191284, "learning_rate": 4.828478534934643e-06, "loss": 0.8527, "step": 9470 }, { "epoch": 0.5655678968111788, "grad_norm": 2.5605263710021973, "learning_rate": 4.827815008957601e-06, "loss": 0.8721, "step": 9471 }, { "epoch": 0.5656276125641945, "grad_norm": 2.2166104316711426, "learning_rate": 4.827151482980559e-06, "loss": 0.8272, "step": 9472 }, { "epoch": 0.56568732831721, "grad_norm": 1.6141612529754639, "learning_rate": 4.8264879570035174e-06, "loss": 0.8724, "step": 9473 }, { "epoch": 0.5657470440702257, "grad_norm": 2.799022912979126, "learning_rate": 4.8258244310264755e-06, "loss": 0.869, "step": 9474 }, { "epoch": 0.5658067598232414, "grad_norm": 2.563286066055298, "learning_rate": 4.825160905049433e-06, "loss": 0.8497, "step": 9475 }, { "epoch": 0.565866475576257, "grad_norm": 2.3224918842315674, "learning_rate": 4.824497379072391e-06, "loss": 0.8623, "step": 9476 }, { "epoch": 0.5659261913292727, "grad_norm": 2.0403552055358887, "learning_rate": 4.823833853095349e-06, "loss": 0.8712, "step": 9477 }, { "epoch": 0.5659859070822884, "grad_norm": 2.3741393089294434, "learning_rate": 4.823170327118307e-06, "loss": 0.8307, "step": 9478 }, { "epoch": 0.5660456228353039, "grad_norm": 1.7715169191360474, "learning_rate": 4.822506801141265e-06, "loss": 0.8641, "step": 9479 }, { "epoch": 0.5661053385883196, "grad_norm": 2.2169911861419678, "learning_rate": 4.821843275164223e-06, "loss": 0.8393, "step": 9480 }, { "epoch": 0.5661650543413352, "grad_norm": 1.9127379655838013, "learning_rate": 4.8211797491871805e-06, "loss": 0.8443, "step": 9481 }, { "epoch": 0.5662247700943509, "grad_norm": 1.6753939390182495, "learning_rate": 4.8205162232101395e-06, "loss": 0.8634, "step": 9482 }, { "epoch": 0.5662844858473666, "grad_norm": 1.907859444618225, "learning_rate": 4.819852697233098e-06, "loss": 0.8736, "step": 9483 }, { "epoch": 0.5663442016003822, "grad_norm": 2.888278007507324, "learning_rate": 4.819189171256055e-06, "loss": 0.8592, "step": 9484 }, { "epoch": 0.5664039173533978, "grad_norm": 2.135063648223877, "learning_rate": 4.818525645279013e-06, "loss": 0.8311, "step": 9485 }, { "epoch": 0.5664636331064135, "grad_norm": 2.766913414001465, "learning_rate": 4.817862119301971e-06, "loss": 0.8677, "step": 9486 }, { "epoch": 0.5665233488594291, "grad_norm": 3.4501447677612305, "learning_rate": 4.817198593324929e-06, "loss": 0.8689, "step": 9487 }, { "epoch": 0.5665830646124448, "grad_norm": 1.7815728187561035, "learning_rate": 4.816535067347887e-06, "loss": 0.8614, "step": 9488 }, { "epoch": 0.5666427803654605, "grad_norm": 3.265049457550049, "learning_rate": 4.815871541370845e-06, "loss": 0.8797, "step": 9489 }, { "epoch": 0.566702496118476, "grad_norm": 2.3679451942443848, "learning_rate": 4.815208015393803e-06, "loss": 0.8301, "step": 9490 }, { "epoch": 0.5667622118714917, "grad_norm": 2.838060140609741, "learning_rate": 4.814544489416761e-06, "loss": 0.8705, "step": 9491 }, { "epoch": 0.5668219276245073, "grad_norm": 4.228381156921387, "learning_rate": 4.81388096343972e-06, "loss": 0.8468, "step": 9492 }, { "epoch": 0.566881643377523, "grad_norm": 1.5647037029266357, "learning_rate": 4.813217437462677e-06, "loss": 0.8914, "step": 9493 }, { "epoch": 0.5669413591305387, "grad_norm": 2.961247682571411, "learning_rate": 4.812553911485635e-06, "loss": 0.8539, "step": 9494 }, { "epoch": 0.5670010748835543, "grad_norm": 6.405031681060791, "learning_rate": 4.811890385508593e-06, "loss": 0.8664, "step": 9495 }, { "epoch": 0.5670607906365699, "grad_norm": 1.809098720550537, "learning_rate": 4.811226859531551e-06, "loss": 0.8414, "step": 9496 }, { "epoch": 0.5671205063895856, "grad_norm": 2.917766809463501, "learning_rate": 4.810563333554509e-06, "loss": 0.871, "step": 9497 }, { "epoch": 0.5671802221426012, "grad_norm": 1.9940382242202759, "learning_rate": 4.809899807577467e-06, "loss": 0.8457, "step": 9498 }, { "epoch": 0.5672399378956169, "grad_norm": 1.9905484914779663, "learning_rate": 4.8092362816004255e-06, "loss": 0.8662, "step": 9499 }, { "epoch": 0.5672996536486326, "grad_norm": 1.7888375520706177, "learning_rate": 4.808572755623383e-06, "loss": 0.8313, "step": 9500 }, { "epoch": 0.5673593694016481, "grad_norm": 2.3706328868865967, "learning_rate": 4.807909229646341e-06, "loss": 0.8685, "step": 9501 }, { "epoch": 0.5674190851546638, "grad_norm": 6.457030773162842, "learning_rate": 4.807245703669299e-06, "loss": 0.8546, "step": 9502 }, { "epoch": 0.5674788009076794, "grad_norm": 3.0465095043182373, "learning_rate": 4.806582177692257e-06, "loss": 0.8627, "step": 9503 }, { "epoch": 0.5675385166606951, "grad_norm": 1.9958078861236572, "learning_rate": 4.805918651715215e-06, "loss": 0.829, "step": 9504 }, { "epoch": 0.5675982324137108, "grad_norm": 2.428253650665283, "learning_rate": 4.805255125738173e-06, "loss": 0.8262, "step": 9505 }, { "epoch": 0.5676579481667264, "grad_norm": 1.747117519378662, "learning_rate": 4.8045915997611305e-06, "loss": 0.8565, "step": 9506 }, { "epoch": 0.567717663919742, "grad_norm": 2.306152105331421, "learning_rate": 4.803928073784089e-06, "loss": 0.8488, "step": 9507 }, { "epoch": 0.5677773796727577, "grad_norm": 2.5548806190490723, "learning_rate": 4.8032645478070476e-06, "loss": 0.8399, "step": 9508 }, { "epoch": 0.5678370954257733, "grad_norm": 2.2291576862335205, "learning_rate": 4.802601021830005e-06, "loss": 0.8513, "step": 9509 }, { "epoch": 0.567896811178789, "grad_norm": 3.3318698406219482, "learning_rate": 4.801937495852963e-06, "loss": 0.8507, "step": 9510 }, { "epoch": 0.5679565269318047, "grad_norm": 2.3809092044830322, "learning_rate": 4.801273969875921e-06, "loss": 0.8857, "step": 9511 }, { "epoch": 0.5680162426848202, "grad_norm": 1.812103271484375, "learning_rate": 4.800610443898879e-06, "loss": 0.8458, "step": 9512 }, { "epoch": 0.5680759584378359, "grad_norm": 1.6539536714553833, "learning_rate": 4.799946917921837e-06, "loss": 0.8208, "step": 9513 }, { "epoch": 0.5681356741908515, "grad_norm": 1.9143942594528198, "learning_rate": 4.799283391944795e-06, "loss": 0.8356, "step": 9514 }, { "epoch": 0.5681953899438672, "grad_norm": 1.7830679416656494, "learning_rate": 4.7986198659677526e-06, "loss": 0.8681, "step": 9515 }, { "epoch": 0.5682551056968829, "grad_norm": 2.1137759685516357, "learning_rate": 4.797956339990711e-06, "loss": 0.8601, "step": 9516 }, { "epoch": 0.5683148214498985, "grad_norm": 2.0917038917541504, "learning_rate": 4.797292814013669e-06, "loss": 0.8456, "step": 9517 }, { "epoch": 0.5683745372029141, "grad_norm": 1.719225525856018, "learning_rate": 4.796629288036627e-06, "loss": 0.8112, "step": 9518 }, { "epoch": 0.5684342529559298, "grad_norm": 1.9588110446929932, "learning_rate": 4.795965762059585e-06, "loss": 0.8523, "step": 9519 }, { "epoch": 0.5684939687089454, "grad_norm": 1.7145161628723145, "learning_rate": 4.795302236082543e-06, "loss": 0.8564, "step": 9520 }, { "epoch": 0.5685536844619611, "grad_norm": 1.990539789199829, "learning_rate": 4.794638710105501e-06, "loss": 0.8518, "step": 9521 }, { "epoch": 0.5686134002149767, "grad_norm": 2.0722877979278564, "learning_rate": 4.793975184128458e-06, "loss": 0.8623, "step": 9522 }, { "epoch": 0.5686731159679923, "grad_norm": 8.319035530090332, "learning_rate": 4.793311658151417e-06, "loss": 0.8465, "step": 9523 }, { "epoch": 0.568732831721008, "grad_norm": 2.8095316886901855, "learning_rate": 4.7926481321743755e-06, "loss": 0.896, "step": 9524 }, { "epoch": 0.5687925474740236, "grad_norm": 1.9309309720993042, "learning_rate": 4.791984606197333e-06, "loss": 0.8312, "step": 9525 }, { "epoch": 0.5688522632270393, "grad_norm": 2.1229660511016846, "learning_rate": 4.791321080220291e-06, "loss": 0.8579, "step": 9526 }, { "epoch": 0.568911978980055, "grad_norm": 1.837435007095337, "learning_rate": 4.790657554243249e-06, "loss": 0.8848, "step": 9527 }, { "epoch": 0.5689716947330706, "grad_norm": 1.7918225526809692, "learning_rate": 4.789994028266207e-06, "loss": 0.8129, "step": 9528 }, { "epoch": 0.5690314104860862, "grad_norm": 3.445729970932007, "learning_rate": 4.789330502289165e-06, "loss": 0.8681, "step": 9529 }, { "epoch": 0.5690911262391019, "grad_norm": 1.848222017288208, "learning_rate": 4.788666976312123e-06, "loss": 0.8694, "step": 9530 }, { "epoch": 0.5691508419921175, "grad_norm": 2.296445369720459, "learning_rate": 4.7880034503350805e-06, "loss": 0.8072, "step": 9531 }, { "epoch": 0.5692105577451332, "grad_norm": 2.615736722946167, "learning_rate": 4.787339924358039e-06, "loss": 0.8747, "step": 9532 }, { "epoch": 0.5692702734981488, "grad_norm": 2.1082265377044678, "learning_rate": 4.7866763983809975e-06, "loss": 0.8625, "step": 9533 }, { "epoch": 0.5693299892511644, "grad_norm": 1.8244390487670898, "learning_rate": 4.786012872403955e-06, "loss": 0.8911, "step": 9534 }, { "epoch": 0.5693897050041801, "grad_norm": 2.3656435012817383, "learning_rate": 4.785349346426913e-06, "loss": 0.8685, "step": 9535 }, { "epoch": 0.5694494207571957, "grad_norm": 4.078604698181152, "learning_rate": 4.784685820449871e-06, "loss": 0.8687, "step": 9536 }, { "epoch": 0.5695091365102114, "grad_norm": 1.942193627357483, "learning_rate": 4.784022294472829e-06, "loss": 0.8731, "step": 9537 }, { "epoch": 0.5695688522632271, "grad_norm": 1.6495612859725952, "learning_rate": 4.783358768495787e-06, "loss": 0.8591, "step": 9538 }, { "epoch": 0.5696285680162427, "grad_norm": 2.744011402130127, "learning_rate": 4.782695242518745e-06, "loss": 0.8699, "step": 9539 }, { "epoch": 0.5696882837692583, "grad_norm": 2.956902503967285, "learning_rate": 4.7820317165417025e-06, "loss": 0.8786, "step": 9540 }, { "epoch": 0.569747999522274, "grad_norm": 2.5039188861846924, "learning_rate": 4.781368190564661e-06, "loss": 0.8678, "step": 9541 }, { "epoch": 0.5698077152752896, "grad_norm": 2.3490684032440186, "learning_rate": 4.780704664587619e-06, "loss": 0.8495, "step": 9542 }, { "epoch": 0.5698674310283053, "grad_norm": 2.077601671218872, "learning_rate": 4.780041138610577e-06, "loss": 0.8711, "step": 9543 }, { "epoch": 0.569927146781321, "grad_norm": 8.123702049255371, "learning_rate": 4.779377612633535e-06, "loss": 0.877, "step": 9544 }, { "epoch": 0.5699868625343365, "grad_norm": 5.422604084014893, "learning_rate": 4.778714086656493e-06, "loss": 0.8705, "step": 9545 }, { "epoch": 0.5700465782873522, "grad_norm": 4.120206832885742, "learning_rate": 4.778050560679451e-06, "loss": 0.8588, "step": 9546 }, { "epoch": 0.5701062940403678, "grad_norm": 5.772716522216797, "learning_rate": 4.777387034702408e-06, "loss": 0.8333, "step": 9547 }, { "epoch": 0.5701660097933835, "grad_norm": 2.6684255599975586, "learning_rate": 4.776723508725367e-06, "loss": 0.8572, "step": 9548 }, { "epoch": 0.5702257255463992, "grad_norm": 2.003786087036133, "learning_rate": 4.7760599827483254e-06, "loss": 0.8459, "step": 9549 }, { "epoch": 0.5702854412994148, "grad_norm": 5.026068687438965, "learning_rate": 4.775396456771283e-06, "loss": 0.853, "step": 9550 }, { "epoch": 0.5703451570524304, "grad_norm": 2.087275505065918, "learning_rate": 4.774732930794241e-06, "loss": 0.8754, "step": 9551 }, { "epoch": 0.570404872805446, "grad_norm": 5.57722806930542, "learning_rate": 4.774069404817199e-06, "loss": 0.8711, "step": 9552 }, { "epoch": 0.5704645885584617, "grad_norm": 2.633375406265259, "learning_rate": 4.773405878840157e-06, "loss": 0.8675, "step": 9553 }, { "epoch": 0.5705243043114774, "grad_norm": 2.9407694339752197, "learning_rate": 4.772742352863115e-06, "loss": 0.8451, "step": 9554 }, { "epoch": 0.570584020064493, "grad_norm": 1.8010032176971436, "learning_rate": 4.772078826886073e-06, "loss": 0.8305, "step": 9555 }, { "epoch": 0.5706437358175087, "grad_norm": 2.1419637203216553, "learning_rate": 4.7714153009090304e-06, "loss": 0.8468, "step": 9556 }, { "epoch": 0.5707034515705243, "grad_norm": 2.2957592010498047, "learning_rate": 4.7707517749319885e-06, "loss": 0.8323, "step": 9557 }, { "epoch": 0.5707631673235399, "grad_norm": 3.60621976852417, "learning_rate": 4.7700882489549475e-06, "loss": 0.9293, "step": 9558 }, { "epoch": 0.5708228830765556, "grad_norm": 4.340101718902588, "learning_rate": 4.769424722977905e-06, "loss": 0.8434, "step": 9559 }, { "epoch": 0.5708825988295713, "grad_norm": 2.7964162826538086, "learning_rate": 4.768761197000863e-06, "loss": 0.8741, "step": 9560 }, { "epoch": 0.5709423145825869, "grad_norm": 2.1349904537200928, "learning_rate": 4.768097671023821e-06, "loss": 0.8817, "step": 9561 }, { "epoch": 0.5710020303356025, "grad_norm": 2.165283441543579, "learning_rate": 4.767434145046779e-06, "loss": 0.8752, "step": 9562 }, { "epoch": 0.5710617460886181, "grad_norm": 3.4205284118652344, "learning_rate": 4.766770619069737e-06, "loss": 0.9115, "step": 9563 }, { "epoch": 0.5711214618416338, "grad_norm": 1.826751947402954, "learning_rate": 4.766107093092695e-06, "loss": 0.8105, "step": 9564 }, { "epoch": 0.5711811775946495, "grad_norm": 2.3466455936431885, "learning_rate": 4.7654435671156525e-06, "loss": 0.833, "step": 9565 }, { "epoch": 0.5712408933476651, "grad_norm": 17.670927047729492, "learning_rate": 4.764780041138611e-06, "loss": 0.8718, "step": 9566 }, { "epoch": 0.5713006091006808, "grad_norm": 1.5865283012390137, "learning_rate": 4.764116515161569e-06, "loss": 0.8376, "step": 9567 }, { "epoch": 0.5713603248536964, "grad_norm": 2.0458080768585205, "learning_rate": 4.763452989184527e-06, "loss": 0.8462, "step": 9568 }, { "epoch": 0.571420040606712, "grad_norm": 2.4558842182159424, "learning_rate": 4.762789463207485e-06, "loss": 0.8526, "step": 9569 }, { "epoch": 0.5714797563597277, "grad_norm": 4.00685977935791, "learning_rate": 4.762125937230443e-06, "loss": 0.8767, "step": 9570 }, { "epoch": 0.5715394721127434, "grad_norm": 2.5980632305145264, "learning_rate": 4.761462411253401e-06, "loss": 0.8698, "step": 9571 }, { "epoch": 0.571599187865759, "grad_norm": 1.7173516750335693, "learning_rate": 4.760798885276358e-06, "loss": 0.8716, "step": 9572 }, { "epoch": 0.5716589036187746, "grad_norm": 1.7055903673171997, "learning_rate": 4.760135359299317e-06, "loss": 0.8065, "step": 9573 }, { "epoch": 0.5717186193717902, "grad_norm": 5.599519729614258, "learning_rate": 4.759471833322275e-06, "loss": 0.8472, "step": 9574 }, { "epoch": 0.5717783351248059, "grad_norm": 1.7040114402770996, "learning_rate": 4.758808307345233e-06, "loss": 0.9142, "step": 9575 }, { "epoch": 0.5718380508778216, "grad_norm": 2.3167564868927, "learning_rate": 4.758144781368191e-06, "loss": 0.8445, "step": 9576 }, { "epoch": 0.5718977666308372, "grad_norm": 2.3264620304107666, "learning_rate": 4.757481255391149e-06, "loss": 0.8683, "step": 9577 }, { "epoch": 0.5719574823838529, "grad_norm": 1.9631818532943726, "learning_rate": 4.756817729414107e-06, "loss": 0.8486, "step": 9578 }, { "epoch": 0.5720171981368685, "grad_norm": 2.377037763595581, "learning_rate": 4.756154203437065e-06, "loss": 0.8254, "step": 9579 }, { "epoch": 0.5720769138898841, "grad_norm": 1.6896964311599731, "learning_rate": 4.755490677460023e-06, "loss": 0.8692, "step": 9580 }, { "epoch": 0.5721366296428998, "grad_norm": 2.6228582859039307, "learning_rate": 4.75482715148298e-06, "loss": 0.9019, "step": 9581 }, { "epoch": 0.5721963453959155, "grad_norm": 2.217527151107788, "learning_rate": 4.7541636255059385e-06, "loss": 0.8415, "step": 9582 }, { "epoch": 0.5722560611489311, "grad_norm": 2.3675525188446045, "learning_rate": 4.7535000995288975e-06, "loss": 0.841, "step": 9583 }, { "epoch": 0.5723157769019467, "grad_norm": 2.4884417057037354, "learning_rate": 4.752836573551855e-06, "loss": 0.8315, "step": 9584 }, { "epoch": 0.5723754926549623, "grad_norm": 2.4743316173553467, "learning_rate": 4.752173047574813e-06, "loss": 0.8623, "step": 9585 }, { "epoch": 0.572435208407978, "grad_norm": 2.0590875148773193, "learning_rate": 4.751509521597771e-06, "loss": 0.8436, "step": 9586 }, { "epoch": 0.5724949241609937, "grad_norm": 1.7364765405654907, "learning_rate": 4.750845995620729e-06, "loss": 0.8777, "step": 9587 }, { "epoch": 0.5725546399140093, "grad_norm": 7.426446914672852, "learning_rate": 4.750182469643687e-06, "loss": 0.8667, "step": 9588 }, { "epoch": 0.572614355667025, "grad_norm": 2.37436842918396, "learning_rate": 4.749518943666645e-06, "loss": 0.8174, "step": 9589 }, { "epoch": 0.5726740714200406, "grad_norm": 2.5508596897125244, "learning_rate": 4.7488554176896025e-06, "loss": 0.8379, "step": 9590 }, { "epoch": 0.5727337871730562, "grad_norm": 1.8013392686843872, "learning_rate": 4.748191891712561e-06, "loss": 0.8819, "step": 9591 }, { "epoch": 0.5727935029260719, "grad_norm": 2.571913957595825, "learning_rate": 4.747528365735519e-06, "loss": 0.8585, "step": 9592 }, { "epoch": 0.5728532186790876, "grad_norm": 2.358675003051758, "learning_rate": 4.746864839758477e-06, "loss": 0.8678, "step": 9593 }, { "epoch": 0.5729129344321032, "grad_norm": 2.542837619781494, "learning_rate": 4.746201313781435e-06, "loss": 0.8257, "step": 9594 }, { "epoch": 0.5729726501851188, "grad_norm": 2.110724687576294, "learning_rate": 4.745537787804393e-06, "loss": 0.8563, "step": 9595 }, { "epoch": 0.5730323659381344, "grad_norm": 1.8730648756027222, "learning_rate": 4.744874261827351e-06, "loss": 0.8088, "step": 9596 }, { "epoch": 0.5730920816911501, "grad_norm": 2.4436893463134766, "learning_rate": 4.744210735850308e-06, "loss": 0.8329, "step": 9597 }, { "epoch": 0.5731517974441658, "grad_norm": 2.27907657623291, "learning_rate": 4.743547209873267e-06, "loss": 0.8178, "step": 9598 }, { "epoch": 0.5732115131971814, "grad_norm": 4.390213966369629, "learning_rate": 4.742883683896225e-06, "loss": 0.8161, "step": 9599 }, { "epoch": 0.5732712289501971, "grad_norm": 2.0235800743103027, "learning_rate": 4.742220157919183e-06, "loss": 0.8534, "step": 9600 }, { "epoch": 0.5732712289501971, "eval_text_loss": 0.9123362898826599, "eval_text_runtime": 15.3276, "eval_text_samples_per_second": 260.967, "eval_text_steps_per_second": 0.522, "step": 9600 }, { "epoch": 0.5732712289501971, "eval_image_loss": 0.6187842488288879, "eval_image_runtime": 4.9353, "eval_image_samples_per_second": 810.484, "eval_image_steps_per_second": 1.621, "step": 9600 }, { "epoch": 0.5732712289501971, "eval_video_loss": 1.0579122304916382, "eval_video_runtime": 77.2475, "eval_video_samples_per_second": 51.782, "eval_video_steps_per_second": 0.104, "step": 9600 }, { "epoch": 0.5733309447032127, "grad_norm": 2.2867329120635986, "learning_rate": 4.741556631942141e-06, "loss": 0.8737, "step": 9601 }, { "epoch": 0.5733906604562283, "grad_norm": 4.6061110496521, "learning_rate": 4.740893105965099e-06, "loss": 0.8691, "step": 9602 }, { "epoch": 0.573450376209244, "grad_norm": 3.5023674964904785, "learning_rate": 4.740229579988057e-06, "loss": 0.8709, "step": 9603 }, { "epoch": 0.5735100919622597, "grad_norm": 2.0733494758605957, "learning_rate": 4.739566054011015e-06, "loss": 0.837, "step": 9604 }, { "epoch": 0.5735698077152753, "grad_norm": 1.8740699291229248, "learning_rate": 4.738902528033973e-06, "loss": 0.8194, "step": 9605 }, { "epoch": 0.5736295234682909, "grad_norm": 2.3600687980651855, "learning_rate": 4.73823900205693e-06, "loss": 0.8492, "step": 9606 }, { "epoch": 0.5736892392213065, "grad_norm": 2.1338114738464355, "learning_rate": 4.7375754760798885e-06, "loss": 0.8437, "step": 9607 }, { "epoch": 0.5737489549743222, "grad_norm": 1.8041093349456787, "learning_rate": 4.7369119501028474e-06, "loss": 0.8237, "step": 9608 }, { "epoch": 0.5738086707273379, "grad_norm": 1.7872092723846436, "learning_rate": 4.736248424125805e-06, "loss": 0.8456, "step": 9609 }, { "epoch": 0.5738683864803535, "grad_norm": 2.2710154056549072, "learning_rate": 4.735584898148763e-06, "loss": 0.8537, "step": 9610 }, { "epoch": 0.5739281022333692, "grad_norm": 2.4793155193328857, "learning_rate": 4.734921372171721e-06, "loss": 0.8769, "step": 9611 }, { "epoch": 0.5739878179863848, "grad_norm": 1.8090776205062866, "learning_rate": 4.734257846194679e-06, "loss": 0.8554, "step": 9612 }, { "epoch": 0.5740475337394004, "grad_norm": 5.093916893005371, "learning_rate": 4.733594320217637e-06, "loss": 0.8355, "step": 9613 }, { "epoch": 0.5741072494924161, "grad_norm": 2.4134912490844727, "learning_rate": 4.732930794240595e-06, "loss": 0.8088, "step": 9614 }, { "epoch": 0.5741669652454318, "grad_norm": 1.7852412462234497, "learning_rate": 4.732267268263553e-06, "loss": 0.8541, "step": 9615 }, { "epoch": 0.5742266809984474, "grad_norm": 2.970414876937866, "learning_rate": 4.7316037422865105e-06, "loss": 0.8425, "step": 9616 }, { "epoch": 0.5742863967514631, "grad_norm": 1.9497779607772827, "learning_rate": 4.730940216309469e-06, "loss": 0.8923, "step": 9617 }, { "epoch": 0.5743461125044786, "grad_norm": 2.1177709102630615, "learning_rate": 4.730276690332427e-06, "loss": 0.8409, "step": 9618 }, { "epoch": 0.5744058282574943, "grad_norm": 2.1625349521636963, "learning_rate": 4.729613164355385e-06, "loss": 0.8284, "step": 9619 }, { "epoch": 0.57446554401051, "grad_norm": 1.8980647325515747, "learning_rate": 4.728949638378343e-06, "loss": 0.9003, "step": 9620 }, { "epoch": 0.5745252597635256, "grad_norm": 1.7769392728805542, "learning_rate": 4.728286112401301e-06, "loss": 0.8345, "step": 9621 }, { "epoch": 0.5745849755165413, "grad_norm": 2.2684454917907715, "learning_rate": 4.727622586424258e-06, "loss": 0.8328, "step": 9622 }, { "epoch": 0.5746446912695569, "grad_norm": 1.7985825538635254, "learning_rate": 4.726959060447217e-06, "loss": 0.855, "step": 9623 }, { "epoch": 0.5747044070225725, "grad_norm": 2.574204683303833, "learning_rate": 4.726295534470175e-06, "loss": 0.8706, "step": 9624 }, { "epoch": 0.5747641227755882, "grad_norm": 1.6204386949539185, "learning_rate": 4.725632008493133e-06, "loss": 0.8195, "step": 9625 }, { "epoch": 0.5748238385286039, "grad_norm": 2.2616822719573975, "learning_rate": 4.724968482516091e-06, "loss": 0.8537, "step": 9626 }, { "epoch": 0.5748835542816195, "grad_norm": 1.7789970636367798, "learning_rate": 4.724304956539049e-06, "loss": 0.8786, "step": 9627 }, { "epoch": 0.5749432700346352, "grad_norm": 1.9727766513824463, "learning_rate": 4.723641430562007e-06, "loss": 0.8769, "step": 9628 }, { "epoch": 0.5750029857876507, "grad_norm": 2.148481607437134, "learning_rate": 4.722977904584965e-06, "loss": 0.8665, "step": 9629 }, { "epoch": 0.5750627015406664, "grad_norm": 2.960862398147583, "learning_rate": 4.722314378607923e-06, "loss": 0.8758, "step": 9630 }, { "epoch": 0.5751224172936821, "grad_norm": 2.013500452041626, "learning_rate": 4.72165085263088e-06, "loss": 0.8151, "step": 9631 }, { "epoch": 0.5751821330466977, "grad_norm": 2.194687604904175, "learning_rate": 4.7209873266538385e-06, "loss": 0.8642, "step": 9632 }, { "epoch": 0.5752418487997134, "grad_norm": 2.227696180343628, "learning_rate": 4.720323800676797e-06, "loss": 0.8809, "step": 9633 }, { "epoch": 0.575301564552729, "grad_norm": 4.908677577972412, "learning_rate": 4.719660274699755e-06, "loss": 0.8436, "step": 9634 }, { "epoch": 0.5753612803057446, "grad_norm": 2.035874366760254, "learning_rate": 4.718996748722713e-06, "loss": 0.821, "step": 9635 }, { "epoch": 0.5754209960587603, "grad_norm": 1.6133359670639038, "learning_rate": 4.718333222745671e-06, "loss": 0.8192, "step": 9636 }, { "epoch": 0.575480711811776, "grad_norm": 2.056939125061035, "learning_rate": 4.717669696768629e-06, "loss": 0.8371, "step": 9637 }, { "epoch": 0.5755404275647916, "grad_norm": 1.922611117362976, "learning_rate": 4.717006170791587e-06, "loss": 0.8249, "step": 9638 }, { "epoch": 0.5756001433178073, "grad_norm": 3.1223602294921875, "learning_rate": 4.716342644814545e-06, "loss": 0.8212, "step": 9639 }, { "epoch": 0.5756598590708228, "grad_norm": 1.8363486528396606, "learning_rate": 4.715679118837503e-06, "loss": 0.8731, "step": 9640 }, { "epoch": 0.5757195748238385, "grad_norm": 2.817385196685791, "learning_rate": 4.7150155928604605e-06, "loss": 0.8355, "step": 9641 }, { "epoch": 0.5757792905768542, "grad_norm": 2.9196999073028564, "learning_rate": 4.714352066883419e-06, "loss": 0.8642, "step": 9642 }, { "epoch": 0.5758390063298698, "grad_norm": 2.339099645614624, "learning_rate": 4.713688540906377e-06, "loss": 0.8326, "step": 9643 }, { "epoch": 0.5758987220828855, "grad_norm": 3.2129290103912354, "learning_rate": 4.713025014929335e-06, "loss": 0.8596, "step": 9644 }, { "epoch": 0.5759584378359011, "grad_norm": 1.971992015838623, "learning_rate": 4.712361488952293e-06, "loss": 0.865, "step": 9645 }, { "epoch": 0.5760181535889167, "grad_norm": 4.3233513832092285, "learning_rate": 4.711697962975251e-06, "loss": 0.8289, "step": 9646 }, { "epoch": 0.5760778693419324, "grad_norm": 3.7819724082946777, "learning_rate": 4.711034436998208e-06, "loss": 0.8492, "step": 9647 }, { "epoch": 0.5761375850949481, "grad_norm": 2.08652400970459, "learning_rate": 4.710370911021167e-06, "loss": 0.8576, "step": 9648 }, { "epoch": 0.5761973008479637, "grad_norm": 2.319718837738037, "learning_rate": 4.709707385044125e-06, "loss": 0.8257, "step": 9649 }, { "epoch": 0.5762570166009794, "grad_norm": 3.003934621810913, "learning_rate": 4.7090438590670826e-06, "loss": 0.8403, "step": 9650 }, { "epoch": 0.5763167323539949, "grad_norm": 2.8210175037384033, "learning_rate": 4.708380333090041e-06, "loss": 0.849, "step": 9651 }, { "epoch": 0.5763764481070106, "grad_norm": 2.136664628982544, "learning_rate": 4.707716807112999e-06, "loss": 0.8581, "step": 9652 }, { "epoch": 0.5764361638600263, "grad_norm": 4.441267490386963, "learning_rate": 4.707053281135957e-06, "loss": 0.875, "step": 9653 }, { "epoch": 0.5764958796130419, "grad_norm": 2.9552061557769775, "learning_rate": 4.706389755158915e-06, "loss": 0.8605, "step": 9654 }, { "epoch": 0.5765555953660576, "grad_norm": 3.4217379093170166, "learning_rate": 4.705726229181873e-06, "loss": 0.8866, "step": 9655 }, { "epoch": 0.5766153111190732, "grad_norm": 2.1514601707458496, "learning_rate": 4.70506270320483e-06, "loss": 0.8796, "step": 9656 }, { "epoch": 0.5766750268720888, "grad_norm": 2.300152063369751, "learning_rate": 4.7043991772277884e-06, "loss": 0.8496, "step": 9657 }, { "epoch": 0.5767347426251045, "grad_norm": 1.729878544807434, "learning_rate": 4.703735651250747e-06, "loss": 0.8358, "step": 9658 }, { "epoch": 0.5767944583781202, "grad_norm": 2.25055193901062, "learning_rate": 4.703072125273705e-06, "loss": 0.8601, "step": 9659 }, { "epoch": 0.5768541741311358, "grad_norm": 2.1556179523468018, "learning_rate": 4.702408599296663e-06, "loss": 0.8735, "step": 9660 }, { "epoch": 0.5769138898841515, "grad_norm": 3.3266561031341553, "learning_rate": 4.701745073319621e-06, "loss": 0.8964, "step": 9661 }, { "epoch": 0.576973605637167, "grad_norm": 4.009665489196777, "learning_rate": 4.701081547342579e-06, "loss": 0.8672, "step": 9662 }, { "epoch": 0.5770333213901827, "grad_norm": 1.7491750717163086, "learning_rate": 4.700418021365537e-06, "loss": 0.83, "step": 9663 }, { "epoch": 0.5770930371431984, "grad_norm": 2.024280071258545, "learning_rate": 4.699754495388495e-06, "loss": 0.8598, "step": 9664 }, { "epoch": 0.577152752896214, "grad_norm": 2.1901051998138428, "learning_rate": 4.699090969411453e-06, "loss": 0.8586, "step": 9665 }, { "epoch": 0.5772124686492297, "grad_norm": 4.9577741622924805, "learning_rate": 4.6984274434344105e-06, "loss": 0.8634, "step": 9666 }, { "epoch": 0.5772721844022453, "grad_norm": 2.245246171951294, "learning_rate": 4.697763917457369e-06, "loss": 0.8272, "step": 9667 }, { "epoch": 0.5773319001552609, "grad_norm": 2.3911874294281006, "learning_rate": 4.697100391480327e-06, "loss": 0.8283, "step": 9668 }, { "epoch": 0.5773916159082766, "grad_norm": 3.645880937576294, "learning_rate": 4.696436865503285e-06, "loss": 0.848, "step": 9669 }, { "epoch": 0.5774513316612923, "grad_norm": 2.6474034786224365, "learning_rate": 4.695773339526243e-06, "loss": 0.8252, "step": 9670 }, { "epoch": 0.5775110474143079, "grad_norm": 1.6819250583648682, "learning_rate": 4.695109813549201e-06, "loss": 0.8221, "step": 9671 }, { "epoch": 0.5775707631673236, "grad_norm": 1.5659717321395874, "learning_rate": 4.694446287572158e-06, "loss": 0.8557, "step": 9672 }, { "epoch": 0.5776304789203391, "grad_norm": 2.2039361000061035, "learning_rate": 4.693782761595117e-06, "loss": 0.8354, "step": 9673 }, { "epoch": 0.5776901946733548, "grad_norm": 4.08158540725708, "learning_rate": 4.693119235618075e-06, "loss": 0.839, "step": 9674 }, { "epoch": 0.5777499104263705, "grad_norm": 3.2958121299743652, "learning_rate": 4.6924557096410325e-06, "loss": 0.866, "step": 9675 }, { "epoch": 0.5778096261793861, "grad_norm": 3.2506186962127686, "learning_rate": 4.691792183663991e-06, "loss": 0.8091, "step": 9676 }, { "epoch": 0.5778693419324018, "grad_norm": 1.8421286344528198, "learning_rate": 4.691128657686949e-06, "loss": 0.8627, "step": 9677 }, { "epoch": 0.5779290576854174, "grad_norm": 2.16349196434021, "learning_rate": 4.690465131709907e-06, "loss": 0.8628, "step": 9678 }, { "epoch": 0.577988773438433, "grad_norm": 2.33404803276062, "learning_rate": 4.689801605732865e-06, "loss": 0.824, "step": 9679 }, { "epoch": 0.5780484891914487, "grad_norm": 2.675239086151123, "learning_rate": 4.689138079755823e-06, "loss": 0.8196, "step": 9680 }, { "epoch": 0.5781082049444644, "grad_norm": 4.224271774291992, "learning_rate": 4.68847455377878e-06, "loss": 0.8872, "step": 9681 }, { "epoch": 0.57816792069748, "grad_norm": 2.897153377532959, "learning_rate": 4.687811027801738e-06, "loss": 0.8588, "step": 9682 }, { "epoch": 0.5782276364504957, "grad_norm": 1.7230513095855713, "learning_rate": 4.687147501824697e-06, "loss": 0.8553, "step": 9683 }, { "epoch": 0.5782873522035112, "grad_norm": 1.8357850313186646, "learning_rate": 4.686483975847655e-06, "loss": 0.8496, "step": 9684 }, { "epoch": 0.5783470679565269, "grad_norm": 1.949015736579895, "learning_rate": 4.685820449870613e-06, "loss": 0.854, "step": 9685 }, { "epoch": 0.5784067837095426, "grad_norm": 2.15169358253479, "learning_rate": 4.685156923893571e-06, "loss": 0.8712, "step": 9686 }, { "epoch": 0.5784664994625582, "grad_norm": 1.6775522232055664, "learning_rate": 4.684493397916529e-06, "loss": 0.8293, "step": 9687 }, { "epoch": 0.5785262152155739, "grad_norm": 2.6650195121765137, "learning_rate": 4.683829871939487e-06, "loss": 0.831, "step": 9688 }, { "epoch": 0.5785859309685896, "grad_norm": 1.655709981918335, "learning_rate": 4.683166345962445e-06, "loss": 0.8647, "step": 9689 }, { "epoch": 0.5786456467216051, "grad_norm": 2.8024911880493164, "learning_rate": 4.682502819985403e-06, "loss": 0.8325, "step": 9690 }, { "epoch": 0.5787053624746208, "grad_norm": 1.819495677947998, "learning_rate": 4.6818392940083605e-06, "loss": 0.8649, "step": 9691 }, { "epoch": 0.5787650782276365, "grad_norm": 1.8526265621185303, "learning_rate": 4.6811757680313186e-06, "loss": 0.8512, "step": 9692 }, { "epoch": 0.5788247939806521, "grad_norm": 1.9447035789489746, "learning_rate": 4.680512242054277e-06, "loss": 0.8659, "step": 9693 }, { "epoch": 0.5788845097336678, "grad_norm": 3.197871446609497, "learning_rate": 4.679848716077235e-06, "loss": 0.8512, "step": 9694 }, { "epoch": 0.5789442254866833, "grad_norm": 2.4402618408203125, "learning_rate": 4.679185190100193e-06, "loss": 0.8541, "step": 9695 }, { "epoch": 0.579003941239699, "grad_norm": 2.611494779586792, "learning_rate": 4.678521664123151e-06, "loss": 0.8339, "step": 9696 }, { "epoch": 0.5790636569927147, "grad_norm": 2.622999668121338, "learning_rate": 4.677858138146108e-06, "loss": 0.8312, "step": 9697 }, { "epoch": 0.5791233727457303, "grad_norm": 1.7175668478012085, "learning_rate": 4.677194612169067e-06, "loss": 0.8473, "step": 9698 }, { "epoch": 0.579183088498746, "grad_norm": 1.9893646240234375, "learning_rate": 4.676531086192025e-06, "loss": 0.8221, "step": 9699 }, { "epoch": 0.5792428042517617, "grad_norm": 2.1037631034851074, "learning_rate": 4.6758675602149825e-06, "loss": 0.8549, "step": 9700 }, { "epoch": 0.5793025200047772, "grad_norm": 2.2782652378082275, "learning_rate": 4.675204034237941e-06, "loss": 0.85, "step": 9701 }, { "epoch": 0.5793622357577929, "grad_norm": 1.4848319292068481, "learning_rate": 4.674540508260899e-06, "loss": 0.842, "step": 9702 }, { "epoch": 0.5794219515108086, "grad_norm": 1.7550103664398193, "learning_rate": 4.673876982283857e-06, "loss": 0.8877, "step": 9703 }, { "epoch": 0.5794816672638242, "grad_norm": 3.0463359355926514, "learning_rate": 4.673213456306815e-06, "loss": 0.8612, "step": 9704 }, { "epoch": 0.5795413830168399, "grad_norm": 1.7779392004013062, "learning_rate": 4.672549930329773e-06, "loss": 0.8424, "step": 9705 }, { "epoch": 0.5796010987698554, "grad_norm": 1.8066340684890747, "learning_rate": 4.67188640435273e-06, "loss": 0.8203, "step": 9706 }, { "epoch": 0.5796608145228711, "grad_norm": 2.5625476837158203, "learning_rate": 4.671222878375688e-06, "loss": 0.8453, "step": 9707 }, { "epoch": 0.5797205302758868, "grad_norm": 2.677004098892212, "learning_rate": 4.670559352398647e-06, "loss": 0.8616, "step": 9708 }, { "epoch": 0.5797802460289024, "grad_norm": 3.0512404441833496, "learning_rate": 4.6698958264216046e-06, "loss": 0.8427, "step": 9709 }, { "epoch": 0.5798399617819181, "grad_norm": 2.157684087753296, "learning_rate": 4.669232300444563e-06, "loss": 0.8708, "step": 9710 }, { "epoch": 0.5798996775349338, "grad_norm": 2.2778892517089844, "learning_rate": 4.668568774467521e-06, "loss": 0.8445, "step": 9711 }, { "epoch": 0.5799593932879493, "grad_norm": 1.7318205833435059, "learning_rate": 4.667905248490479e-06, "loss": 0.8104, "step": 9712 }, { "epoch": 0.580019109040965, "grad_norm": 9.415493965148926, "learning_rate": 4.667241722513437e-06, "loss": 0.8691, "step": 9713 }, { "epoch": 0.5800788247939807, "grad_norm": 2.6666946411132812, "learning_rate": 4.666578196536395e-06, "loss": 0.8605, "step": 9714 }, { "epoch": 0.5801385405469963, "grad_norm": 2.5565268993377686, "learning_rate": 4.665914670559353e-06, "loss": 0.8599, "step": 9715 }, { "epoch": 0.580198256300012, "grad_norm": 3.1307308673858643, "learning_rate": 4.6652511445823104e-06, "loss": 0.8095, "step": 9716 }, { "epoch": 0.5802579720530275, "grad_norm": 1.9920707941055298, "learning_rate": 4.6645876186052685e-06, "loss": 0.8647, "step": 9717 }, { "epoch": 0.5803176878060432, "grad_norm": 2.5919270515441895, "learning_rate": 4.663924092628227e-06, "loss": 0.8526, "step": 9718 }, { "epoch": 0.5803774035590589, "grad_norm": 2.9275476932525635, "learning_rate": 4.663260566651185e-06, "loss": 0.8721, "step": 9719 }, { "epoch": 0.5804371193120745, "grad_norm": 1.9598912000656128, "learning_rate": 4.662597040674143e-06, "loss": 0.8756, "step": 9720 }, { "epoch": 0.5804968350650902, "grad_norm": 2.0718278884887695, "learning_rate": 4.661933514697101e-06, "loss": 0.873, "step": 9721 }, { "epoch": 0.5805565508181059, "grad_norm": 3.244798421859741, "learning_rate": 4.661269988720058e-06, "loss": 0.8543, "step": 9722 }, { "epoch": 0.5806162665711214, "grad_norm": 6.4575114250183105, "learning_rate": 4.660606462743017e-06, "loss": 0.8335, "step": 9723 }, { "epoch": 0.5806759823241371, "grad_norm": 2.610182762145996, "learning_rate": 4.659942936765975e-06, "loss": 0.8456, "step": 9724 }, { "epoch": 0.5807356980771528, "grad_norm": 1.732574224472046, "learning_rate": 4.6592794107889325e-06, "loss": 0.8294, "step": 9725 }, { "epoch": 0.5807954138301684, "grad_norm": 2.7637009620666504, "learning_rate": 4.658615884811891e-06, "loss": 0.8271, "step": 9726 }, { "epoch": 0.5808551295831841, "grad_norm": 2.466932535171509, "learning_rate": 4.657952358834849e-06, "loss": 0.8377, "step": 9727 }, { "epoch": 0.5809148453361996, "grad_norm": 3.8394880294799805, "learning_rate": 4.657288832857807e-06, "loss": 0.8509, "step": 9728 }, { "epoch": 0.5809745610892153, "grad_norm": 1.935686469078064, "learning_rate": 4.656625306880765e-06, "loss": 0.9083, "step": 9729 }, { "epoch": 0.581034276842231, "grad_norm": 1.56390380859375, "learning_rate": 4.655961780903723e-06, "loss": 0.8565, "step": 9730 }, { "epoch": 0.5810939925952466, "grad_norm": 3.553429126739502, "learning_rate": 4.65529825492668e-06, "loss": 0.8623, "step": 9731 }, { "epoch": 0.5811537083482623, "grad_norm": 2.2732841968536377, "learning_rate": 4.654634728949638e-06, "loss": 0.842, "step": 9732 }, { "epoch": 0.581213424101278, "grad_norm": 1.7859885692596436, "learning_rate": 4.653971202972597e-06, "loss": 0.8776, "step": 9733 }, { "epoch": 0.5812731398542935, "grad_norm": 1.9913522005081177, "learning_rate": 4.6533076769955545e-06, "loss": 0.8314, "step": 9734 }, { "epoch": 0.5813328556073092, "grad_norm": 3.1149046421051025, "learning_rate": 4.652644151018513e-06, "loss": 0.8914, "step": 9735 }, { "epoch": 0.5813925713603249, "grad_norm": 1.861186146736145, "learning_rate": 4.651980625041471e-06, "loss": 0.8778, "step": 9736 }, { "epoch": 0.5814522871133405, "grad_norm": 4.65740442276001, "learning_rate": 4.651317099064429e-06, "loss": 0.895, "step": 9737 }, { "epoch": 0.5815120028663562, "grad_norm": 2.148867130279541, "learning_rate": 4.650653573087387e-06, "loss": 0.8556, "step": 9738 }, { "epoch": 0.5815717186193717, "grad_norm": 1.8501278162002563, "learning_rate": 4.649990047110345e-06, "loss": 0.8573, "step": 9739 }, { "epoch": 0.5816314343723874, "grad_norm": 1.9185744524002075, "learning_rate": 4.649326521133303e-06, "loss": 0.8599, "step": 9740 }, { "epoch": 0.5816911501254031, "grad_norm": 2.1557974815368652, "learning_rate": 4.64866299515626e-06, "loss": 0.8064, "step": 9741 }, { "epoch": 0.5817508658784187, "grad_norm": 6.417911529541016, "learning_rate": 4.6479994691792185e-06, "loss": 0.8202, "step": 9742 }, { "epoch": 0.5818105816314344, "grad_norm": 2.0099732875823975, "learning_rate": 4.647335943202177e-06, "loss": 0.8395, "step": 9743 }, { "epoch": 0.5818702973844501, "grad_norm": 3.714869499206543, "learning_rate": 4.646672417225135e-06, "loss": 0.8705, "step": 9744 }, { "epoch": 0.5819300131374656, "grad_norm": 3.3482308387756348, "learning_rate": 4.646008891248093e-06, "loss": 0.8503, "step": 9745 }, { "epoch": 0.5819897288904813, "grad_norm": 2.0452077388763428, "learning_rate": 4.645345365271051e-06, "loss": 0.8376, "step": 9746 }, { "epoch": 0.582049444643497, "grad_norm": 1.963344693183899, "learning_rate": 4.644681839294008e-06, "loss": 0.8613, "step": 9747 }, { "epoch": 0.5821091603965126, "grad_norm": 2.77584171295166, "learning_rate": 4.644018313316967e-06, "loss": 0.8411, "step": 9748 }, { "epoch": 0.5821688761495283, "grad_norm": 2.452472686767578, "learning_rate": 4.643354787339925e-06, "loss": 0.8543, "step": 9749 }, { "epoch": 0.582228591902544, "grad_norm": 2.603233814239502, "learning_rate": 4.6426912613628825e-06, "loss": 0.8608, "step": 9750 }, { "epoch": 0.5822883076555595, "grad_norm": 3.996692657470703, "learning_rate": 4.6420277353858406e-06, "loss": 0.8029, "step": 9751 }, { "epoch": 0.5823480234085752, "grad_norm": 2.4115700721740723, "learning_rate": 4.641364209408799e-06, "loss": 0.8099, "step": 9752 }, { "epoch": 0.5824077391615908, "grad_norm": 2.0126616954803467, "learning_rate": 4.640700683431757e-06, "loss": 0.8438, "step": 9753 }, { "epoch": 0.5824674549146065, "grad_norm": 2.167245626449585, "learning_rate": 4.640037157454715e-06, "loss": 0.8198, "step": 9754 }, { "epoch": 0.5825271706676222, "grad_norm": 2.3687753677368164, "learning_rate": 4.639373631477673e-06, "loss": 0.8266, "step": 9755 }, { "epoch": 0.5825868864206377, "grad_norm": 2.0243008136749268, "learning_rate": 4.63871010550063e-06, "loss": 0.8728, "step": 9756 }, { "epoch": 0.5826466021736534, "grad_norm": 5.912217617034912, "learning_rate": 4.638046579523588e-06, "loss": 0.8305, "step": 9757 }, { "epoch": 0.582706317926669, "grad_norm": 15.926937103271484, "learning_rate": 4.637383053546547e-06, "loss": 0.8288, "step": 9758 }, { "epoch": 0.5827660336796847, "grad_norm": 2.458784580230713, "learning_rate": 4.6367195275695045e-06, "loss": 0.8719, "step": 9759 }, { "epoch": 0.5828257494327004, "grad_norm": 2.1488490104675293, "learning_rate": 4.636056001592463e-06, "loss": 0.8639, "step": 9760 }, { "epoch": 0.582885465185716, "grad_norm": 4.621596813201904, "learning_rate": 4.635392475615421e-06, "loss": 0.8607, "step": 9761 }, { "epoch": 0.5829451809387316, "grad_norm": 2.476743221282959, "learning_rate": 4.634728949638379e-06, "loss": 0.8775, "step": 9762 }, { "epoch": 0.5830048966917473, "grad_norm": 2.1836326122283936, "learning_rate": 4.634065423661337e-06, "loss": 0.8224, "step": 9763 }, { "epoch": 0.5830646124447629, "grad_norm": 2.6438682079315186, "learning_rate": 4.633401897684295e-06, "loss": 0.8664, "step": 9764 }, { "epoch": 0.5831243281977786, "grad_norm": 4.444763660430908, "learning_rate": 4.632738371707253e-06, "loss": 0.8666, "step": 9765 }, { "epoch": 0.5831840439507943, "grad_norm": 1.948673129081726, "learning_rate": 4.63207484573021e-06, "loss": 0.8429, "step": 9766 }, { "epoch": 0.5832437597038098, "grad_norm": 3.7210214138031006, "learning_rate": 4.6314113197531685e-06, "loss": 0.8197, "step": 9767 }, { "epoch": 0.5833034754568255, "grad_norm": 2.0644376277923584, "learning_rate": 4.6307477937761266e-06, "loss": 0.801, "step": 9768 }, { "epoch": 0.5833631912098411, "grad_norm": 1.810415267944336, "learning_rate": 4.630084267799085e-06, "loss": 0.8203, "step": 9769 }, { "epoch": 0.5834229069628568, "grad_norm": 2.3488471508026123, "learning_rate": 4.629420741822043e-06, "loss": 0.8402, "step": 9770 }, { "epoch": 0.5834826227158725, "grad_norm": 1.9028866291046143, "learning_rate": 4.628757215845001e-06, "loss": 0.8492, "step": 9771 }, { "epoch": 0.5835423384688881, "grad_norm": 1.8415727615356445, "learning_rate": 4.628093689867958e-06, "loss": 0.876, "step": 9772 }, { "epoch": 0.5836020542219037, "grad_norm": 4.606695652008057, "learning_rate": 4.627430163890917e-06, "loss": 0.8461, "step": 9773 }, { "epoch": 0.5836617699749194, "grad_norm": 2.1133711338043213, "learning_rate": 4.626766637913875e-06, "loss": 0.89, "step": 9774 }, { "epoch": 0.583721485727935, "grad_norm": 2.093114137649536, "learning_rate": 4.6261031119368324e-06, "loss": 0.8562, "step": 9775 }, { "epoch": 0.5837812014809507, "grad_norm": 4.13077974319458, "learning_rate": 4.6254395859597905e-06, "loss": 0.8611, "step": 9776 }, { "epoch": 0.5838409172339664, "grad_norm": 1.9410994052886963, "learning_rate": 4.624776059982749e-06, "loss": 0.8091, "step": 9777 }, { "epoch": 0.5839006329869819, "grad_norm": 3.514014482498169, "learning_rate": 4.624112534005707e-06, "loss": 0.8644, "step": 9778 }, { "epoch": 0.5839603487399976, "grad_norm": 4.2423095703125, "learning_rate": 4.623449008028665e-06, "loss": 0.8757, "step": 9779 }, { "epoch": 0.5840200644930132, "grad_norm": 1.7746368646621704, "learning_rate": 4.622785482051623e-06, "loss": 0.8668, "step": 9780 }, { "epoch": 0.5840797802460289, "grad_norm": 1.924607753753662, "learning_rate": 4.62212195607458e-06, "loss": 0.8229, "step": 9781 }, { "epoch": 0.5841394959990446, "grad_norm": 2.1866352558135986, "learning_rate": 4.621458430097538e-06, "loss": 0.8505, "step": 9782 }, { "epoch": 0.5841992117520602, "grad_norm": 2.133455276489258, "learning_rate": 4.620794904120497e-06, "loss": 0.8846, "step": 9783 }, { "epoch": 0.5842589275050758, "grad_norm": 2.564952850341797, "learning_rate": 4.6201313781434545e-06, "loss": 0.8625, "step": 9784 }, { "epoch": 0.5843186432580915, "grad_norm": 2.6276702880859375, "learning_rate": 4.619467852166413e-06, "loss": 0.8404, "step": 9785 }, { "epoch": 0.5843783590111071, "grad_norm": 1.6506381034851074, "learning_rate": 4.618804326189371e-06, "loss": 0.8296, "step": 9786 }, { "epoch": 0.5844380747641228, "grad_norm": 1.6723273992538452, "learning_rate": 4.618140800212329e-06, "loss": 0.8816, "step": 9787 }, { "epoch": 0.5844977905171385, "grad_norm": 2.1051111221313477, "learning_rate": 4.617477274235287e-06, "loss": 0.8614, "step": 9788 }, { "epoch": 0.584557506270154, "grad_norm": 2.193542957305908, "learning_rate": 4.616813748258245e-06, "loss": 0.8283, "step": 9789 }, { "epoch": 0.5846172220231697, "grad_norm": 2.113332986831665, "learning_rate": 4.616150222281203e-06, "loss": 0.8038, "step": 9790 }, { "epoch": 0.5846769377761853, "grad_norm": 2.6970255374908447, "learning_rate": 4.61548669630416e-06, "loss": 0.8527, "step": 9791 }, { "epoch": 0.584736653529201, "grad_norm": 1.8651899099349976, "learning_rate": 4.6148231703271184e-06, "loss": 0.8579, "step": 9792 }, { "epoch": 0.5847963692822167, "grad_norm": 1.8264355659484863, "learning_rate": 4.6141596443500765e-06, "loss": 0.8472, "step": 9793 }, { "epoch": 0.5848560850352323, "grad_norm": 3.432572841644287, "learning_rate": 4.613496118373035e-06, "loss": 0.8108, "step": 9794 }, { "epoch": 0.5849158007882479, "grad_norm": 3.149683952331543, "learning_rate": 4.612832592395993e-06, "loss": 0.8733, "step": 9795 }, { "epoch": 0.5849755165412636, "grad_norm": 1.9800047874450684, "learning_rate": 4.612169066418951e-06, "loss": 0.8098, "step": 9796 }, { "epoch": 0.5850352322942792, "grad_norm": 2.7550036907196045, "learning_rate": 4.611505540441908e-06, "loss": 0.8467, "step": 9797 }, { "epoch": 0.5850949480472949, "grad_norm": 2.0944290161132812, "learning_rate": 4.610842014464867e-06, "loss": 0.8468, "step": 9798 }, { "epoch": 0.5851546638003106, "grad_norm": 5.118463039398193, "learning_rate": 4.610178488487825e-06, "loss": 0.8303, "step": 9799 }, { "epoch": 0.5852143795533261, "grad_norm": 3.6018309593200684, "learning_rate": 4.609514962510782e-06, "loss": 0.8348, "step": 9800 }, { "epoch": 0.5852143795533261, "eval_text_loss": 0.9076173901557922, "eval_text_runtime": 15.1729, "eval_text_samples_per_second": 263.629, "eval_text_steps_per_second": 0.527, "step": 9800 }, { "epoch": 0.5852143795533261, "eval_image_loss": 0.6163735389709473, "eval_image_runtime": 5.0275, "eval_image_samples_per_second": 795.632, "eval_image_steps_per_second": 1.591, "step": 9800 }, { "epoch": 0.5852143795533261, "eval_video_loss": 1.055126667022705, "eval_video_runtime": 76.6371, "eval_video_samples_per_second": 52.194, "eval_video_steps_per_second": 0.104, "step": 9800 }, { "epoch": 0.5852740953063418, "grad_norm": 3.394289016723633, "learning_rate": 4.6088514365337405e-06, "loss": 0.8707, "step": 9801 }, { "epoch": 0.5853338110593574, "grad_norm": 2.6555609703063965, "learning_rate": 4.608187910556699e-06, "loss": 0.8329, "step": 9802 }, { "epoch": 0.5853935268123731, "grad_norm": 1.7916792631149292, "learning_rate": 4.607524384579657e-06, "loss": 0.8015, "step": 9803 }, { "epoch": 0.5854532425653888, "grad_norm": 1.9857149124145508, "learning_rate": 4.606860858602615e-06, "loss": 0.8403, "step": 9804 }, { "epoch": 0.5855129583184044, "grad_norm": 1.7006531953811646, "learning_rate": 4.606197332625573e-06, "loss": 0.8515, "step": 9805 }, { "epoch": 0.58557267407142, "grad_norm": 2.075517416000366, "learning_rate": 4.60553380664853e-06, "loss": 0.8734, "step": 9806 }, { "epoch": 0.5856323898244357, "grad_norm": 2.048314332962036, "learning_rate": 4.604870280671488e-06, "loss": 0.8763, "step": 9807 }, { "epoch": 0.5856921055774513, "grad_norm": 3.8433022499084473, "learning_rate": 4.604206754694447e-06, "loss": 0.8433, "step": 9808 }, { "epoch": 0.585751821330467, "grad_norm": 8.711873054504395, "learning_rate": 4.6035432287174045e-06, "loss": 0.8475, "step": 9809 }, { "epoch": 0.5858115370834827, "grad_norm": 1.7283726930618286, "learning_rate": 4.6028797027403626e-06, "loss": 0.8309, "step": 9810 }, { "epoch": 0.5858712528364982, "grad_norm": 2.3291282653808594, "learning_rate": 4.602216176763321e-06, "loss": 0.7841, "step": 9811 }, { "epoch": 0.5859309685895139, "grad_norm": 3.468174934387207, "learning_rate": 4.601552650786279e-06, "loss": 0.8202, "step": 9812 }, { "epoch": 0.5859906843425295, "grad_norm": 2.0143160820007324, "learning_rate": 4.600889124809237e-06, "loss": 0.8581, "step": 9813 }, { "epoch": 0.5860504000955452, "grad_norm": 2.6504902839660645, "learning_rate": 4.600225598832195e-06, "loss": 0.8332, "step": 9814 }, { "epoch": 0.5861101158485609, "grad_norm": 2.3900015354156494, "learning_rate": 4.599562072855153e-06, "loss": 0.8565, "step": 9815 }, { "epoch": 0.5861698316015765, "grad_norm": 2.8762118816375732, "learning_rate": 4.59889854687811e-06, "loss": 0.9045, "step": 9816 }, { "epoch": 0.5862295473545921, "grad_norm": 1.9001286029815674, "learning_rate": 4.598235020901068e-06, "loss": 0.8513, "step": 9817 }, { "epoch": 0.5862892631076078, "grad_norm": 2.395024061203003, "learning_rate": 4.5975714949240265e-06, "loss": 0.8334, "step": 9818 }, { "epoch": 0.5863489788606234, "grad_norm": 2.754429340362549, "learning_rate": 4.596907968946985e-06, "loss": 0.8756, "step": 9819 }, { "epoch": 0.5864086946136391, "grad_norm": 2.325119733810425, "learning_rate": 4.596244442969943e-06, "loss": 0.8474, "step": 9820 }, { "epoch": 0.5864684103666548, "grad_norm": 1.9855787754058838, "learning_rate": 4.595580916992901e-06, "loss": 0.8709, "step": 9821 }, { "epoch": 0.5865281261196704, "grad_norm": 1.9259202480316162, "learning_rate": 4.594917391015858e-06, "loss": 0.8247, "step": 9822 }, { "epoch": 0.586587841872686, "grad_norm": 2.472327470779419, "learning_rate": 4.594253865038817e-06, "loss": 0.856, "step": 9823 }, { "epoch": 0.5866475576257016, "grad_norm": 1.700358271598816, "learning_rate": 4.593590339061775e-06, "loss": 0.8483, "step": 9824 }, { "epoch": 0.5867072733787173, "grad_norm": 1.7327522039413452, "learning_rate": 4.592926813084732e-06, "loss": 0.8778, "step": 9825 }, { "epoch": 0.586766989131733, "grad_norm": 2.006208896636963, "learning_rate": 4.5922632871076905e-06, "loss": 0.8381, "step": 9826 }, { "epoch": 0.5868267048847486, "grad_norm": 3.0724165439605713, "learning_rate": 4.5915997611306486e-06, "loss": 0.8041, "step": 9827 }, { "epoch": 0.5868864206377642, "grad_norm": 1.9915515184402466, "learning_rate": 4.590936235153607e-06, "loss": 0.8561, "step": 9828 }, { "epoch": 0.5869461363907799, "grad_norm": 6.047802448272705, "learning_rate": 4.590272709176565e-06, "loss": 0.8228, "step": 9829 }, { "epoch": 0.5870058521437955, "grad_norm": 2.0911738872528076, "learning_rate": 4.589609183199523e-06, "loss": 0.8617, "step": 9830 }, { "epoch": 0.5870655678968112, "grad_norm": 2.1226837635040283, "learning_rate": 4.58894565722248e-06, "loss": 0.8481, "step": 9831 }, { "epoch": 0.5871252836498269, "grad_norm": 2.2057876586914062, "learning_rate": 4.588282131245438e-06, "loss": 0.8804, "step": 9832 }, { "epoch": 0.5871849994028425, "grad_norm": 1.6129764318466187, "learning_rate": 4.587618605268397e-06, "loss": 0.8359, "step": 9833 }, { "epoch": 0.5872447151558581, "grad_norm": 2.683440685272217, "learning_rate": 4.5869550792913544e-06, "loss": 0.8416, "step": 9834 }, { "epoch": 0.5873044309088737, "grad_norm": 2.2553508281707764, "learning_rate": 4.5862915533143125e-06, "loss": 0.8548, "step": 9835 }, { "epoch": 0.5873641466618894, "grad_norm": 2.8372645378112793, "learning_rate": 4.585628027337271e-06, "loss": 0.8728, "step": 9836 }, { "epoch": 0.5874238624149051, "grad_norm": 1.9077948331832886, "learning_rate": 4.584964501360229e-06, "loss": 0.8692, "step": 9837 }, { "epoch": 0.5874835781679207, "grad_norm": 2.3099489212036133, "learning_rate": 4.584300975383187e-06, "loss": 0.8227, "step": 9838 }, { "epoch": 0.5875432939209363, "grad_norm": 2.029076099395752, "learning_rate": 4.583637449406145e-06, "loss": 0.7823, "step": 9839 }, { "epoch": 0.587603009673952, "grad_norm": 2.791740894317627, "learning_rate": 4.582973923429103e-06, "loss": 0.8611, "step": 9840 }, { "epoch": 0.5876627254269676, "grad_norm": 4.765145778656006, "learning_rate": 4.58231039745206e-06, "loss": 0.8497, "step": 9841 }, { "epoch": 0.5877224411799833, "grad_norm": 1.7896909713745117, "learning_rate": 4.581646871475018e-06, "loss": 0.833, "step": 9842 }, { "epoch": 0.587782156932999, "grad_norm": 1.6204431056976318, "learning_rate": 4.5809833454979765e-06, "loss": 0.8459, "step": 9843 }, { "epoch": 0.5878418726860146, "grad_norm": 2.37488055229187, "learning_rate": 4.580319819520935e-06, "loss": 0.8139, "step": 9844 }, { "epoch": 0.5879015884390302, "grad_norm": 1.9158570766448975, "learning_rate": 4.579656293543893e-06, "loss": 0.8626, "step": 9845 }, { "epoch": 0.5879613041920458, "grad_norm": 2.1202359199523926, "learning_rate": 4.578992767566851e-06, "loss": 0.8471, "step": 9846 }, { "epoch": 0.5880210199450615, "grad_norm": 2.6596577167510986, "learning_rate": 4.578329241589808e-06, "loss": 0.8276, "step": 9847 }, { "epoch": 0.5880807356980772, "grad_norm": 1.8584697246551514, "learning_rate": 4.577665715612767e-06, "loss": 0.8796, "step": 9848 }, { "epoch": 0.5881404514510928, "grad_norm": 3.27592134475708, "learning_rate": 4.577002189635725e-06, "loss": 0.8125, "step": 9849 }, { "epoch": 0.5882001672041084, "grad_norm": 1.8605836629867554, "learning_rate": 4.576338663658682e-06, "loss": 0.8487, "step": 9850 }, { "epoch": 0.5882598829571241, "grad_norm": 1.960606575012207, "learning_rate": 4.5756751376816404e-06, "loss": 0.841, "step": 9851 }, { "epoch": 0.5883195987101397, "grad_norm": 2.660177707672119, "learning_rate": 4.5750116117045985e-06, "loss": 0.8254, "step": 9852 }, { "epoch": 0.5883793144631554, "grad_norm": 2.4755802154541016, "learning_rate": 4.574348085727557e-06, "loss": 0.8493, "step": 9853 }, { "epoch": 0.588439030216171, "grad_norm": 1.8714869022369385, "learning_rate": 4.573684559750515e-06, "loss": 0.8384, "step": 9854 }, { "epoch": 0.5884987459691867, "grad_norm": 1.7931123971939087, "learning_rate": 4.573021033773473e-06, "loss": 0.8247, "step": 9855 }, { "epoch": 0.5885584617222023, "grad_norm": 3.1745986938476562, "learning_rate": 4.57235750779643e-06, "loss": 0.8919, "step": 9856 }, { "epoch": 0.5886181774752179, "grad_norm": 2.136906862258911, "learning_rate": 4.571693981819388e-06, "loss": 0.8625, "step": 9857 }, { "epoch": 0.5886778932282336, "grad_norm": 4.172841548919678, "learning_rate": 4.571030455842347e-06, "loss": 0.8569, "step": 9858 }, { "epoch": 0.5887376089812493, "grad_norm": 2.0366063117980957, "learning_rate": 4.570366929865304e-06, "loss": 0.8302, "step": 9859 }, { "epoch": 0.5887973247342649, "grad_norm": 1.9296919107437134, "learning_rate": 4.5697034038882625e-06, "loss": 0.854, "step": 9860 }, { "epoch": 0.5888570404872805, "grad_norm": 2.046773910522461, "learning_rate": 4.569039877911221e-06, "loss": 0.8291, "step": 9861 }, { "epoch": 0.5889167562402962, "grad_norm": 2.188145875930786, "learning_rate": 4.568376351934179e-06, "loss": 0.8194, "step": 9862 }, { "epoch": 0.5889764719933118, "grad_norm": 1.5648908615112305, "learning_rate": 4.567712825957137e-06, "loss": 0.829, "step": 9863 }, { "epoch": 0.5890361877463275, "grad_norm": 2.4065210819244385, "learning_rate": 4.567049299980095e-06, "loss": 0.8976, "step": 9864 }, { "epoch": 0.5890959034993432, "grad_norm": 2.5115907192230225, "learning_rate": 4.566385774003053e-06, "loss": 0.8465, "step": 9865 }, { "epoch": 0.5891556192523588, "grad_norm": 2.299729108810425, "learning_rate": 4.56572224802601e-06, "loss": 0.8357, "step": 9866 }, { "epoch": 0.5892153350053744, "grad_norm": 1.78577721118927, "learning_rate": 4.565058722048968e-06, "loss": 0.8505, "step": 9867 }, { "epoch": 0.58927505075839, "grad_norm": 2.7561142444610596, "learning_rate": 4.5643951960719265e-06, "loss": 0.8497, "step": 9868 }, { "epoch": 0.5893347665114057, "grad_norm": 2.0895731449127197, "learning_rate": 4.5637316700948846e-06, "loss": 0.8617, "step": 9869 }, { "epoch": 0.5893944822644214, "grad_norm": 1.7913788557052612, "learning_rate": 4.563068144117843e-06, "loss": 0.8752, "step": 9870 }, { "epoch": 0.589454198017437, "grad_norm": 1.9187259674072266, "learning_rate": 4.562404618140801e-06, "loss": 0.8741, "step": 9871 }, { "epoch": 0.5895139137704526, "grad_norm": 2.3346781730651855, "learning_rate": 4.561741092163758e-06, "loss": 0.7883, "step": 9872 }, { "epoch": 0.5895736295234683, "grad_norm": 2.749715566635132, "learning_rate": 4.561077566186717e-06, "loss": 0.8897, "step": 9873 }, { "epoch": 0.5896333452764839, "grad_norm": 1.437634825706482, "learning_rate": 4.560414040209675e-06, "loss": 0.8686, "step": 9874 }, { "epoch": 0.5896930610294996, "grad_norm": 1.9545179605484009, "learning_rate": 4.559750514232632e-06, "loss": 0.8287, "step": 9875 }, { "epoch": 0.5897527767825153, "grad_norm": 2.7166686058044434, "learning_rate": 4.55908698825559e-06, "loss": 0.8653, "step": 9876 }, { "epoch": 0.5898124925355309, "grad_norm": 2.8694703578948975, "learning_rate": 4.5584234622785485e-06, "loss": 0.8266, "step": 9877 }, { "epoch": 0.5898722082885465, "grad_norm": 2.1100120544433594, "learning_rate": 4.557759936301507e-06, "loss": 0.8339, "step": 9878 }, { "epoch": 0.5899319240415621, "grad_norm": 1.7858814001083374, "learning_rate": 4.557096410324465e-06, "loss": 0.8894, "step": 9879 }, { "epoch": 0.5899916397945778, "grad_norm": 2.5741729736328125, "learning_rate": 4.556432884347423e-06, "loss": 0.8372, "step": 9880 }, { "epoch": 0.5900513555475935, "grad_norm": 2.9994544982910156, "learning_rate": 4.55576935837038e-06, "loss": 0.8437, "step": 9881 }, { "epoch": 0.5901110713006091, "grad_norm": 2.2654993534088135, "learning_rate": 4.555105832393338e-06, "loss": 0.8574, "step": 9882 }, { "epoch": 0.5901707870536248, "grad_norm": 2.399690866470337, "learning_rate": 4.554442306416297e-06, "loss": 0.8452, "step": 9883 }, { "epoch": 0.5902305028066404, "grad_norm": 1.9277480840682983, "learning_rate": 4.553778780439254e-06, "loss": 0.8646, "step": 9884 }, { "epoch": 0.590290218559656, "grad_norm": 1.9017446041107178, "learning_rate": 4.5531152544622125e-06, "loss": 0.8526, "step": 9885 }, { "epoch": 0.5903499343126717, "grad_norm": 2.253796100616455, "learning_rate": 4.5524517284851706e-06, "loss": 0.8644, "step": 9886 }, { "epoch": 0.5904096500656874, "grad_norm": 2.326847553253174, "learning_rate": 4.551788202508129e-06, "loss": 0.8485, "step": 9887 }, { "epoch": 0.590469365818703, "grad_norm": 2.666926145553589, "learning_rate": 4.551124676531087e-06, "loss": 0.8385, "step": 9888 }, { "epoch": 0.5905290815717186, "grad_norm": 2.5659713745117188, "learning_rate": 4.550461150554045e-06, "loss": 0.8342, "step": 9889 }, { "epoch": 0.5905887973247342, "grad_norm": 3.1641592979431152, "learning_rate": 4.549797624577003e-06, "loss": 0.8661, "step": 9890 }, { "epoch": 0.5906485130777499, "grad_norm": 4.32320499420166, "learning_rate": 4.54913409859996e-06, "loss": 0.8595, "step": 9891 }, { "epoch": 0.5907082288307656, "grad_norm": 3.67785906791687, "learning_rate": 4.548470572622918e-06, "loss": 0.8682, "step": 9892 }, { "epoch": 0.5907679445837812, "grad_norm": 2.1942176818847656, "learning_rate": 4.5478070466458764e-06, "loss": 0.8353, "step": 9893 }, { "epoch": 0.5908276603367969, "grad_norm": 2.081061363220215, "learning_rate": 4.5471435206688345e-06, "loss": 0.8469, "step": 9894 }, { "epoch": 0.5908873760898125, "grad_norm": 1.6319035291671753, "learning_rate": 4.546479994691793e-06, "loss": 0.8435, "step": 9895 }, { "epoch": 0.5909470918428281, "grad_norm": 1.8311494588851929, "learning_rate": 4.545816468714751e-06, "loss": 0.8427, "step": 9896 }, { "epoch": 0.5910068075958438, "grad_norm": 2.1764724254608154, "learning_rate": 4.545152942737708e-06, "loss": 0.8331, "step": 9897 }, { "epoch": 0.5910665233488595, "grad_norm": 2.108311653137207, "learning_rate": 4.544489416760667e-06, "loss": 0.8313, "step": 9898 }, { "epoch": 0.5911262391018751, "grad_norm": 2.060800075531006, "learning_rate": 4.543825890783625e-06, "loss": 0.8374, "step": 9899 }, { "epoch": 0.5911859548548907, "grad_norm": 3.1354854106903076, "learning_rate": 4.543162364806582e-06, "loss": 0.8842, "step": 9900 }, { "epoch": 0.5912456706079063, "grad_norm": 5.4853363037109375, "learning_rate": 4.54249883882954e-06, "loss": 0.8532, "step": 9901 }, { "epoch": 0.591305386360922, "grad_norm": 2.765148639678955, "learning_rate": 4.5418353128524985e-06, "loss": 0.8931, "step": 9902 }, { "epoch": 0.5913651021139377, "grad_norm": 2.207677125930786, "learning_rate": 4.541171786875457e-06, "loss": 0.8228, "step": 9903 }, { "epoch": 0.5914248178669533, "grad_norm": 1.558311939239502, "learning_rate": 4.540508260898415e-06, "loss": 0.839, "step": 9904 }, { "epoch": 0.591484533619969, "grad_norm": 1.8501665592193604, "learning_rate": 4.539844734921373e-06, "loss": 0.9001, "step": 9905 }, { "epoch": 0.5915442493729846, "grad_norm": 2.6074867248535156, "learning_rate": 4.53918120894433e-06, "loss": 0.7994, "step": 9906 }, { "epoch": 0.5916039651260002, "grad_norm": 2.2010498046875, "learning_rate": 4.538517682967288e-06, "loss": 0.8626, "step": 9907 }, { "epoch": 0.5916636808790159, "grad_norm": 2.2737481594085693, "learning_rate": 4.537854156990247e-06, "loss": 0.8343, "step": 9908 }, { "epoch": 0.5917233966320316, "grad_norm": 2.6168980598449707, "learning_rate": 4.537190631013204e-06, "loss": 0.8696, "step": 9909 }, { "epoch": 0.5917831123850472, "grad_norm": 3.3032636642456055, "learning_rate": 4.5365271050361624e-06, "loss": 0.8529, "step": 9910 }, { "epoch": 0.5918428281380628, "grad_norm": 2.007991075515747, "learning_rate": 4.5358635790591205e-06, "loss": 0.8811, "step": 9911 }, { "epoch": 0.5919025438910784, "grad_norm": 2.459043502807617, "learning_rate": 4.535200053082079e-06, "loss": 0.8774, "step": 9912 }, { "epoch": 0.5919622596440941, "grad_norm": 1.7423540353775024, "learning_rate": 4.534536527105037e-06, "loss": 0.8554, "step": 9913 }, { "epoch": 0.5920219753971098, "grad_norm": 2.022404193878174, "learning_rate": 4.533873001127995e-06, "loss": 0.8641, "step": 9914 }, { "epoch": 0.5920816911501254, "grad_norm": 1.9937193393707275, "learning_rate": 4.533209475150953e-06, "loss": 0.8337, "step": 9915 }, { "epoch": 0.5921414069031411, "grad_norm": 2.7534358501434326, "learning_rate": 4.53254594917391e-06, "loss": 0.8638, "step": 9916 }, { "epoch": 0.5922011226561567, "grad_norm": 2.2313363552093506, "learning_rate": 4.531882423196868e-06, "loss": 0.8217, "step": 9917 }, { "epoch": 0.5922608384091723, "grad_norm": 3.442662477493286, "learning_rate": 4.531218897219826e-06, "loss": 0.8725, "step": 9918 }, { "epoch": 0.592320554162188, "grad_norm": 1.922140121459961, "learning_rate": 4.5305553712427845e-06, "loss": 0.8517, "step": 9919 }, { "epoch": 0.5923802699152036, "grad_norm": 3.6166298389434814, "learning_rate": 4.529891845265743e-06, "loss": 0.8723, "step": 9920 }, { "epoch": 0.5924399856682193, "grad_norm": 2.5617563724517822, "learning_rate": 4.529228319288701e-06, "loss": 0.8611, "step": 9921 }, { "epoch": 0.5924997014212349, "grad_norm": 2.177489995956421, "learning_rate": 4.528564793311658e-06, "loss": 0.8141, "step": 9922 }, { "epoch": 0.5925594171742505, "grad_norm": 2.104306936264038, "learning_rate": 4.527901267334617e-06, "loss": 0.8196, "step": 9923 }, { "epoch": 0.5926191329272662, "grad_norm": 1.740794062614441, "learning_rate": 4.527237741357575e-06, "loss": 0.7813, "step": 9924 }, { "epoch": 0.5926788486802819, "grad_norm": 2.886704921722412, "learning_rate": 4.526574215380532e-06, "loss": 0.8817, "step": 9925 }, { "epoch": 0.5927385644332975, "grad_norm": 6.8584885597229, "learning_rate": 4.52591068940349e-06, "loss": 0.8476, "step": 9926 }, { "epoch": 0.5927982801863132, "grad_norm": 1.959801435470581, "learning_rate": 4.5252471634264485e-06, "loss": 0.8145, "step": 9927 }, { "epoch": 0.5928579959393288, "grad_norm": 1.7937977313995361, "learning_rate": 4.5245836374494066e-06, "loss": 0.868, "step": 9928 }, { "epoch": 0.5929177116923444, "grad_norm": 2.2962377071380615, "learning_rate": 4.523920111472365e-06, "loss": 0.8748, "step": 9929 }, { "epoch": 0.5929774274453601, "grad_norm": 2.4341700077056885, "learning_rate": 4.523256585495323e-06, "loss": 0.8438, "step": 9930 }, { "epoch": 0.5930371431983757, "grad_norm": 2.1290152072906494, "learning_rate": 4.52259305951828e-06, "loss": 0.8525, "step": 9931 }, { "epoch": 0.5930968589513914, "grad_norm": 1.7525438070297241, "learning_rate": 4.521929533541238e-06, "loss": 0.814, "step": 9932 }, { "epoch": 0.593156574704407, "grad_norm": 2.1577117443084717, "learning_rate": 4.521266007564197e-06, "loss": 0.8541, "step": 9933 }, { "epoch": 0.5932162904574226, "grad_norm": 2.6226229667663574, "learning_rate": 4.520602481587154e-06, "loss": 0.884, "step": 9934 }, { "epoch": 0.5932760062104383, "grad_norm": 2.408637046813965, "learning_rate": 4.519938955610112e-06, "loss": 0.8594, "step": 9935 }, { "epoch": 0.593335721963454, "grad_norm": 4.4232177734375, "learning_rate": 4.5192754296330705e-06, "loss": 0.8401, "step": 9936 }, { "epoch": 0.5933954377164696, "grad_norm": 1.8550399541854858, "learning_rate": 4.518611903656029e-06, "loss": 0.826, "step": 9937 }, { "epoch": 0.5934551534694853, "grad_norm": 2.1138429641723633, "learning_rate": 4.517948377678987e-06, "loss": 0.8648, "step": 9938 }, { "epoch": 0.5935148692225009, "grad_norm": 4.245491981506348, "learning_rate": 4.517284851701945e-06, "loss": 0.8317, "step": 9939 }, { "epoch": 0.5935745849755165, "grad_norm": 2.35406494140625, "learning_rate": 4.516621325724903e-06, "loss": 0.8526, "step": 9940 }, { "epoch": 0.5936343007285322, "grad_norm": 3.058854341506958, "learning_rate": 4.51595779974786e-06, "loss": 0.8332, "step": 9941 }, { "epoch": 0.5936940164815478, "grad_norm": 2.0605311393737793, "learning_rate": 4.515294273770818e-06, "loss": 0.8032, "step": 9942 }, { "epoch": 0.5937537322345635, "grad_norm": 4.210663795471191, "learning_rate": 4.514630747793776e-06, "loss": 0.886, "step": 9943 }, { "epoch": 0.5938134479875791, "grad_norm": 1.8831422328948975, "learning_rate": 4.5139672218167345e-06, "loss": 0.7948, "step": 9944 }, { "epoch": 0.5938731637405947, "grad_norm": 2.37080717086792, "learning_rate": 4.5133036958396926e-06, "loss": 0.8399, "step": 9945 }, { "epoch": 0.5939328794936104, "grad_norm": 2.1413209438323975, "learning_rate": 4.512640169862651e-06, "loss": 0.8295, "step": 9946 }, { "epoch": 0.5939925952466261, "grad_norm": 1.843885064125061, "learning_rate": 4.511976643885608e-06, "loss": 0.8751, "step": 9947 }, { "epoch": 0.5940523109996417, "grad_norm": 1.7605869770050049, "learning_rate": 4.511313117908567e-06, "loss": 0.8266, "step": 9948 }, { "epoch": 0.5941120267526574, "grad_norm": 2.362232208251953, "learning_rate": 4.510649591931525e-06, "loss": 0.8185, "step": 9949 }, { "epoch": 0.594171742505673, "grad_norm": 2.523344039916992, "learning_rate": 4.509986065954482e-06, "loss": 0.8416, "step": 9950 }, { "epoch": 0.5942314582586886, "grad_norm": 2.37131929397583, "learning_rate": 4.50932253997744e-06, "loss": 0.8363, "step": 9951 }, { "epoch": 0.5942911740117043, "grad_norm": 2.4159677028656006, "learning_rate": 4.5086590140003984e-06, "loss": 0.858, "step": 9952 }, { "epoch": 0.59435088976472, "grad_norm": 1.9189517498016357, "learning_rate": 4.5079954880233565e-06, "loss": 0.8652, "step": 9953 }, { "epoch": 0.5944106055177356, "grad_norm": 2.6438353061676025, "learning_rate": 4.507331962046315e-06, "loss": 0.8676, "step": 9954 }, { "epoch": 0.5944703212707513, "grad_norm": 2.1244523525238037, "learning_rate": 4.506668436069273e-06, "loss": 0.8303, "step": 9955 }, { "epoch": 0.5945300370237668, "grad_norm": 5.180994987487793, "learning_rate": 4.50600491009223e-06, "loss": 0.8308, "step": 9956 }, { "epoch": 0.5945897527767825, "grad_norm": 1.8705908060073853, "learning_rate": 4.505341384115188e-06, "loss": 0.8365, "step": 9957 }, { "epoch": 0.5946494685297982, "grad_norm": 1.7697570323944092, "learning_rate": 4.504677858138147e-06, "loss": 0.8292, "step": 9958 }, { "epoch": 0.5947091842828138, "grad_norm": 4.468695640563965, "learning_rate": 4.504014332161104e-06, "loss": 0.8603, "step": 9959 }, { "epoch": 0.5947689000358295, "grad_norm": 2.049905300140381, "learning_rate": 4.503350806184062e-06, "loss": 0.8401, "step": 9960 }, { "epoch": 0.594828615788845, "grad_norm": 2.565548896789551, "learning_rate": 4.5026872802070205e-06, "loss": 0.8325, "step": 9961 }, { "epoch": 0.5948883315418607, "grad_norm": 1.8629056215286255, "learning_rate": 4.502023754229979e-06, "loss": 0.8502, "step": 9962 }, { "epoch": 0.5949480472948764, "grad_norm": 2.2379772663116455, "learning_rate": 4.501360228252937e-06, "loss": 0.8125, "step": 9963 }, { "epoch": 0.595007763047892, "grad_norm": 2.729041814804077, "learning_rate": 4.500696702275895e-06, "loss": 0.8967, "step": 9964 }, { "epoch": 0.5950674788009077, "grad_norm": 2.8091933727264404, "learning_rate": 4.500033176298853e-06, "loss": 0.846, "step": 9965 }, { "epoch": 0.5951271945539234, "grad_norm": 2.313028573989868, "learning_rate": 4.49936965032181e-06, "loss": 0.8174, "step": 9966 }, { "epoch": 0.5951869103069389, "grad_norm": 1.9658656120300293, "learning_rate": 4.498706124344768e-06, "loss": 0.8359, "step": 9967 }, { "epoch": 0.5952466260599546, "grad_norm": 2.227505922317505, "learning_rate": 4.498042598367726e-06, "loss": 0.7946, "step": 9968 }, { "epoch": 0.5953063418129703, "grad_norm": 2.4503533840179443, "learning_rate": 4.4973790723906844e-06, "loss": 0.8116, "step": 9969 }, { "epoch": 0.5953660575659859, "grad_norm": 4.20143461227417, "learning_rate": 4.4967155464136425e-06, "loss": 0.8545, "step": 9970 }, { "epoch": 0.5954257733190016, "grad_norm": 2.3160276412963867, "learning_rate": 4.496052020436601e-06, "loss": 0.8508, "step": 9971 }, { "epoch": 0.5954854890720171, "grad_norm": 3.140936851501465, "learning_rate": 4.495388494459558e-06, "loss": 0.8649, "step": 9972 }, { "epoch": 0.5955452048250328, "grad_norm": 3.0025758743286133, "learning_rate": 4.494724968482517e-06, "loss": 0.8388, "step": 9973 }, { "epoch": 0.5956049205780485, "grad_norm": 3.1459076404571533, "learning_rate": 4.494061442505475e-06, "loss": 0.8229, "step": 9974 }, { "epoch": 0.5956646363310641, "grad_norm": 4.304406642913818, "learning_rate": 4.493397916528432e-06, "loss": 0.8672, "step": 9975 }, { "epoch": 0.5957243520840798, "grad_norm": 2.0218114852905273, "learning_rate": 4.49273439055139e-06, "loss": 0.8341, "step": 9976 }, { "epoch": 0.5957840678370955, "grad_norm": 2.2390174865722656, "learning_rate": 4.492070864574348e-06, "loss": 0.8611, "step": 9977 }, { "epoch": 0.595843783590111, "grad_norm": 2.7432498931884766, "learning_rate": 4.4914073385973065e-06, "loss": 0.8545, "step": 9978 }, { "epoch": 0.5959034993431267, "grad_norm": 2.323922872543335, "learning_rate": 4.490743812620265e-06, "loss": 0.8929, "step": 9979 }, { "epoch": 0.5959632150961424, "grad_norm": 2.6775145530700684, "learning_rate": 4.490080286643223e-06, "loss": 0.8306, "step": 9980 }, { "epoch": 0.596022930849158, "grad_norm": 3.063709020614624, "learning_rate": 4.48941676066618e-06, "loss": 0.8256, "step": 9981 }, { "epoch": 0.5960826466021737, "grad_norm": 1.6898689270019531, "learning_rate": 4.488753234689138e-06, "loss": 0.856, "step": 9982 }, { "epoch": 0.5961423623551892, "grad_norm": 2.343029737472534, "learning_rate": 4.488089708712097e-06, "loss": 0.8519, "step": 9983 }, { "epoch": 0.5962020781082049, "grad_norm": 3.5134103298187256, "learning_rate": 4.487426182735054e-06, "loss": 0.8525, "step": 9984 }, { "epoch": 0.5962617938612206, "grad_norm": 2.2289955615997314, "learning_rate": 4.486762656758012e-06, "loss": 0.8536, "step": 9985 }, { "epoch": 0.5963215096142362, "grad_norm": 2.3494927883148193, "learning_rate": 4.4860991307809705e-06, "loss": 0.8675, "step": 9986 }, { "epoch": 0.5963812253672519, "grad_norm": 1.7261971235275269, "learning_rate": 4.4854356048039286e-06, "loss": 0.8477, "step": 9987 }, { "epoch": 0.5964409411202676, "grad_norm": 2.6993494033813477, "learning_rate": 4.484772078826887e-06, "loss": 0.8164, "step": 9988 }, { "epoch": 0.5965006568732831, "grad_norm": 3.577415704727173, "learning_rate": 4.484108552849845e-06, "loss": 0.8557, "step": 9989 }, { "epoch": 0.5965603726262988, "grad_norm": 2.057440757751465, "learning_rate": 4.483445026872803e-06, "loss": 0.8896, "step": 9990 }, { "epoch": 0.5966200883793145, "grad_norm": 1.879799246788025, "learning_rate": 4.48278150089576e-06, "loss": 0.7972, "step": 9991 }, { "epoch": 0.5966798041323301, "grad_norm": 1.892403244972229, "learning_rate": 4.482117974918718e-06, "loss": 0.8307, "step": 9992 }, { "epoch": 0.5967395198853458, "grad_norm": 2.4608101844787598, "learning_rate": 4.481454448941676e-06, "loss": 0.8477, "step": 9993 }, { "epoch": 0.5967992356383613, "grad_norm": 2.5101027488708496, "learning_rate": 4.480790922964634e-06, "loss": 0.8422, "step": 9994 }, { "epoch": 0.596858951391377, "grad_norm": 1.783280849456787, "learning_rate": 4.4801273969875925e-06, "loss": 0.8541, "step": 9995 }, { "epoch": 0.5969186671443927, "grad_norm": 2.1649553775787354, "learning_rate": 4.479463871010551e-06, "loss": 0.8323, "step": 9996 }, { "epoch": 0.5969783828974083, "grad_norm": 2.2725648880004883, "learning_rate": 4.478800345033508e-06, "loss": 0.8266, "step": 9997 }, { "epoch": 0.597038098650424, "grad_norm": 2.0502219200134277, "learning_rate": 4.478136819056467e-06, "loss": 0.8255, "step": 9998 }, { "epoch": 0.5970978144034397, "grad_norm": 3.214050531387329, "learning_rate": 4.477473293079425e-06, "loss": 0.8402, "step": 9999 }, { "epoch": 0.5971575301564552, "grad_norm": 2.0987610816955566, "learning_rate": 4.476809767102382e-06, "loss": 0.8794, "step": 10000 }, { "epoch": 0.5971575301564552, "eval_text_loss": 0.9065711498260498, "eval_text_runtime": 15.2186, "eval_text_samples_per_second": 262.837, "eval_text_steps_per_second": 0.526, "step": 10000 }, { "epoch": 0.5971575301564552, "eval_image_loss": 0.6151543855667114, "eval_image_runtime": 4.9695, "eval_image_samples_per_second": 804.914, "eval_image_steps_per_second": 1.61, "step": 10000 }, { "epoch": 0.5971575301564552, "eval_video_loss": 1.0524243116378784, "eval_video_runtime": 76.8811, "eval_video_samples_per_second": 52.028, "eval_video_steps_per_second": 0.104, "step": 10000 }, { "epoch": 0.5972172459094709, "grad_norm": 2.06948184967041, "learning_rate": 4.47614624112534e-06, "loss": 0.8596, "step": 10001 }, { "epoch": 0.5972769616624866, "grad_norm": 2.717416763305664, "learning_rate": 4.475482715148298e-06, "loss": 0.8585, "step": 10002 }, { "epoch": 0.5973366774155022, "grad_norm": 1.873706340789795, "learning_rate": 4.4748191891712565e-06, "loss": 0.8472, "step": 10003 }, { "epoch": 0.5973963931685179, "grad_norm": 2.341996908187866, "learning_rate": 4.4741556631942146e-06, "loss": 0.811, "step": 10004 }, { "epoch": 0.5974561089215334, "grad_norm": 2.776460647583008, "learning_rate": 4.473492137217173e-06, "loss": 0.8504, "step": 10005 }, { "epoch": 0.5975158246745491, "grad_norm": 2.310204267501831, "learning_rate": 4.47282861124013e-06, "loss": 0.8488, "step": 10006 }, { "epoch": 0.5975755404275648, "grad_norm": 3.2241125106811523, "learning_rate": 4.472165085263088e-06, "loss": 0.8174, "step": 10007 }, { "epoch": 0.5976352561805804, "grad_norm": 2.31137752532959, "learning_rate": 4.471501559286047e-06, "loss": 0.8152, "step": 10008 }, { "epoch": 0.5976949719335961, "grad_norm": 2.2385129928588867, "learning_rate": 4.470838033309004e-06, "loss": 0.8786, "step": 10009 }, { "epoch": 0.5977546876866118, "grad_norm": 2.52616024017334, "learning_rate": 4.470174507331962e-06, "loss": 0.8289, "step": 10010 }, { "epoch": 0.5978144034396273, "grad_norm": 4.323481559753418, "learning_rate": 4.4695109813549204e-06, "loss": 0.8464, "step": 10011 }, { "epoch": 0.597874119192643, "grad_norm": 1.8040833473205566, "learning_rate": 4.4688474553778785e-06, "loss": 0.8666, "step": 10012 }, { "epoch": 0.5979338349456587, "grad_norm": 3.5490846633911133, "learning_rate": 4.468183929400837e-06, "loss": 0.9032, "step": 10013 }, { "epoch": 0.5979935506986743, "grad_norm": 1.7649452686309814, "learning_rate": 4.467520403423795e-06, "loss": 0.8262, "step": 10014 }, { "epoch": 0.59805326645169, "grad_norm": 2.6955137252807617, "learning_rate": 4.466856877446753e-06, "loss": 0.8415, "step": 10015 }, { "epoch": 0.5981129822047057, "grad_norm": 4.035462379455566, "learning_rate": 4.46619335146971e-06, "loss": 0.8399, "step": 10016 }, { "epoch": 0.5981726979577212, "grad_norm": 3.3744499683380127, "learning_rate": 4.465529825492668e-06, "loss": 0.8651, "step": 10017 }, { "epoch": 0.5982324137107369, "grad_norm": 2.6126954555511475, "learning_rate": 4.464866299515626e-06, "loss": 0.8383, "step": 10018 }, { "epoch": 0.5982921294637525, "grad_norm": 1.9532175064086914, "learning_rate": 4.464202773538584e-06, "loss": 0.8539, "step": 10019 }, { "epoch": 0.5983518452167682, "grad_norm": 2.362661123275757, "learning_rate": 4.4635392475615425e-06, "loss": 0.8689, "step": 10020 }, { "epoch": 0.5984115609697839, "grad_norm": 2.761744499206543, "learning_rate": 4.462875721584501e-06, "loss": 0.832, "step": 10021 }, { "epoch": 0.5984712767227994, "grad_norm": 2.7435302734375, "learning_rate": 4.462212195607458e-06, "loss": 0.8787, "step": 10022 }, { "epoch": 0.5985309924758151, "grad_norm": 2.7897000312805176, "learning_rate": 4.461548669630417e-06, "loss": 0.8797, "step": 10023 }, { "epoch": 0.5985907082288308, "grad_norm": 2.041602849960327, "learning_rate": 4.460885143653375e-06, "loss": 0.8631, "step": 10024 }, { "epoch": 0.5986504239818464, "grad_norm": 2.013538360595703, "learning_rate": 4.460221617676332e-06, "loss": 0.8387, "step": 10025 }, { "epoch": 0.5987101397348621, "grad_norm": 2.3130598068237305, "learning_rate": 4.45955809169929e-06, "loss": 0.8193, "step": 10026 }, { "epoch": 0.5987698554878778, "grad_norm": 2.3666539192199707, "learning_rate": 4.458894565722248e-06, "loss": 0.8315, "step": 10027 }, { "epoch": 0.5988295712408933, "grad_norm": 2.864400863647461, "learning_rate": 4.4582310397452064e-06, "loss": 0.8589, "step": 10028 }, { "epoch": 0.598889286993909, "grad_norm": 4.424167633056641, "learning_rate": 4.4575675137681645e-06, "loss": 0.8369, "step": 10029 }, { "epoch": 0.5989490027469246, "grad_norm": 3.7332024574279785, "learning_rate": 4.456903987791123e-06, "loss": 0.8895, "step": 10030 }, { "epoch": 0.5990087184999403, "grad_norm": 3.8061344623565674, "learning_rate": 4.456240461814081e-06, "loss": 0.8699, "step": 10031 }, { "epoch": 0.599068434252956, "grad_norm": 2.3845207691192627, "learning_rate": 4.455576935837038e-06, "loss": 0.8226, "step": 10032 }, { "epoch": 0.5991281500059715, "grad_norm": 2.646876573562622, "learning_rate": 4.454913409859997e-06, "loss": 0.8306, "step": 10033 }, { "epoch": 0.5991878657589872, "grad_norm": 3.120388984680176, "learning_rate": 4.454249883882954e-06, "loss": 0.8367, "step": 10034 }, { "epoch": 0.5992475815120029, "grad_norm": 10.552834510803223, "learning_rate": 4.453586357905912e-06, "loss": 0.8342, "step": 10035 }, { "epoch": 0.5993072972650185, "grad_norm": 1.904563546180725, "learning_rate": 4.45292283192887e-06, "loss": 0.8595, "step": 10036 }, { "epoch": 0.5993670130180342, "grad_norm": 2.2085702419281006, "learning_rate": 4.4522593059518285e-06, "loss": 0.8767, "step": 10037 }, { "epoch": 0.5994267287710499, "grad_norm": 2.073107957839966, "learning_rate": 4.451595779974787e-06, "loss": 0.8288, "step": 10038 }, { "epoch": 0.5994864445240654, "grad_norm": 2.734130382537842, "learning_rate": 4.450932253997745e-06, "loss": 0.8857, "step": 10039 }, { "epoch": 0.5995461602770811, "grad_norm": 2.9305202960968018, "learning_rate": 4.450268728020703e-06, "loss": 0.8623, "step": 10040 }, { "epoch": 0.5996058760300967, "grad_norm": 4.8260955810546875, "learning_rate": 4.44960520204366e-06, "loss": 0.8368, "step": 10041 }, { "epoch": 0.5996655917831124, "grad_norm": 1.9538145065307617, "learning_rate": 4.448941676066618e-06, "loss": 0.8148, "step": 10042 }, { "epoch": 0.5997253075361281, "grad_norm": 2.3413033485412598, "learning_rate": 4.448278150089576e-06, "loss": 0.8376, "step": 10043 }, { "epoch": 0.5997850232891436, "grad_norm": 2.889474391937256, "learning_rate": 4.447614624112534e-06, "loss": 0.8423, "step": 10044 }, { "epoch": 0.5998447390421593, "grad_norm": 3.6977009773254395, "learning_rate": 4.4469510981354925e-06, "loss": 0.835, "step": 10045 }, { "epoch": 0.599904454795175, "grad_norm": 1.8778598308563232, "learning_rate": 4.4462875721584506e-06, "loss": 0.8746, "step": 10046 }, { "epoch": 0.5999641705481906, "grad_norm": 3.1936874389648438, "learning_rate": 4.445624046181408e-06, "loss": 0.8445, "step": 10047 }, { "epoch": 0.6000238863012063, "grad_norm": 3.589998722076416, "learning_rate": 4.444960520204367e-06, "loss": 0.8474, "step": 10048 }, { "epoch": 0.600083602054222, "grad_norm": 1.8708946704864502, "learning_rate": 4.444296994227325e-06, "loss": 0.823, "step": 10049 }, { "epoch": 0.6001433178072375, "grad_norm": 2.29292368888855, "learning_rate": 4.443633468250282e-06, "loss": 0.8149, "step": 10050 }, { "epoch": 0.6002030335602532, "grad_norm": 2.3029870986938477, "learning_rate": 4.44296994227324e-06, "loss": 0.8251, "step": 10051 }, { "epoch": 0.6002627493132688, "grad_norm": 1.8258635997772217, "learning_rate": 4.442306416296198e-06, "loss": 0.8512, "step": 10052 }, { "epoch": 0.6003224650662845, "grad_norm": 2.1243934631347656, "learning_rate": 4.441642890319156e-06, "loss": 0.8636, "step": 10053 }, { "epoch": 0.6003821808193002, "grad_norm": 1.6159220933914185, "learning_rate": 4.4409793643421145e-06, "loss": 0.7905, "step": 10054 }, { "epoch": 0.6004418965723157, "grad_norm": 2.5943448543548584, "learning_rate": 4.440315838365073e-06, "loss": 0.8566, "step": 10055 }, { "epoch": 0.6005016123253314, "grad_norm": 1.9941686391830444, "learning_rate": 4.439652312388031e-06, "loss": 0.852, "step": 10056 }, { "epoch": 0.6005613280783471, "grad_norm": 2.895618200302124, "learning_rate": 4.438988786410988e-06, "loss": 0.8095, "step": 10057 }, { "epoch": 0.6006210438313627, "grad_norm": 2.0801987648010254, "learning_rate": 4.438325260433947e-06, "loss": 0.8533, "step": 10058 }, { "epoch": 0.6006807595843784, "grad_norm": 3.6834816932678223, "learning_rate": 4.437661734456904e-06, "loss": 0.8082, "step": 10059 }, { "epoch": 0.600740475337394, "grad_norm": 2.1756978034973145, "learning_rate": 4.436998208479862e-06, "loss": 0.8307, "step": 10060 }, { "epoch": 0.6008001910904096, "grad_norm": 2.009565830230713, "learning_rate": 4.43633468250282e-06, "loss": 0.8134, "step": 10061 }, { "epoch": 0.6008599068434253, "grad_norm": 2.935760259628296, "learning_rate": 4.4356711565257785e-06, "loss": 0.8786, "step": 10062 }, { "epoch": 0.6009196225964409, "grad_norm": 1.8003572225570679, "learning_rate": 4.4350076305487366e-06, "loss": 0.8182, "step": 10063 }, { "epoch": 0.6009793383494566, "grad_norm": 2.1736786365509033, "learning_rate": 4.434344104571695e-06, "loss": 0.8343, "step": 10064 }, { "epoch": 0.6010390541024723, "grad_norm": 3.299910545349121, "learning_rate": 4.433680578594653e-06, "loss": 0.818, "step": 10065 }, { "epoch": 0.6010987698554878, "grad_norm": 1.6928069591522217, "learning_rate": 4.43301705261761e-06, "loss": 0.869, "step": 10066 }, { "epoch": 0.6011584856085035, "grad_norm": 3.0422613620758057, "learning_rate": 4.432353526640568e-06, "loss": 0.8548, "step": 10067 }, { "epoch": 0.6012182013615192, "grad_norm": 2.168828248977661, "learning_rate": 4.431690000663526e-06, "loss": 0.793, "step": 10068 }, { "epoch": 0.6012779171145348, "grad_norm": 2.126068592071533, "learning_rate": 4.431026474686484e-06, "loss": 0.8006, "step": 10069 }, { "epoch": 0.6013376328675505, "grad_norm": 2.497312068939209, "learning_rate": 4.4303629487094424e-06, "loss": 0.8549, "step": 10070 }, { "epoch": 0.6013973486205662, "grad_norm": 2.2931108474731445, "learning_rate": 4.4296994227324005e-06, "loss": 0.8198, "step": 10071 }, { "epoch": 0.6014570643735817, "grad_norm": 4.061738014221191, "learning_rate": 4.429035896755358e-06, "loss": 0.8281, "step": 10072 }, { "epoch": 0.6015167801265974, "grad_norm": 2.033963918685913, "learning_rate": 4.428372370778317e-06, "loss": 0.889, "step": 10073 }, { "epoch": 0.601576495879613, "grad_norm": 2.662141799926758, "learning_rate": 4.427708844801275e-06, "loss": 0.8478, "step": 10074 }, { "epoch": 0.6016362116326287, "grad_norm": 3.0273523330688477, "learning_rate": 4.427045318824232e-06, "loss": 0.8351, "step": 10075 }, { "epoch": 0.6016959273856444, "grad_norm": 2.3474185466766357, "learning_rate": 4.42638179284719e-06, "loss": 0.8153, "step": 10076 }, { "epoch": 0.6017556431386599, "grad_norm": 2.290884494781494, "learning_rate": 4.425718266870148e-06, "loss": 0.8149, "step": 10077 }, { "epoch": 0.6018153588916756, "grad_norm": 2.4670915603637695, "learning_rate": 4.425054740893106e-06, "loss": 0.8595, "step": 10078 }, { "epoch": 0.6018750746446913, "grad_norm": 2.543459415435791, "learning_rate": 4.4243912149160645e-06, "loss": 0.8673, "step": 10079 }, { "epoch": 0.6019347903977069, "grad_norm": 2.7344422340393066, "learning_rate": 4.423727688939023e-06, "loss": 0.8253, "step": 10080 }, { "epoch": 0.6019945061507226, "grad_norm": 2.571650266647339, "learning_rate": 4.423064162961981e-06, "loss": 0.8309, "step": 10081 }, { "epoch": 0.6020542219037383, "grad_norm": 2.585686445236206, "learning_rate": 4.422400636984938e-06, "loss": 0.8264, "step": 10082 }, { "epoch": 0.6021139376567538, "grad_norm": 1.8967522382736206, "learning_rate": 4.421737111007896e-06, "loss": 0.8842, "step": 10083 }, { "epoch": 0.6021736534097695, "grad_norm": 2.157486915588379, "learning_rate": 4.421073585030854e-06, "loss": 0.8445, "step": 10084 }, { "epoch": 0.6022333691627851, "grad_norm": 1.7116305828094482, "learning_rate": 4.420410059053812e-06, "loss": 0.8391, "step": 10085 }, { "epoch": 0.6022930849158008, "grad_norm": 2.570289134979248, "learning_rate": 4.41974653307677e-06, "loss": 0.8282, "step": 10086 }, { "epoch": 0.6023528006688165, "grad_norm": 3.6703569889068604, "learning_rate": 4.4190830070997284e-06, "loss": 0.8309, "step": 10087 }, { "epoch": 0.6024125164218321, "grad_norm": 2.4445865154266357, "learning_rate": 4.418419481122686e-06, "loss": 0.8281, "step": 10088 }, { "epoch": 0.6024722321748477, "grad_norm": 3.9947669506073, "learning_rate": 4.417755955145645e-06, "loss": 0.8707, "step": 10089 }, { "epoch": 0.6025319479278634, "grad_norm": 2.6068809032440186, "learning_rate": 4.417092429168603e-06, "loss": 0.8628, "step": 10090 }, { "epoch": 0.602591663680879, "grad_norm": 1.7531365156173706, "learning_rate": 4.41642890319156e-06, "loss": 0.8448, "step": 10091 }, { "epoch": 0.6026513794338947, "grad_norm": 2.290346622467041, "learning_rate": 4.415765377214518e-06, "loss": 0.8532, "step": 10092 }, { "epoch": 0.6027110951869104, "grad_norm": 2.01517653465271, "learning_rate": 4.415101851237476e-06, "loss": 0.9097, "step": 10093 }, { "epoch": 0.6027708109399259, "grad_norm": 4.581943035125732, "learning_rate": 4.414438325260434e-06, "loss": 0.8362, "step": 10094 }, { "epoch": 0.6028305266929416, "grad_norm": 3.108515739440918, "learning_rate": 4.413774799283392e-06, "loss": 0.8435, "step": 10095 }, { "epoch": 0.6028902424459572, "grad_norm": 2.4195668697357178, "learning_rate": 4.4131112733063505e-06, "loss": 0.8156, "step": 10096 }, { "epoch": 0.6029499581989729, "grad_norm": 2.2031126022338867, "learning_rate": 4.412447747329308e-06, "loss": 0.8488, "step": 10097 }, { "epoch": 0.6030096739519886, "grad_norm": 2.617161273956299, "learning_rate": 4.411784221352266e-06, "loss": 0.8433, "step": 10098 }, { "epoch": 0.6030693897050042, "grad_norm": 2.4336295127868652, "learning_rate": 4.411120695375225e-06, "loss": 0.8469, "step": 10099 }, { "epoch": 0.6031291054580198, "grad_norm": 2.160343647003174, "learning_rate": 4.410457169398182e-06, "loss": 0.8268, "step": 10100 }, { "epoch": 0.6031888212110355, "grad_norm": 2.4857752323150635, "learning_rate": 4.40979364342114e-06, "loss": 0.8509, "step": 10101 }, { "epoch": 0.6032485369640511, "grad_norm": 1.9815657138824463, "learning_rate": 4.409130117444098e-06, "loss": 0.8414, "step": 10102 }, { "epoch": 0.6033082527170668, "grad_norm": 1.966091513633728, "learning_rate": 4.408466591467056e-06, "loss": 0.8182, "step": 10103 }, { "epoch": 0.6033679684700824, "grad_norm": 2.3974428176879883, "learning_rate": 4.4078030654900145e-06, "loss": 0.8091, "step": 10104 }, { "epoch": 0.603427684223098, "grad_norm": 1.981733798980713, "learning_rate": 4.4071395395129726e-06, "loss": 0.8631, "step": 10105 }, { "epoch": 0.6034873999761137, "grad_norm": 2.3414628505706787, "learning_rate": 4.406476013535931e-06, "loss": 0.8536, "step": 10106 }, { "epoch": 0.6035471157291293, "grad_norm": 3.7656378746032715, "learning_rate": 4.405812487558888e-06, "loss": 0.8188, "step": 10107 }, { "epoch": 0.603606831482145, "grad_norm": 3.5439836978912354, "learning_rate": 4.405148961581846e-06, "loss": 0.841, "step": 10108 }, { "epoch": 0.6036665472351607, "grad_norm": 2.3604116439819336, "learning_rate": 4.404485435604804e-06, "loss": 0.8033, "step": 10109 }, { "epoch": 0.6037262629881763, "grad_norm": 2.5651440620422363, "learning_rate": 4.403821909627762e-06, "loss": 0.8503, "step": 10110 }, { "epoch": 0.6037859787411919, "grad_norm": 2.084643840789795, "learning_rate": 4.40315838365072e-06, "loss": 0.842, "step": 10111 }, { "epoch": 0.6038456944942076, "grad_norm": 2.9662604331970215, "learning_rate": 4.402494857673678e-06, "loss": 0.8463, "step": 10112 }, { "epoch": 0.6039054102472232, "grad_norm": 2.591162919998169, "learning_rate": 4.401831331696636e-06, "loss": 0.8459, "step": 10113 }, { "epoch": 0.6039651260002389, "grad_norm": 2.2356879711151123, "learning_rate": 4.401167805719595e-06, "loss": 0.8715, "step": 10114 }, { "epoch": 0.6040248417532545, "grad_norm": 3.9034507274627686, "learning_rate": 4.400504279742553e-06, "loss": 0.8393, "step": 10115 }, { "epoch": 0.6040845575062701, "grad_norm": 9.315929412841797, "learning_rate": 4.39984075376551e-06, "loss": 0.8082, "step": 10116 }, { "epoch": 0.6041442732592858, "grad_norm": 1.849734902381897, "learning_rate": 4.399177227788468e-06, "loss": 0.8091, "step": 10117 }, { "epoch": 0.6042039890123014, "grad_norm": 2.126580238342285, "learning_rate": 4.398513701811426e-06, "loss": 0.8276, "step": 10118 }, { "epoch": 0.6042637047653171, "grad_norm": 2.6467058658599854, "learning_rate": 4.397850175834384e-06, "loss": 0.8595, "step": 10119 }, { "epoch": 0.6043234205183328, "grad_norm": 2.4232964515686035, "learning_rate": 4.397186649857342e-06, "loss": 0.8495, "step": 10120 }, { "epoch": 0.6043831362713484, "grad_norm": 1.8148468732833862, "learning_rate": 4.3965231238803005e-06, "loss": 0.8067, "step": 10121 }, { "epoch": 0.604442852024364, "grad_norm": 2.874201536178589, "learning_rate": 4.395859597903258e-06, "loss": 0.8282, "step": 10122 }, { "epoch": 0.6045025677773797, "grad_norm": 1.9051103591918945, "learning_rate": 4.395196071926216e-06, "loss": 0.8323, "step": 10123 }, { "epoch": 0.6045622835303953, "grad_norm": 2.0409905910491943, "learning_rate": 4.394532545949175e-06, "loss": 0.8478, "step": 10124 }, { "epoch": 0.604621999283411, "grad_norm": 2.299755573272705, "learning_rate": 4.393869019972132e-06, "loss": 0.8546, "step": 10125 }, { "epoch": 0.6046817150364266, "grad_norm": 3.4793078899383545, "learning_rate": 4.39320549399509e-06, "loss": 0.8058, "step": 10126 }, { "epoch": 0.6047414307894422, "grad_norm": 2.363762617111206, "learning_rate": 4.392541968018048e-06, "loss": 0.8661, "step": 10127 }, { "epoch": 0.6048011465424579, "grad_norm": 2.2502665519714355, "learning_rate": 4.391878442041006e-06, "loss": 0.8667, "step": 10128 }, { "epoch": 0.6048608622954735, "grad_norm": 2.9823668003082275, "learning_rate": 4.3912149160639644e-06, "loss": 0.8481, "step": 10129 }, { "epoch": 0.6049205780484892, "grad_norm": 2.6443004608154297, "learning_rate": 4.3905513900869225e-06, "loss": 0.833, "step": 10130 }, { "epoch": 0.6049802938015049, "grad_norm": 1.9994161128997803, "learning_rate": 4.389887864109881e-06, "loss": 0.8594, "step": 10131 }, { "epoch": 0.6050400095545205, "grad_norm": 2.6618430614471436, "learning_rate": 4.389224338132838e-06, "loss": 0.8194, "step": 10132 }, { "epoch": 0.6050997253075361, "grad_norm": 3.334733247756958, "learning_rate": 4.388560812155796e-06, "loss": 0.828, "step": 10133 }, { "epoch": 0.6051594410605518, "grad_norm": 2.526805877685547, "learning_rate": 4.387897286178754e-06, "loss": 0.8467, "step": 10134 }, { "epoch": 0.6052191568135674, "grad_norm": 3.008392333984375, "learning_rate": 4.387233760201712e-06, "loss": 0.8634, "step": 10135 }, { "epoch": 0.6052788725665831, "grad_norm": 3.155848264694214, "learning_rate": 4.38657023422467e-06, "loss": 0.8395, "step": 10136 }, { "epoch": 0.6053385883195987, "grad_norm": 3.399850845336914, "learning_rate": 4.385906708247628e-06, "loss": 0.8606, "step": 10137 }, { "epoch": 0.6053983040726143, "grad_norm": 3.023540496826172, "learning_rate": 4.385243182270586e-06, "loss": 0.795, "step": 10138 }, { "epoch": 0.60545801982563, "grad_norm": 1.883038878440857, "learning_rate": 4.384579656293545e-06, "loss": 0.8533, "step": 10139 }, { "epoch": 0.6055177355786456, "grad_norm": 2.100762128829956, "learning_rate": 4.383916130316503e-06, "loss": 0.7953, "step": 10140 }, { "epoch": 0.6055774513316613, "grad_norm": 1.8175687789916992, "learning_rate": 4.38325260433946e-06, "loss": 0.8447, "step": 10141 }, { "epoch": 0.605637167084677, "grad_norm": 1.6539982557296753, "learning_rate": 4.382589078362418e-06, "loss": 0.8311, "step": 10142 }, { "epoch": 0.6056968828376926, "grad_norm": 3.6435935497283936, "learning_rate": 4.381925552385376e-06, "loss": 0.8398, "step": 10143 }, { "epoch": 0.6057565985907082, "grad_norm": 3.161280393600464, "learning_rate": 4.381262026408334e-06, "loss": 0.821, "step": 10144 }, { "epoch": 0.6058163143437238, "grad_norm": 2.654043674468994, "learning_rate": 4.380598500431292e-06, "loss": 0.8269, "step": 10145 }, { "epoch": 0.6058760300967395, "grad_norm": 2.589038372039795, "learning_rate": 4.3799349744542504e-06, "loss": 0.8494, "step": 10146 }, { "epoch": 0.6059357458497552, "grad_norm": 3.4476709365844727, "learning_rate": 4.379271448477208e-06, "loss": 0.8552, "step": 10147 }, { "epoch": 0.6059954616027708, "grad_norm": 1.9109421968460083, "learning_rate": 4.378607922500166e-06, "loss": 0.8442, "step": 10148 }, { "epoch": 0.6060551773557865, "grad_norm": 2.136679172515869, "learning_rate": 4.377944396523125e-06, "loss": 0.8458, "step": 10149 }, { "epoch": 0.6061148931088021, "grad_norm": 3.018747329711914, "learning_rate": 4.377280870546082e-06, "loss": 0.888, "step": 10150 }, { "epoch": 0.6061746088618177, "grad_norm": 1.9523953199386597, "learning_rate": 4.37661734456904e-06, "loss": 0.8244, "step": 10151 }, { "epoch": 0.6062343246148334, "grad_norm": 2.2193593978881836, "learning_rate": 4.375953818591998e-06, "loss": 0.8404, "step": 10152 }, { "epoch": 0.6062940403678491, "grad_norm": 2.960397243499756, "learning_rate": 4.375290292614956e-06, "loss": 0.8434, "step": 10153 }, { "epoch": 0.6063537561208647, "grad_norm": 2.7349436283111572, "learning_rate": 4.374626766637914e-06, "loss": 0.8365, "step": 10154 }, { "epoch": 0.6064134718738803, "grad_norm": 1.8838800191879272, "learning_rate": 4.3739632406608725e-06, "loss": 0.8461, "step": 10155 }, { "epoch": 0.606473187626896, "grad_norm": 2.7621891498565674, "learning_rate": 4.373299714683831e-06, "loss": 0.86, "step": 10156 }, { "epoch": 0.6065329033799116, "grad_norm": 1.6692941188812256, "learning_rate": 4.372636188706788e-06, "loss": 0.8555, "step": 10157 }, { "epoch": 0.6065926191329273, "grad_norm": 3.388298273086548, "learning_rate": 4.371972662729746e-06, "loss": 0.8422, "step": 10158 }, { "epoch": 0.606652334885943, "grad_norm": 1.9855375289916992, "learning_rate": 4.371309136752704e-06, "loss": 0.8465, "step": 10159 }, { "epoch": 0.6067120506389586, "grad_norm": 1.9176105260849, "learning_rate": 4.370645610775662e-06, "loss": 0.8246, "step": 10160 }, { "epoch": 0.6067717663919742, "grad_norm": 2.455627679824829, "learning_rate": 4.36998208479862e-06, "loss": 0.8408, "step": 10161 }, { "epoch": 0.6068314821449898, "grad_norm": 1.789515495300293, "learning_rate": 4.369318558821578e-06, "loss": 0.8463, "step": 10162 }, { "epoch": 0.6068911978980055, "grad_norm": 3.006300687789917, "learning_rate": 4.368655032844536e-06, "loss": 0.8457, "step": 10163 }, { "epoch": 0.6069509136510212, "grad_norm": 2.1671640872955322, "learning_rate": 4.3679915068674946e-06, "loss": 0.81, "step": 10164 }, { "epoch": 0.6070106294040368, "grad_norm": 2.7581536769866943, "learning_rate": 4.367327980890453e-06, "loss": 0.838, "step": 10165 }, { "epoch": 0.6070703451570524, "grad_norm": 1.6117041110992432, "learning_rate": 4.36666445491341e-06, "loss": 0.818, "step": 10166 }, { "epoch": 0.607130060910068, "grad_norm": 2.173295021057129, "learning_rate": 4.366000928936368e-06, "loss": 0.8716, "step": 10167 }, { "epoch": 0.6071897766630837, "grad_norm": 2.7341830730438232, "learning_rate": 4.365337402959326e-06, "loss": 0.8411, "step": 10168 }, { "epoch": 0.6072494924160994, "grad_norm": 3.64603853225708, "learning_rate": 4.364673876982284e-06, "loss": 0.8406, "step": 10169 }, { "epoch": 0.607309208169115, "grad_norm": 4.297677993774414, "learning_rate": 4.364010351005242e-06, "loss": 0.8328, "step": 10170 }, { "epoch": 0.6073689239221307, "grad_norm": 5.386888027191162, "learning_rate": 4.3633468250282e-06, "loss": 0.8416, "step": 10171 }, { "epoch": 0.6074286396751463, "grad_norm": 2.040268659591675, "learning_rate": 4.362683299051158e-06, "loss": 0.8056, "step": 10172 }, { "epoch": 0.6074883554281619, "grad_norm": 2.7010159492492676, "learning_rate": 4.362019773074116e-06, "loss": 0.866, "step": 10173 }, { "epoch": 0.6075480711811776, "grad_norm": 1.8022756576538086, "learning_rate": 4.361356247097075e-06, "loss": 0.8049, "step": 10174 }, { "epoch": 0.6076077869341933, "grad_norm": 2.622332811355591, "learning_rate": 4.360692721120032e-06, "loss": 0.8622, "step": 10175 }, { "epoch": 0.6076675026872089, "grad_norm": 2.3664028644561768, "learning_rate": 4.36002919514299e-06, "loss": 0.8323, "step": 10176 }, { "epoch": 0.6077272184402245, "grad_norm": 2.6967434883117676, "learning_rate": 4.359365669165948e-06, "loss": 0.8106, "step": 10177 }, { "epoch": 0.6077869341932401, "grad_norm": 2.011486053466797, "learning_rate": 4.358702143188906e-06, "loss": 0.8295, "step": 10178 }, { "epoch": 0.6078466499462558, "grad_norm": 1.934563159942627, "learning_rate": 4.358038617211864e-06, "loss": 0.8417, "step": 10179 }, { "epoch": 0.6079063656992715, "grad_norm": 3.8758654594421387, "learning_rate": 4.3573750912348225e-06, "loss": 0.8258, "step": 10180 }, { "epoch": 0.6079660814522871, "grad_norm": 2.7057571411132812, "learning_rate": 4.3567115652577806e-06, "loss": 0.8679, "step": 10181 }, { "epoch": 0.6080257972053028, "grad_norm": 2.8248672485351562, "learning_rate": 4.356048039280738e-06, "loss": 0.8327, "step": 10182 }, { "epoch": 0.6080855129583184, "grad_norm": 4.804447174072266, "learning_rate": 4.355384513303696e-06, "loss": 0.8534, "step": 10183 }, { "epoch": 0.608145228711334, "grad_norm": 2.4375078678131104, "learning_rate": 4.354720987326654e-06, "loss": 0.8084, "step": 10184 }, { "epoch": 0.6082049444643497, "grad_norm": 2.2519707679748535, "learning_rate": 4.354057461349612e-06, "loss": 0.8601, "step": 10185 }, { "epoch": 0.6082646602173654, "grad_norm": 4.181149482727051, "learning_rate": 4.35339393537257e-06, "loss": 0.8439, "step": 10186 }, { "epoch": 0.608324375970381, "grad_norm": 1.718602180480957, "learning_rate": 4.352730409395528e-06, "loss": 0.8288, "step": 10187 }, { "epoch": 0.6083840917233966, "grad_norm": 2.1034257411956787, "learning_rate": 4.3520668834184856e-06, "loss": 0.8278, "step": 10188 }, { "epoch": 0.6084438074764122, "grad_norm": 2.486255168914795, "learning_rate": 4.3514033574414445e-06, "loss": 0.8454, "step": 10189 }, { "epoch": 0.6085035232294279, "grad_norm": 2.822737693786621, "learning_rate": 4.350739831464403e-06, "loss": 0.8251, "step": 10190 }, { "epoch": 0.6085632389824436, "grad_norm": 4.0618438720703125, "learning_rate": 4.35007630548736e-06, "loss": 0.8555, "step": 10191 }, { "epoch": 0.6086229547354592, "grad_norm": 2.453913450241089, "learning_rate": 4.349412779510318e-06, "loss": 0.8227, "step": 10192 }, { "epoch": 0.6086826704884749, "grad_norm": 2.1548032760620117, "learning_rate": 4.348749253533276e-06, "loss": 0.8365, "step": 10193 }, { "epoch": 0.6087423862414905, "grad_norm": 2.0646860599517822, "learning_rate": 4.348085727556234e-06, "loss": 0.8367, "step": 10194 }, { "epoch": 0.6088021019945061, "grad_norm": 4.068470478057861, "learning_rate": 4.347422201579192e-06, "loss": 0.8645, "step": 10195 }, { "epoch": 0.6088618177475218, "grad_norm": 1.7532360553741455, "learning_rate": 4.34675867560215e-06, "loss": 0.8349, "step": 10196 }, { "epoch": 0.6089215335005375, "grad_norm": 2.09488582611084, "learning_rate": 4.346095149625108e-06, "loss": 0.8345, "step": 10197 }, { "epoch": 0.6089812492535531, "grad_norm": 2.002152681350708, "learning_rate": 4.345431623648066e-06, "loss": 0.7938, "step": 10198 }, { "epoch": 0.6090409650065687, "grad_norm": 3.7304458618164062, "learning_rate": 4.344768097671025e-06, "loss": 0.8549, "step": 10199 }, { "epoch": 0.6091006807595843, "grad_norm": 1.8884776830673218, "learning_rate": 4.344104571693982e-06, "loss": 0.827, "step": 10200 }, { "epoch": 0.6091006807595843, "eval_text_loss": 0.9055197238922119, "eval_text_runtime": 15.19, "eval_text_samples_per_second": 263.331, "eval_text_steps_per_second": 0.527, "step": 10200 }, { "epoch": 0.6091006807595843, "eval_image_loss": 0.6137577295303345, "eval_image_runtime": 5.0198, "eval_image_samples_per_second": 796.845, "eval_image_steps_per_second": 1.594, "step": 10200 }, { "epoch": 0.6091006807595843, "eval_video_loss": 1.0512616634368896, "eval_video_runtime": 77.403, "eval_video_samples_per_second": 51.678, "eval_video_steps_per_second": 0.103, "step": 10200 }, { "epoch": 0.6091603965126, "grad_norm": 7.771112442016602, "learning_rate": 4.34344104571694e-06, "loss": 0.8444, "step": 10201 }, { "epoch": 0.6092201122656157, "grad_norm": 2.056511640548706, "learning_rate": 4.342777519739898e-06, "loss": 0.8528, "step": 10202 }, { "epoch": 0.6092798280186313, "grad_norm": 2.952756404876709, "learning_rate": 4.342113993762856e-06, "loss": 0.8347, "step": 10203 }, { "epoch": 0.609339543771647, "grad_norm": 3.9337971210479736, "learning_rate": 4.341450467785814e-06, "loss": 0.8716, "step": 10204 }, { "epoch": 0.6093992595246626, "grad_norm": 2.9601757526397705, "learning_rate": 4.3407869418087724e-06, "loss": 0.8463, "step": 10205 }, { "epoch": 0.6094589752776782, "grad_norm": 2.2140309810638428, "learning_rate": 4.3401234158317305e-06, "loss": 0.8455, "step": 10206 }, { "epoch": 0.6095186910306939, "grad_norm": 2.229809045791626, "learning_rate": 4.339459889854688e-06, "loss": 0.8522, "step": 10207 }, { "epoch": 0.6095784067837096, "grad_norm": 1.8860340118408203, "learning_rate": 4.338796363877646e-06, "loss": 0.8483, "step": 10208 }, { "epoch": 0.6096381225367252, "grad_norm": 2.285541296005249, "learning_rate": 4.338132837900604e-06, "loss": 0.8215, "step": 10209 }, { "epoch": 0.6096978382897408, "grad_norm": 2.7383666038513184, "learning_rate": 4.337469311923562e-06, "loss": 0.8282, "step": 10210 }, { "epoch": 0.6097575540427564, "grad_norm": 2.4630613327026367, "learning_rate": 4.33680578594652e-06, "loss": 0.8404, "step": 10211 }, { "epoch": 0.6098172697957721, "grad_norm": 2.150803804397583, "learning_rate": 4.336142259969478e-06, "loss": 0.8145, "step": 10212 }, { "epoch": 0.6098769855487878, "grad_norm": 2.0097029209136963, "learning_rate": 4.3354787339924355e-06, "loss": 0.8396, "step": 10213 }, { "epoch": 0.6099367013018034, "grad_norm": 3.9451634883880615, "learning_rate": 4.3348152080153945e-06, "loss": 0.8409, "step": 10214 }, { "epoch": 0.6099964170548191, "grad_norm": 3.18887996673584, "learning_rate": 4.334151682038353e-06, "loss": 0.8689, "step": 10215 }, { "epoch": 0.6100561328078347, "grad_norm": 1.7846871614456177, "learning_rate": 4.33348815606131e-06, "loss": 0.8456, "step": 10216 }, { "epoch": 0.6101158485608503, "grad_norm": 1.8988046646118164, "learning_rate": 4.332824630084268e-06, "loss": 0.8397, "step": 10217 }, { "epoch": 0.610175564313866, "grad_norm": 3.1025280952453613, "learning_rate": 4.332161104107226e-06, "loss": 0.781, "step": 10218 }, { "epoch": 0.6102352800668817, "grad_norm": 2.6378400325775146, "learning_rate": 4.331497578130184e-06, "loss": 0.857, "step": 10219 }, { "epoch": 0.6102949958198973, "grad_norm": 2.1479735374450684, "learning_rate": 4.330834052153142e-06, "loss": 0.869, "step": 10220 }, { "epoch": 0.610354711572913, "grad_norm": 1.791955590248108, "learning_rate": 4.3301705261761e-06, "loss": 0.8541, "step": 10221 }, { "epoch": 0.6104144273259285, "grad_norm": 2.683310031890869, "learning_rate": 4.329507000199058e-06, "loss": 0.84, "step": 10222 }, { "epoch": 0.6104741430789442, "grad_norm": 3.8415915966033936, "learning_rate": 4.328843474222016e-06, "loss": 0.8723, "step": 10223 }, { "epoch": 0.6105338588319599, "grad_norm": 1.814426064491272, "learning_rate": 4.328179948244975e-06, "loss": 0.85, "step": 10224 }, { "epoch": 0.6105935745849755, "grad_norm": 3.0447704792022705, "learning_rate": 4.327516422267932e-06, "loss": 0.8522, "step": 10225 }, { "epoch": 0.6106532903379912, "grad_norm": 3.5203239917755127, "learning_rate": 4.32685289629089e-06, "loss": 0.8573, "step": 10226 }, { "epoch": 0.6107130060910068, "grad_norm": 6.062314987182617, "learning_rate": 4.326189370313848e-06, "loss": 0.8345, "step": 10227 }, { "epoch": 0.6107727218440224, "grad_norm": 2.224548578262329, "learning_rate": 4.325525844336806e-06, "loss": 0.8264, "step": 10228 }, { "epoch": 0.6108324375970381, "grad_norm": 1.801122784614563, "learning_rate": 4.324862318359764e-06, "loss": 0.8399, "step": 10229 }, { "epoch": 0.6108921533500538, "grad_norm": 2.302778959274292, "learning_rate": 4.324198792382722e-06, "loss": 0.8272, "step": 10230 }, { "epoch": 0.6109518691030694, "grad_norm": 2.4093987941741943, "learning_rate": 4.3235352664056805e-06, "loss": 0.866, "step": 10231 }, { "epoch": 0.6110115848560851, "grad_norm": 2.853346109390259, "learning_rate": 4.322871740428638e-06, "loss": 0.8608, "step": 10232 }, { "epoch": 0.6110713006091006, "grad_norm": 2.6738038063049316, "learning_rate": 4.322208214451596e-06, "loss": 0.8769, "step": 10233 }, { "epoch": 0.6111310163621163, "grad_norm": 2.808090925216675, "learning_rate": 4.321544688474554e-06, "loss": 0.8939, "step": 10234 }, { "epoch": 0.611190732115132, "grad_norm": 2.1600146293640137, "learning_rate": 4.320881162497512e-06, "loss": 0.876, "step": 10235 }, { "epoch": 0.6112504478681476, "grad_norm": 2.4625160694122314, "learning_rate": 4.32021763652047e-06, "loss": 0.8446, "step": 10236 }, { "epoch": 0.6113101636211633, "grad_norm": 2.9137725830078125, "learning_rate": 4.319554110543428e-06, "loss": 0.8938, "step": 10237 }, { "epoch": 0.6113698793741789, "grad_norm": 2.0757575035095215, "learning_rate": 4.3188905845663855e-06, "loss": 0.8551, "step": 10238 }, { "epoch": 0.6114295951271945, "grad_norm": 2.417175054550171, "learning_rate": 4.3182270585893445e-06, "loss": 0.8409, "step": 10239 }, { "epoch": 0.6114893108802102, "grad_norm": 2.748953104019165, "learning_rate": 4.3175635326123026e-06, "loss": 0.859, "step": 10240 }, { "epoch": 0.6115490266332259, "grad_norm": 2.502227306365967, "learning_rate": 4.31690000663526e-06, "loss": 0.8572, "step": 10241 }, { "epoch": 0.6116087423862415, "grad_norm": 2.488940715789795, "learning_rate": 4.316236480658218e-06, "loss": 0.8527, "step": 10242 }, { "epoch": 0.6116684581392572, "grad_norm": 2.4109182357788086, "learning_rate": 4.315572954681176e-06, "loss": 0.8699, "step": 10243 }, { "epoch": 0.6117281738922727, "grad_norm": 2.756774425506592, "learning_rate": 4.314909428704134e-06, "loss": 0.8123, "step": 10244 }, { "epoch": 0.6117878896452884, "grad_norm": 4.055766582489014, "learning_rate": 4.314245902727092e-06, "loss": 0.8342, "step": 10245 }, { "epoch": 0.6118476053983041, "grad_norm": 2.2299466133117676, "learning_rate": 4.31358237675005e-06, "loss": 0.8533, "step": 10246 }, { "epoch": 0.6119073211513197, "grad_norm": 2.435131788253784, "learning_rate": 4.3129188507730076e-06, "loss": 0.8315, "step": 10247 }, { "epoch": 0.6119670369043354, "grad_norm": 1.542636513710022, "learning_rate": 4.312255324795966e-06, "loss": 0.8411, "step": 10248 }, { "epoch": 0.612026752657351, "grad_norm": 3.8060853481292725, "learning_rate": 4.311591798818925e-06, "loss": 0.8405, "step": 10249 }, { "epoch": 0.6120864684103666, "grad_norm": 3.0684168338775635, "learning_rate": 4.310928272841882e-06, "loss": 0.8447, "step": 10250 }, { "epoch": 0.6121461841633823, "grad_norm": 1.4799525737762451, "learning_rate": 4.31026474686484e-06, "loss": 0.8443, "step": 10251 }, { "epoch": 0.612205899916398, "grad_norm": 2.9274158477783203, "learning_rate": 4.309601220887798e-06, "loss": 0.86, "step": 10252 }, { "epoch": 0.6122656156694136, "grad_norm": 1.7897660732269287, "learning_rate": 4.308937694910756e-06, "loss": 0.8251, "step": 10253 }, { "epoch": 0.6123253314224293, "grad_norm": 1.9475791454315186, "learning_rate": 4.308274168933714e-06, "loss": 0.8209, "step": 10254 }, { "epoch": 0.6123850471754448, "grad_norm": 2.1611058712005615, "learning_rate": 4.307610642956672e-06, "loss": 0.8523, "step": 10255 }, { "epoch": 0.6124447629284605, "grad_norm": 2.015885353088379, "learning_rate": 4.3069471169796305e-06, "loss": 0.8796, "step": 10256 }, { "epoch": 0.6125044786814762, "grad_norm": 2.701141834259033, "learning_rate": 4.306283591002588e-06, "loss": 0.874, "step": 10257 }, { "epoch": 0.6125641944344918, "grad_norm": 2.339582920074463, "learning_rate": 4.305620065025546e-06, "loss": 0.7969, "step": 10258 }, { "epoch": 0.6126239101875075, "grad_norm": 3.2472665309906006, "learning_rate": 4.304956539048504e-06, "loss": 0.8487, "step": 10259 }, { "epoch": 0.6126836259405231, "grad_norm": 2.061984062194824, "learning_rate": 4.304293013071462e-06, "loss": 0.825, "step": 10260 }, { "epoch": 0.6127433416935387, "grad_norm": 1.9272938966751099, "learning_rate": 4.30362948709442e-06, "loss": 0.857, "step": 10261 }, { "epoch": 0.6128030574465544, "grad_norm": 3.4432380199432373, "learning_rate": 4.302965961117378e-06, "loss": 0.8277, "step": 10262 }, { "epoch": 0.61286277319957, "grad_norm": 2.421210289001465, "learning_rate": 4.3023024351403355e-06, "loss": 0.8157, "step": 10263 }, { "epoch": 0.6129224889525857, "grad_norm": 1.6830161809921265, "learning_rate": 4.3016389091632944e-06, "loss": 0.8135, "step": 10264 }, { "epoch": 0.6129822047056014, "grad_norm": 2.5167882442474365, "learning_rate": 4.3009753831862525e-06, "loss": 0.8336, "step": 10265 }, { "epoch": 0.6130419204586169, "grad_norm": 2.1946589946746826, "learning_rate": 4.30031185720921e-06, "loss": 0.8257, "step": 10266 }, { "epoch": 0.6131016362116326, "grad_norm": 4.22916841506958, "learning_rate": 4.299648331232168e-06, "loss": 0.8353, "step": 10267 }, { "epoch": 0.6131613519646483, "grad_norm": 7.581422328948975, "learning_rate": 4.298984805255126e-06, "loss": 0.8386, "step": 10268 }, { "epoch": 0.6132210677176639, "grad_norm": 2.049421787261963, "learning_rate": 4.298321279278084e-06, "loss": 0.8264, "step": 10269 }, { "epoch": 0.6132807834706796, "grad_norm": 1.761131763458252, "learning_rate": 4.297657753301042e-06, "loss": 0.8157, "step": 10270 }, { "epoch": 0.6133404992236952, "grad_norm": 1.7658625841140747, "learning_rate": 4.296994227324e-06, "loss": 0.8326, "step": 10271 }, { "epoch": 0.6134002149767108, "grad_norm": 1.624356985092163, "learning_rate": 4.2963307013469575e-06, "loss": 0.8195, "step": 10272 }, { "epoch": 0.6134599307297265, "grad_norm": 1.8550128936767578, "learning_rate": 4.295667175369916e-06, "loss": 0.8661, "step": 10273 }, { "epoch": 0.6135196464827422, "grad_norm": 2.341233968734741, "learning_rate": 4.295003649392875e-06, "loss": 0.8369, "step": 10274 }, { "epoch": 0.6135793622357578, "grad_norm": 2.317462205886841, "learning_rate": 4.294340123415832e-06, "loss": 0.8401, "step": 10275 }, { "epoch": 0.6136390779887735, "grad_norm": 2.2160348892211914, "learning_rate": 4.29367659743879e-06, "loss": 0.8715, "step": 10276 }, { "epoch": 0.613698793741789, "grad_norm": 5.222003936767578, "learning_rate": 4.293013071461748e-06, "loss": 0.8314, "step": 10277 }, { "epoch": 0.6137585094948047, "grad_norm": 1.8640943765640259, "learning_rate": 4.292349545484706e-06, "loss": 0.8525, "step": 10278 }, { "epoch": 0.6138182252478204, "grad_norm": 1.7119762897491455, "learning_rate": 4.291686019507664e-06, "loss": 0.8556, "step": 10279 }, { "epoch": 0.613877941000836, "grad_norm": 3.352010488510132, "learning_rate": 4.291022493530622e-06, "loss": 0.8303, "step": 10280 }, { "epoch": 0.6139376567538517, "grad_norm": 2.8173041343688965, "learning_rate": 4.2903589675535805e-06, "loss": 0.8385, "step": 10281 }, { "epoch": 0.6139973725068673, "grad_norm": 2.2716548442840576, "learning_rate": 4.289695441576538e-06, "loss": 0.8367, "step": 10282 }, { "epoch": 0.6140570882598829, "grad_norm": 2.0107784271240234, "learning_rate": 4.289031915599496e-06, "loss": 0.859, "step": 10283 }, { "epoch": 0.6141168040128986, "grad_norm": 3.7801713943481445, "learning_rate": 4.288368389622454e-06, "loss": 0.8209, "step": 10284 }, { "epoch": 0.6141765197659143, "grad_norm": 2.6322381496429443, "learning_rate": 4.287704863645412e-06, "loss": 0.8241, "step": 10285 }, { "epoch": 0.6142362355189299, "grad_norm": 2.1574349403381348, "learning_rate": 4.28704133766837e-06, "loss": 0.8545, "step": 10286 }, { "epoch": 0.6142959512719456, "grad_norm": 2.0172924995422363, "learning_rate": 4.286377811691328e-06, "loss": 0.8356, "step": 10287 }, { "epoch": 0.6143556670249611, "grad_norm": 1.9037569761276245, "learning_rate": 4.2857142857142855e-06, "loss": 0.8318, "step": 10288 }, { "epoch": 0.6144153827779768, "grad_norm": 2.6432528495788574, "learning_rate": 4.285050759737244e-06, "loss": 0.8159, "step": 10289 }, { "epoch": 0.6144750985309925, "grad_norm": 2.0402495861053467, "learning_rate": 4.2843872337602025e-06, "loss": 0.854, "step": 10290 }, { "epoch": 0.6145348142840081, "grad_norm": 2.2825865745544434, "learning_rate": 4.28372370778316e-06, "loss": 0.8391, "step": 10291 }, { "epoch": 0.6145945300370238, "grad_norm": 2.652679204940796, "learning_rate": 4.283060181806118e-06, "loss": 0.8351, "step": 10292 }, { "epoch": 0.6146542457900395, "grad_norm": 2.789451837539673, "learning_rate": 4.282396655829076e-06, "loss": 0.8156, "step": 10293 }, { "epoch": 0.614713961543055, "grad_norm": 5.254025459289551, "learning_rate": 4.281733129852034e-06, "loss": 0.8254, "step": 10294 }, { "epoch": 0.6147736772960707, "grad_norm": 2.0845634937286377, "learning_rate": 4.281069603874992e-06, "loss": 0.8518, "step": 10295 }, { "epoch": 0.6148333930490864, "grad_norm": 2.041306495666504, "learning_rate": 4.28040607789795e-06, "loss": 0.869, "step": 10296 }, { "epoch": 0.614893108802102, "grad_norm": 2.0537607669830322, "learning_rate": 4.2797425519209075e-06, "loss": 0.8314, "step": 10297 }, { "epoch": 0.6149528245551177, "grad_norm": 2.4047584533691406, "learning_rate": 4.279079025943866e-06, "loss": 0.8254, "step": 10298 }, { "epoch": 0.6150125403081332, "grad_norm": 2.141415596008301, "learning_rate": 4.2784154999668246e-06, "loss": 0.8589, "step": 10299 }, { "epoch": 0.6150722560611489, "grad_norm": 2.3529818058013916, "learning_rate": 4.277751973989782e-06, "loss": 0.828, "step": 10300 }, { "epoch": 0.6151319718141646, "grad_norm": 1.6809555292129517, "learning_rate": 4.27708844801274e-06, "loss": 0.8681, "step": 10301 }, { "epoch": 0.6151916875671802, "grad_norm": 2.0683445930480957, "learning_rate": 4.276424922035698e-06, "loss": 0.8364, "step": 10302 }, { "epoch": 0.6152514033201959, "grad_norm": 2.300240993499756, "learning_rate": 4.275761396058656e-06, "loss": 0.8572, "step": 10303 }, { "epoch": 0.6153111190732116, "grad_norm": 2.033010721206665, "learning_rate": 4.275097870081614e-06, "loss": 0.877, "step": 10304 }, { "epoch": 0.6153708348262271, "grad_norm": 2.999525547027588, "learning_rate": 4.274434344104572e-06, "loss": 0.8344, "step": 10305 }, { "epoch": 0.6154305505792428, "grad_norm": 1.699599266052246, "learning_rate": 4.27377081812753e-06, "loss": 0.7985, "step": 10306 }, { "epoch": 0.6154902663322585, "grad_norm": 2.308795690536499, "learning_rate": 4.273107292150488e-06, "loss": 0.8221, "step": 10307 }, { "epoch": 0.6155499820852741, "grad_norm": 2.284989356994629, "learning_rate": 4.272443766173446e-06, "loss": 0.8973, "step": 10308 }, { "epoch": 0.6156096978382898, "grad_norm": 2.122100591659546, "learning_rate": 4.271780240196404e-06, "loss": 0.8372, "step": 10309 }, { "epoch": 0.6156694135913053, "grad_norm": 1.8676058053970337, "learning_rate": 4.271116714219362e-06, "loss": 0.83, "step": 10310 }, { "epoch": 0.615729129344321, "grad_norm": 1.75597083568573, "learning_rate": 4.27045318824232e-06, "loss": 0.8364, "step": 10311 }, { "epoch": 0.6157888450973367, "grad_norm": 1.8563045263290405, "learning_rate": 4.269789662265278e-06, "loss": 0.8501, "step": 10312 }, { "epoch": 0.6158485608503523, "grad_norm": 3.087529182434082, "learning_rate": 4.2691261362882354e-06, "loss": 0.8364, "step": 10313 }, { "epoch": 0.615908276603368, "grad_norm": 2.6409270763397217, "learning_rate": 4.268462610311194e-06, "loss": 0.8121, "step": 10314 }, { "epoch": 0.6159679923563837, "grad_norm": 2.168560743331909, "learning_rate": 4.2677990843341525e-06, "loss": 0.8386, "step": 10315 }, { "epoch": 0.6160277081093992, "grad_norm": 2.433727741241455, "learning_rate": 4.26713555835711e-06, "loss": 0.8359, "step": 10316 }, { "epoch": 0.6160874238624149, "grad_norm": 1.7805838584899902, "learning_rate": 4.266472032380068e-06, "loss": 0.8367, "step": 10317 }, { "epoch": 0.6161471396154306, "grad_norm": 2.2427470684051514, "learning_rate": 4.265808506403026e-06, "loss": 0.8283, "step": 10318 }, { "epoch": 0.6162068553684462, "grad_norm": 2.5664944648742676, "learning_rate": 4.265144980425984e-06, "loss": 0.8557, "step": 10319 }, { "epoch": 0.6162665711214619, "grad_norm": 1.8005915880203247, "learning_rate": 4.264481454448942e-06, "loss": 0.8249, "step": 10320 }, { "epoch": 0.6163262868744774, "grad_norm": 2.4537837505340576, "learning_rate": 4.2638179284719e-06, "loss": 0.8647, "step": 10321 }, { "epoch": 0.6163860026274931, "grad_norm": 2.579738140106201, "learning_rate": 4.2631544024948575e-06, "loss": 0.8382, "step": 10322 }, { "epoch": 0.6164457183805088, "grad_norm": 1.8272454738616943, "learning_rate": 4.262490876517816e-06, "loss": 0.8531, "step": 10323 }, { "epoch": 0.6165054341335244, "grad_norm": 2.3719301223754883, "learning_rate": 4.2618273505407745e-06, "loss": 0.8668, "step": 10324 }, { "epoch": 0.6165651498865401, "grad_norm": 1.8351000547409058, "learning_rate": 4.261163824563732e-06, "loss": 0.81, "step": 10325 }, { "epoch": 0.6166248656395558, "grad_norm": 1.988421082496643, "learning_rate": 4.26050029858669e-06, "loss": 0.8247, "step": 10326 }, { "epoch": 0.6166845813925713, "grad_norm": 1.8082743883132935, "learning_rate": 4.259836772609648e-06, "loss": 0.8743, "step": 10327 }, { "epoch": 0.616744297145587, "grad_norm": 1.986281156539917, "learning_rate": 4.259173246632606e-06, "loss": 0.818, "step": 10328 }, { "epoch": 0.6168040128986026, "grad_norm": 2.1590840816497803, "learning_rate": 4.258509720655564e-06, "loss": 0.8407, "step": 10329 }, { "epoch": 0.6168637286516183, "grad_norm": 3.3726539611816406, "learning_rate": 4.257846194678522e-06, "loss": 0.8929, "step": 10330 }, { "epoch": 0.616923444404634, "grad_norm": 2.1682229042053223, "learning_rate": 4.25718266870148e-06, "loss": 0.8233, "step": 10331 }, { "epoch": 0.6169831601576495, "grad_norm": 2.1485226154327393, "learning_rate": 4.256519142724438e-06, "loss": 0.8359, "step": 10332 }, { "epoch": 0.6170428759106652, "grad_norm": 2.4934027194976807, "learning_rate": 4.255855616747396e-06, "loss": 0.8499, "step": 10333 }, { "epoch": 0.6171025916636809, "grad_norm": 2.828444242477417, "learning_rate": 4.255192090770354e-06, "loss": 0.8458, "step": 10334 }, { "epoch": 0.6171623074166965, "grad_norm": 2.426832914352417, "learning_rate": 4.254528564793312e-06, "loss": 0.8341, "step": 10335 }, { "epoch": 0.6172220231697122, "grad_norm": 1.9559121131896973, "learning_rate": 4.25386503881627e-06, "loss": 0.7967, "step": 10336 }, { "epoch": 0.6172817389227279, "grad_norm": 2.592782974243164, "learning_rate": 4.253201512839228e-06, "loss": 0.837, "step": 10337 }, { "epoch": 0.6173414546757434, "grad_norm": 2.6986167430877686, "learning_rate": 4.252537986862185e-06, "loss": 0.8636, "step": 10338 }, { "epoch": 0.6174011704287591, "grad_norm": 7.242136001586914, "learning_rate": 4.251874460885144e-06, "loss": 0.8353, "step": 10339 }, { "epoch": 0.6174608861817747, "grad_norm": 2.227383613586426, "learning_rate": 4.2512109349081024e-06, "loss": 0.8662, "step": 10340 }, { "epoch": 0.6175206019347904, "grad_norm": 2.762272596359253, "learning_rate": 4.25054740893106e-06, "loss": 0.8582, "step": 10341 }, { "epoch": 0.6175803176878061, "grad_norm": 1.5856026411056519, "learning_rate": 4.249883882954018e-06, "loss": 0.825, "step": 10342 }, { "epoch": 0.6176400334408216, "grad_norm": 1.881048321723938, "learning_rate": 4.249220356976976e-06, "loss": 0.8485, "step": 10343 }, { "epoch": 0.6176997491938373, "grad_norm": 2.5317466259002686, "learning_rate": 4.248556830999934e-06, "loss": 0.823, "step": 10344 }, { "epoch": 0.617759464946853, "grad_norm": 3.655993700027466, "learning_rate": 4.247893305022892e-06, "loss": 0.8538, "step": 10345 }, { "epoch": 0.6178191806998686, "grad_norm": 1.688924789428711, "learning_rate": 4.24722977904585e-06, "loss": 0.8514, "step": 10346 }, { "epoch": 0.6178788964528843, "grad_norm": 1.7435734272003174, "learning_rate": 4.2465662530688075e-06, "loss": 0.8306, "step": 10347 }, { "epoch": 0.6179386122059, "grad_norm": 1.876800298690796, "learning_rate": 4.2459027270917656e-06, "loss": 0.8361, "step": 10348 }, { "epoch": 0.6179983279589155, "grad_norm": 2.2443435192108154, "learning_rate": 4.2452392011147245e-06, "loss": 0.8645, "step": 10349 }, { "epoch": 0.6180580437119312, "grad_norm": 4.488665580749512, "learning_rate": 4.244575675137682e-06, "loss": 0.8772, "step": 10350 }, { "epoch": 0.6181177594649468, "grad_norm": 3.6728768348693848, "learning_rate": 4.24391214916064e-06, "loss": 0.8124, "step": 10351 }, { "epoch": 0.6181774752179625, "grad_norm": 2.3529787063598633, "learning_rate": 4.243248623183598e-06, "loss": 0.8265, "step": 10352 }, { "epoch": 0.6182371909709782, "grad_norm": 1.7724734544754028, "learning_rate": 4.242585097206556e-06, "loss": 0.8483, "step": 10353 }, { "epoch": 0.6182969067239938, "grad_norm": 2.2345213890075684, "learning_rate": 4.241921571229514e-06, "loss": 0.8454, "step": 10354 }, { "epoch": 0.6183566224770094, "grad_norm": 3.1754164695739746, "learning_rate": 4.241258045252472e-06, "loss": 0.833, "step": 10355 }, { "epoch": 0.6184163382300251, "grad_norm": 2.245429754257202, "learning_rate": 4.24059451927543e-06, "loss": 0.8324, "step": 10356 }, { "epoch": 0.6184760539830407, "grad_norm": 1.720550537109375, "learning_rate": 4.239930993298388e-06, "loss": 0.8557, "step": 10357 }, { "epoch": 0.6185357697360564, "grad_norm": 2.7590999603271484, "learning_rate": 4.239267467321346e-06, "loss": 0.8294, "step": 10358 }, { "epoch": 0.6185954854890721, "grad_norm": 1.5169460773468018, "learning_rate": 4.238603941344304e-06, "loss": 0.8456, "step": 10359 }, { "epoch": 0.6186552012420876, "grad_norm": 3.282707929611206, "learning_rate": 4.237940415367262e-06, "loss": 0.8652, "step": 10360 }, { "epoch": 0.6187149169951033, "grad_norm": 1.626591444015503, "learning_rate": 4.23727688939022e-06, "loss": 0.8691, "step": 10361 }, { "epoch": 0.618774632748119, "grad_norm": 2.380951404571533, "learning_rate": 4.236613363413178e-06, "loss": 0.8626, "step": 10362 }, { "epoch": 0.6188343485011346, "grad_norm": 1.6684294939041138, "learning_rate": 4.235949837436135e-06, "loss": 0.8507, "step": 10363 }, { "epoch": 0.6188940642541503, "grad_norm": 3.691788911819458, "learning_rate": 4.235286311459094e-06, "loss": 0.8225, "step": 10364 }, { "epoch": 0.6189537800071659, "grad_norm": 3.4719736576080322, "learning_rate": 4.234622785482052e-06, "loss": 0.8537, "step": 10365 }, { "epoch": 0.6190134957601815, "grad_norm": 2.5469272136688232, "learning_rate": 4.23395925950501e-06, "loss": 0.8039, "step": 10366 }, { "epoch": 0.6190732115131972, "grad_norm": 2.003059148788452, "learning_rate": 4.233295733527968e-06, "loss": 0.8374, "step": 10367 }, { "epoch": 0.6191329272662128, "grad_norm": 2.11020827293396, "learning_rate": 4.232632207550926e-06, "loss": 0.8686, "step": 10368 }, { "epoch": 0.6191926430192285, "grad_norm": 2.2324116230010986, "learning_rate": 4.231968681573884e-06, "loss": 0.8194, "step": 10369 }, { "epoch": 0.6192523587722442, "grad_norm": 2.7716293334960938, "learning_rate": 4.231305155596842e-06, "loss": 0.8538, "step": 10370 }, { "epoch": 0.6193120745252597, "grad_norm": 1.831447958946228, "learning_rate": 4.2306416296198e-06, "loss": 0.8365, "step": 10371 }, { "epoch": 0.6193717902782754, "grad_norm": 2.6772663593292236, "learning_rate": 4.2299781036427574e-06, "loss": 0.8582, "step": 10372 }, { "epoch": 0.619431506031291, "grad_norm": 3.7958641052246094, "learning_rate": 4.2293145776657155e-06, "loss": 0.8392, "step": 10373 }, { "epoch": 0.6194912217843067, "grad_norm": 3.833967685699463, "learning_rate": 4.2286510516886745e-06, "loss": 0.8754, "step": 10374 }, { "epoch": 0.6195509375373224, "grad_norm": 2.2232749462127686, "learning_rate": 4.227987525711632e-06, "loss": 0.8364, "step": 10375 }, { "epoch": 0.619610653290338, "grad_norm": 2.4332008361816406, "learning_rate": 4.22732399973459e-06, "loss": 0.8344, "step": 10376 }, { "epoch": 0.6196703690433536, "grad_norm": 2.8590900897979736, "learning_rate": 4.226660473757548e-06, "loss": 0.7921, "step": 10377 }, { "epoch": 0.6197300847963693, "grad_norm": 1.4470696449279785, "learning_rate": 4.225996947780506e-06, "loss": 0.8317, "step": 10378 }, { "epoch": 0.6197898005493849, "grad_norm": 1.8371144533157349, "learning_rate": 4.225333421803464e-06, "loss": 0.8153, "step": 10379 }, { "epoch": 0.6198495163024006, "grad_norm": 2.1739747524261475, "learning_rate": 4.224669895826422e-06, "loss": 0.8657, "step": 10380 }, { "epoch": 0.6199092320554163, "grad_norm": 2.520646333694458, "learning_rate": 4.22400636984938e-06, "loss": 0.8436, "step": 10381 }, { "epoch": 0.6199689478084318, "grad_norm": 2.2406487464904785, "learning_rate": 4.223342843872338e-06, "loss": 0.8765, "step": 10382 }, { "epoch": 0.6200286635614475, "grad_norm": 2.8764657974243164, "learning_rate": 4.222679317895296e-06, "loss": 0.8647, "step": 10383 }, { "epoch": 0.6200883793144631, "grad_norm": 13.159602165222168, "learning_rate": 4.222015791918254e-06, "loss": 0.897, "step": 10384 }, { "epoch": 0.6201480950674788, "grad_norm": 2.3400440216064453, "learning_rate": 4.221352265941212e-06, "loss": 0.8453, "step": 10385 }, { "epoch": 0.6202078108204945, "grad_norm": 1.85003662109375, "learning_rate": 4.22068873996417e-06, "loss": 0.8211, "step": 10386 }, { "epoch": 0.6202675265735101, "grad_norm": 1.7367173433303833, "learning_rate": 4.220025213987128e-06, "loss": 0.8449, "step": 10387 }, { "epoch": 0.6203272423265257, "grad_norm": 1.6498466730117798, "learning_rate": 4.219361688010085e-06, "loss": 0.8422, "step": 10388 }, { "epoch": 0.6203869580795414, "grad_norm": 7.1366705894470215, "learning_rate": 4.218698162033044e-06, "loss": 0.8094, "step": 10389 }, { "epoch": 0.620446673832557, "grad_norm": 1.8880736827850342, "learning_rate": 4.218034636056002e-06, "loss": 0.837, "step": 10390 }, { "epoch": 0.6205063895855727, "grad_norm": 2.246187448501587, "learning_rate": 4.21737111007896e-06, "loss": 0.9146, "step": 10391 }, { "epoch": 0.6205661053385884, "grad_norm": 1.8555402755737305, "learning_rate": 4.216707584101918e-06, "loss": 0.8359, "step": 10392 }, { "epoch": 0.6206258210916039, "grad_norm": 3.235640525817871, "learning_rate": 4.216044058124876e-06, "loss": 0.8391, "step": 10393 }, { "epoch": 0.6206855368446196, "grad_norm": 2.7594473361968994, "learning_rate": 4.215380532147834e-06, "loss": 0.8352, "step": 10394 }, { "epoch": 0.6207452525976352, "grad_norm": 2.128927230834961, "learning_rate": 4.214717006170792e-06, "loss": 0.7906, "step": 10395 }, { "epoch": 0.6208049683506509, "grad_norm": 1.983138084411621, "learning_rate": 4.21405348019375e-06, "loss": 0.8444, "step": 10396 }, { "epoch": 0.6208646841036666, "grad_norm": 2.309962511062622, "learning_rate": 4.213389954216707e-06, "loss": 0.8567, "step": 10397 }, { "epoch": 0.6209243998566822, "grad_norm": 1.4800065755844116, "learning_rate": 4.2127264282396655e-06, "loss": 0.8071, "step": 10398 }, { "epoch": 0.6209841156096978, "grad_norm": 2.4549076557159424, "learning_rate": 4.2120629022626244e-06, "loss": 0.8561, "step": 10399 }, { "epoch": 0.6210438313627135, "grad_norm": 2.2474093437194824, "learning_rate": 4.211399376285582e-06, "loss": 0.8514, "step": 10400 }, { "epoch": 0.6210438313627135, "eval_text_loss": 0.9039833545684814, "eval_text_runtime": 15.1722, "eval_text_samples_per_second": 263.639, "eval_text_steps_per_second": 0.527, "step": 10400 }, { "epoch": 0.6210438313627135, "eval_image_loss": 0.6118402481079102, "eval_image_runtime": 5.0281, "eval_image_samples_per_second": 795.525, "eval_image_steps_per_second": 1.591, "step": 10400 }, { "epoch": 0.6210438313627135, "eval_video_loss": 1.0492918491363525, "eval_video_runtime": 77.1564, "eval_video_samples_per_second": 51.843, "eval_video_steps_per_second": 0.104, "step": 10400 }, { "epoch": 0.6211035471157291, "grad_norm": 3.6331186294555664, "learning_rate": 4.21073585030854e-06, "loss": 0.8436, "step": 10401 }, { "epoch": 0.6211632628687448, "grad_norm": 1.8992360830307007, "learning_rate": 4.210072324331498e-06, "loss": 0.8394, "step": 10402 }, { "epoch": 0.6212229786217605, "grad_norm": 2.570685625076294, "learning_rate": 4.209408798354456e-06, "loss": 0.8481, "step": 10403 }, { "epoch": 0.621282694374776, "grad_norm": 3.2910962104797363, "learning_rate": 4.208745272377414e-06, "loss": 0.8649, "step": 10404 }, { "epoch": 0.6213424101277917, "grad_norm": 2.1616508960723877, "learning_rate": 4.208081746400372e-06, "loss": 0.8629, "step": 10405 }, { "epoch": 0.6214021258808073, "grad_norm": 2.2712366580963135, "learning_rate": 4.20741822042333e-06, "loss": 0.8108, "step": 10406 }, { "epoch": 0.621461841633823, "grad_norm": 2.0492262840270996, "learning_rate": 4.2067546944462876e-06, "loss": 0.8585, "step": 10407 }, { "epoch": 0.6215215573868387, "grad_norm": 5.120309829711914, "learning_rate": 4.206091168469246e-06, "loss": 0.8736, "step": 10408 }, { "epoch": 0.6215812731398543, "grad_norm": 2.6003854274749756, "learning_rate": 4.205427642492204e-06, "loss": 0.8529, "step": 10409 }, { "epoch": 0.6216409888928699, "grad_norm": 1.8055083751678467, "learning_rate": 4.204764116515162e-06, "loss": 0.8446, "step": 10410 }, { "epoch": 0.6217007046458856, "grad_norm": 2.038017749786377, "learning_rate": 4.20410059053812e-06, "loss": 0.8824, "step": 10411 }, { "epoch": 0.6217604203989012, "grad_norm": 2.329970598220825, "learning_rate": 4.203437064561078e-06, "loss": 0.8741, "step": 10412 }, { "epoch": 0.6218201361519169, "grad_norm": 2.7912652492523193, "learning_rate": 4.202773538584035e-06, "loss": 0.8622, "step": 10413 }, { "epoch": 0.6218798519049326, "grad_norm": 2.1491448879241943, "learning_rate": 4.202110012606994e-06, "loss": 0.8413, "step": 10414 }, { "epoch": 0.6219395676579481, "grad_norm": 2.6174240112304688, "learning_rate": 4.201446486629952e-06, "loss": 0.8474, "step": 10415 }, { "epoch": 0.6219992834109638, "grad_norm": 2.2243399620056152, "learning_rate": 4.20078296065291e-06, "loss": 0.821, "step": 10416 }, { "epoch": 0.6220589991639794, "grad_norm": 2.3690686225891113, "learning_rate": 4.200119434675868e-06, "loss": 0.8618, "step": 10417 }, { "epoch": 0.6221187149169951, "grad_norm": 2.339057207107544, "learning_rate": 4.199455908698826e-06, "loss": 0.8512, "step": 10418 }, { "epoch": 0.6221784306700108, "grad_norm": 4.5762619972229, "learning_rate": 4.198792382721784e-06, "loss": 0.8429, "step": 10419 }, { "epoch": 0.6222381464230264, "grad_norm": 4.4651336669921875, "learning_rate": 4.198128856744742e-06, "loss": 0.8458, "step": 10420 }, { "epoch": 0.622297862176042, "grad_norm": 3.23884654045105, "learning_rate": 4.1974653307677e-06, "loss": 0.8803, "step": 10421 }, { "epoch": 0.6223575779290577, "grad_norm": 1.7409335374832153, "learning_rate": 4.196801804790658e-06, "loss": 0.8503, "step": 10422 }, { "epoch": 0.6224172936820733, "grad_norm": 1.6892955303192139, "learning_rate": 4.1961382788136155e-06, "loss": 0.8315, "step": 10423 }, { "epoch": 0.622477009435089, "grad_norm": 3.58449125289917, "learning_rate": 4.195474752836574e-06, "loss": 0.8565, "step": 10424 }, { "epoch": 0.6225367251881047, "grad_norm": 2.5980191230773926, "learning_rate": 4.194811226859532e-06, "loss": 0.8272, "step": 10425 }, { "epoch": 0.6225964409411203, "grad_norm": 1.9838035106658936, "learning_rate": 4.19414770088249e-06, "loss": 0.7941, "step": 10426 }, { "epoch": 0.6226561566941359, "grad_norm": 1.609726905822754, "learning_rate": 4.193484174905448e-06, "loss": 0.7958, "step": 10427 }, { "epoch": 0.6227158724471515, "grad_norm": 1.5813132524490356, "learning_rate": 4.192820648928406e-06, "loss": 0.8175, "step": 10428 }, { "epoch": 0.6227755882001672, "grad_norm": 2.009962320327759, "learning_rate": 4.192157122951364e-06, "loss": 0.8233, "step": 10429 }, { "epoch": 0.6228353039531829, "grad_norm": 3.5299065113067627, "learning_rate": 4.191493596974322e-06, "loss": 0.87, "step": 10430 }, { "epoch": 0.6228950197061985, "grad_norm": 2.5517845153808594, "learning_rate": 4.19083007099728e-06, "loss": 0.8774, "step": 10431 }, { "epoch": 0.6229547354592141, "grad_norm": 1.6734075546264648, "learning_rate": 4.1901665450202375e-06, "loss": 0.83, "step": 10432 }, { "epoch": 0.6230144512122298, "grad_norm": 2.9279022216796875, "learning_rate": 4.189503019043196e-06, "loss": 0.8331, "step": 10433 }, { "epoch": 0.6230741669652454, "grad_norm": 2.873657464981079, "learning_rate": 4.188839493066154e-06, "loss": 0.8491, "step": 10434 }, { "epoch": 0.6231338827182611, "grad_norm": 2.234252691268921, "learning_rate": 4.188175967089112e-06, "loss": 0.8348, "step": 10435 }, { "epoch": 0.6231935984712768, "grad_norm": 2.3261635303497314, "learning_rate": 4.18751244111207e-06, "loss": 0.8254, "step": 10436 }, { "epoch": 0.6232533142242924, "grad_norm": 2.705840826034546, "learning_rate": 4.186848915135028e-06, "loss": 0.8517, "step": 10437 }, { "epoch": 0.623313029977308, "grad_norm": 4.850791931152344, "learning_rate": 4.186185389157985e-06, "loss": 0.8621, "step": 10438 }, { "epoch": 0.6233727457303236, "grad_norm": 3.9960720539093018, "learning_rate": 4.185521863180944e-06, "loss": 0.8142, "step": 10439 }, { "epoch": 0.6234324614833393, "grad_norm": 3.4954328536987305, "learning_rate": 4.184858337203902e-06, "loss": 0.8456, "step": 10440 }, { "epoch": 0.623492177236355, "grad_norm": 2.1523518562316895, "learning_rate": 4.18419481122686e-06, "loss": 0.8379, "step": 10441 }, { "epoch": 0.6235518929893706, "grad_norm": 2.3118653297424316, "learning_rate": 4.183531285249818e-06, "loss": 0.8158, "step": 10442 }, { "epoch": 0.6236116087423862, "grad_norm": 1.5112934112548828, "learning_rate": 4.182867759272776e-06, "loss": 0.8194, "step": 10443 }, { "epoch": 0.6236713244954019, "grad_norm": 2.2488882541656494, "learning_rate": 4.182204233295734e-06, "loss": 0.8318, "step": 10444 }, { "epoch": 0.6237310402484175, "grad_norm": 2.494781732559204, "learning_rate": 4.181540707318692e-06, "loss": 0.8167, "step": 10445 }, { "epoch": 0.6237907560014332, "grad_norm": 2.517422676086426, "learning_rate": 4.18087718134165e-06, "loss": 0.8053, "step": 10446 }, { "epoch": 0.6238504717544489, "grad_norm": 2.175168037414551, "learning_rate": 4.180213655364608e-06, "loss": 0.8456, "step": 10447 }, { "epoch": 0.6239101875074645, "grad_norm": 2.705686092376709, "learning_rate": 4.1795501293875654e-06, "loss": 0.8318, "step": 10448 }, { "epoch": 0.6239699032604801, "grad_norm": 1.964841365814209, "learning_rate": 4.178886603410524e-06, "loss": 0.8322, "step": 10449 }, { "epoch": 0.6240296190134957, "grad_norm": 2.5880424976348877, "learning_rate": 4.178223077433482e-06, "loss": 0.8633, "step": 10450 }, { "epoch": 0.6240893347665114, "grad_norm": 2.1940882205963135, "learning_rate": 4.17755955145644e-06, "loss": 0.8021, "step": 10451 }, { "epoch": 0.6241490505195271, "grad_norm": 2.1501126289367676, "learning_rate": 4.176896025479398e-06, "loss": 0.8451, "step": 10452 }, { "epoch": 0.6242087662725427, "grad_norm": 3.659435272216797, "learning_rate": 4.176232499502356e-06, "loss": 0.8764, "step": 10453 }, { "epoch": 0.6242684820255583, "grad_norm": 1.7870757579803467, "learning_rate": 4.175568973525314e-06, "loss": 0.8146, "step": 10454 }, { "epoch": 0.624328197778574, "grad_norm": 2.612027883529663, "learning_rate": 4.174905447548272e-06, "loss": 0.8564, "step": 10455 }, { "epoch": 0.6243879135315896, "grad_norm": 2.1091701984405518, "learning_rate": 4.17424192157123e-06, "loss": 0.8372, "step": 10456 }, { "epoch": 0.6244476292846053, "grad_norm": 2.177870988845825, "learning_rate": 4.1735783955941875e-06, "loss": 0.8177, "step": 10457 }, { "epoch": 0.624507345037621, "grad_norm": 2.5339832305908203, "learning_rate": 4.172914869617146e-06, "loss": 0.8392, "step": 10458 }, { "epoch": 0.6245670607906366, "grad_norm": 2.62685489654541, "learning_rate": 4.172251343640104e-06, "loss": 0.8323, "step": 10459 }, { "epoch": 0.6246267765436522, "grad_norm": 2.0117592811584473, "learning_rate": 4.171587817663062e-06, "loss": 0.8552, "step": 10460 }, { "epoch": 0.6246864922966678, "grad_norm": 1.7715364694595337, "learning_rate": 4.17092429168602e-06, "loss": 0.8171, "step": 10461 }, { "epoch": 0.6247462080496835, "grad_norm": 2.119755744934082, "learning_rate": 4.170260765708978e-06, "loss": 0.8425, "step": 10462 }, { "epoch": 0.6248059238026992, "grad_norm": 1.8939471244812012, "learning_rate": 4.169597239731935e-06, "loss": 0.8478, "step": 10463 }, { "epoch": 0.6248656395557148, "grad_norm": 2.0375239849090576, "learning_rate": 4.168933713754894e-06, "loss": 0.8516, "step": 10464 }, { "epoch": 0.6249253553087304, "grad_norm": 1.5852750539779663, "learning_rate": 4.168270187777852e-06, "loss": 0.8369, "step": 10465 }, { "epoch": 0.6249850710617461, "grad_norm": 2.007448196411133, "learning_rate": 4.1676066618008096e-06, "loss": 0.8108, "step": 10466 }, { "epoch": 0.6250447868147617, "grad_norm": 1.7748162746429443, "learning_rate": 4.166943135823768e-06, "loss": 0.8634, "step": 10467 }, { "epoch": 0.6251045025677774, "grad_norm": 1.926793098449707, "learning_rate": 4.166279609846726e-06, "loss": 0.8526, "step": 10468 }, { "epoch": 0.625164218320793, "grad_norm": 2.2183990478515625, "learning_rate": 4.165616083869684e-06, "loss": 0.8521, "step": 10469 }, { "epoch": 0.6252239340738087, "grad_norm": 2.1851701736450195, "learning_rate": 4.164952557892642e-06, "loss": 0.812, "step": 10470 }, { "epoch": 0.6252836498268243, "grad_norm": 2.611980438232422, "learning_rate": 4.1642890319156e-06, "loss": 0.8368, "step": 10471 }, { "epoch": 0.6253433655798399, "grad_norm": 1.613488793373108, "learning_rate": 4.163625505938558e-06, "loss": 0.8154, "step": 10472 }, { "epoch": 0.6254030813328556, "grad_norm": 1.7289619445800781, "learning_rate": 4.162961979961515e-06, "loss": 0.8624, "step": 10473 }, { "epoch": 0.6254627970858713, "grad_norm": 2.0872459411621094, "learning_rate": 4.162298453984474e-06, "loss": 0.873, "step": 10474 }, { "epoch": 0.6255225128388869, "grad_norm": 2.0025124549865723, "learning_rate": 4.161634928007432e-06, "loss": 0.8582, "step": 10475 }, { "epoch": 0.6255822285919025, "grad_norm": 2.453674793243408, "learning_rate": 4.16097140203039e-06, "loss": 0.8724, "step": 10476 }, { "epoch": 0.6256419443449182, "grad_norm": 2.2872462272644043, "learning_rate": 4.160307876053348e-06, "loss": 0.8142, "step": 10477 }, { "epoch": 0.6257016600979338, "grad_norm": 6.981052875518799, "learning_rate": 4.159644350076306e-06, "loss": 0.8421, "step": 10478 }, { "epoch": 0.6257613758509495, "grad_norm": 1.9397168159484863, "learning_rate": 4.158980824099264e-06, "loss": 0.8507, "step": 10479 }, { "epoch": 0.6258210916039652, "grad_norm": 1.899794101715088, "learning_rate": 4.158317298122222e-06, "loss": 0.8399, "step": 10480 }, { "epoch": 0.6258808073569808, "grad_norm": 2.168715715408325, "learning_rate": 4.15765377214518e-06, "loss": 0.8533, "step": 10481 }, { "epoch": 0.6259405231099964, "grad_norm": 3.5452864170074463, "learning_rate": 4.1569902461681375e-06, "loss": 0.8237, "step": 10482 }, { "epoch": 0.626000238863012, "grad_norm": 3.774481773376465, "learning_rate": 4.1563267201910956e-06, "loss": 0.8411, "step": 10483 }, { "epoch": 0.6260599546160277, "grad_norm": 2.273245096206665, "learning_rate": 4.155663194214054e-06, "loss": 0.8613, "step": 10484 }, { "epoch": 0.6261196703690434, "grad_norm": 2.6392722129821777, "learning_rate": 4.154999668237012e-06, "loss": 0.8265, "step": 10485 }, { "epoch": 0.626179386122059, "grad_norm": 1.794204592704773, "learning_rate": 4.15433614225997e-06, "loss": 0.8467, "step": 10486 }, { "epoch": 0.6262391018750747, "grad_norm": 2.647387981414795, "learning_rate": 4.153672616282928e-06, "loss": 0.8313, "step": 10487 }, { "epoch": 0.6262988176280903, "grad_norm": 1.5855712890625, "learning_rate": 4.153009090305885e-06, "loss": 0.832, "step": 10488 }, { "epoch": 0.6263585333811059, "grad_norm": 1.6993992328643799, "learning_rate": 4.152345564328844e-06, "loss": 0.8505, "step": 10489 }, { "epoch": 0.6264182491341216, "grad_norm": 1.9314963817596436, "learning_rate": 4.151682038351802e-06, "loss": 0.8556, "step": 10490 }, { "epoch": 0.6264779648871373, "grad_norm": 4.8906569480896, "learning_rate": 4.1510185123747595e-06, "loss": 0.8245, "step": 10491 }, { "epoch": 0.6265376806401529, "grad_norm": 3.245936155319214, "learning_rate": 4.150354986397718e-06, "loss": 0.8236, "step": 10492 }, { "epoch": 0.6265973963931685, "grad_norm": 2.83833646774292, "learning_rate": 4.149691460420676e-06, "loss": 0.8343, "step": 10493 }, { "epoch": 0.6266571121461841, "grad_norm": 1.664863109588623, "learning_rate": 4.149027934443634e-06, "loss": 0.833, "step": 10494 }, { "epoch": 0.6267168278991998, "grad_norm": 2.6143860816955566, "learning_rate": 4.148364408466592e-06, "loss": 0.8208, "step": 10495 }, { "epoch": 0.6267765436522155, "grad_norm": 4.976192474365234, "learning_rate": 4.14770088248955e-06, "loss": 0.8264, "step": 10496 }, { "epoch": 0.6268362594052311, "grad_norm": 3.094653606414795, "learning_rate": 4.147037356512508e-06, "loss": 0.8294, "step": 10497 }, { "epoch": 0.6268959751582468, "grad_norm": 2.0161495208740234, "learning_rate": 4.146373830535465e-06, "loss": 0.8398, "step": 10498 }, { "epoch": 0.6269556909112624, "grad_norm": 1.5587141513824463, "learning_rate": 4.145710304558424e-06, "loss": 0.8258, "step": 10499 }, { "epoch": 0.627015406664278, "grad_norm": 2.9612598419189453, "learning_rate": 4.145046778581382e-06, "loss": 0.8125, "step": 10500 }, { "epoch": 0.6270751224172937, "grad_norm": 1.9105501174926758, "learning_rate": 4.14438325260434e-06, "loss": 0.8538, "step": 10501 }, { "epoch": 0.6271348381703093, "grad_norm": 4.934284210205078, "learning_rate": 4.143719726627298e-06, "loss": 0.8482, "step": 10502 }, { "epoch": 0.627194553923325, "grad_norm": 2.7797465324401855, "learning_rate": 4.143056200650256e-06, "loss": 0.8469, "step": 10503 }, { "epoch": 0.6272542696763406, "grad_norm": 1.8069679737091064, "learning_rate": 4.142392674673214e-06, "loss": 0.8444, "step": 10504 }, { "epoch": 0.6273139854293562, "grad_norm": 1.5385780334472656, "learning_rate": 4.141729148696172e-06, "loss": 0.8398, "step": 10505 }, { "epoch": 0.6273737011823719, "grad_norm": 1.7672216892242432, "learning_rate": 4.14106562271913e-06, "loss": 0.7996, "step": 10506 }, { "epoch": 0.6274334169353876, "grad_norm": 3.277039051055908, "learning_rate": 4.1404020967420874e-06, "loss": 0.8361, "step": 10507 }, { "epoch": 0.6274931326884032, "grad_norm": 1.7164360284805298, "learning_rate": 4.1397385707650455e-06, "loss": 0.8307, "step": 10508 }, { "epoch": 0.6275528484414189, "grad_norm": 2.3379030227661133, "learning_rate": 4.139075044788004e-06, "loss": 0.8337, "step": 10509 }, { "epoch": 0.6276125641944345, "grad_norm": 2.226440906524658, "learning_rate": 4.138411518810962e-06, "loss": 0.7998, "step": 10510 }, { "epoch": 0.6276722799474501, "grad_norm": 1.8337695598602295, "learning_rate": 4.13774799283392e-06, "loss": 0.8236, "step": 10511 }, { "epoch": 0.6277319957004658, "grad_norm": 2.0126044750213623, "learning_rate": 4.137084466856878e-06, "loss": 0.8248, "step": 10512 }, { "epoch": 0.6277917114534814, "grad_norm": 3.5569519996643066, "learning_rate": 4.136420940879835e-06, "loss": 0.8368, "step": 10513 }, { "epoch": 0.6278514272064971, "grad_norm": 2.5958755016326904, "learning_rate": 4.135757414902794e-06, "loss": 0.8421, "step": 10514 }, { "epoch": 0.6279111429595127, "grad_norm": 2.6895816326141357, "learning_rate": 4.135093888925752e-06, "loss": 0.868, "step": 10515 }, { "epoch": 0.6279708587125283, "grad_norm": 1.7556281089782715, "learning_rate": 4.1344303629487095e-06, "loss": 0.8263, "step": 10516 }, { "epoch": 0.628030574465544, "grad_norm": 2.827145576477051, "learning_rate": 4.133766836971668e-06, "loss": 0.8458, "step": 10517 }, { "epoch": 0.6280902902185597, "grad_norm": 1.9963932037353516, "learning_rate": 4.133103310994626e-06, "loss": 0.819, "step": 10518 }, { "epoch": 0.6281500059715753, "grad_norm": 2.039477825164795, "learning_rate": 4.132439785017584e-06, "loss": 0.798, "step": 10519 }, { "epoch": 0.628209721724591, "grad_norm": 2.593315362930298, "learning_rate": 4.131776259040542e-06, "loss": 0.8647, "step": 10520 }, { "epoch": 0.6282694374776066, "grad_norm": 2.0960865020751953, "learning_rate": 4.1311127330635e-06, "loss": 0.8325, "step": 10521 }, { "epoch": 0.6283291532306222, "grad_norm": 2.6890785694122314, "learning_rate": 4.130449207086458e-06, "loss": 0.8525, "step": 10522 }, { "epoch": 0.6283888689836379, "grad_norm": 2.103578805923462, "learning_rate": 4.129785681109415e-06, "loss": 0.8232, "step": 10523 }, { "epoch": 0.6284485847366535, "grad_norm": 2.1113698482513428, "learning_rate": 4.129122155132374e-06, "loss": 0.8242, "step": 10524 }, { "epoch": 0.6285083004896692, "grad_norm": 1.9459967613220215, "learning_rate": 4.1284586291553316e-06, "loss": 0.8492, "step": 10525 }, { "epoch": 0.6285680162426848, "grad_norm": 2.753056764602661, "learning_rate": 4.12779510317829e-06, "loss": 0.8894, "step": 10526 }, { "epoch": 0.6286277319957004, "grad_norm": 1.981913685798645, "learning_rate": 4.127131577201248e-06, "loss": 0.8426, "step": 10527 }, { "epoch": 0.6286874477487161, "grad_norm": 3.586254835128784, "learning_rate": 4.126468051224206e-06, "loss": 0.8513, "step": 10528 }, { "epoch": 0.6287471635017318, "grad_norm": 2.3563175201416016, "learning_rate": 4.125804525247164e-06, "loss": 0.8483, "step": 10529 }, { "epoch": 0.6288068792547474, "grad_norm": 2.446829319000244, "learning_rate": 4.125140999270122e-06, "loss": 0.7921, "step": 10530 }, { "epoch": 0.6288665950077631, "grad_norm": 2.492565870285034, "learning_rate": 4.12447747329308e-06, "loss": 0.8546, "step": 10531 }, { "epoch": 0.6289263107607787, "grad_norm": 1.7593917846679688, "learning_rate": 4.123813947316037e-06, "loss": 0.8709, "step": 10532 }, { "epoch": 0.6289860265137943, "grad_norm": 1.7527422904968262, "learning_rate": 4.1231504213389955e-06, "loss": 0.8492, "step": 10533 }, { "epoch": 0.62904574226681, "grad_norm": 2.909031391143799, "learning_rate": 4.122486895361954e-06, "loss": 0.8509, "step": 10534 }, { "epoch": 0.6291054580198256, "grad_norm": 1.6027867794036865, "learning_rate": 4.121823369384912e-06, "loss": 0.8522, "step": 10535 }, { "epoch": 0.6291651737728413, "grad_norm": 3.087059736251831, "learning_rate": 4.12115984340787e-06, "loss": 0.8275, "step": 10536 }, { "epoch": 0.6292248895258569, "grad_norm": 2.164341449737549, "learning_rate": 4.120496317430828e-06, "loss": 0.8909, "step": 10537 }, { "epoch": 0.6292846052788725, "grad_norm": 9.048171043395996, "learning_rate": 4.119832791453785e-06, "loss": 0.8664, "step": 10538 }, { "epoch": 0.6293443210318882, "grad_norm": 2.357156753540039, "learning_rate": 4.119169265476744e-06, "loss": 0.8547, "step": 10539 }, { "epoch": 0.6294040367849039, "grad_norm": 2.2632031440734863, "learning_rate": 4.118505739499702e-06, "loss": 0.8581, "step": 10540 }, { "epoch": 0.6294637525379195, "grad_norm": 1.6253548860549927, "learning_rate": 4.1178422135226595e-06, "loss": 0.8683, "step": 10541 }, { "epoch": 0.6295234682909352, "grad_norm": 2.2311418056488037, "learning_rate": 4.1171786875456176e-06, "loss": 0.7891, "step": 10542 }, { "epoch": 0.6295831840439507, "grad_norm": 1.9447523355484009, "learning_rate": 4.116515161568576e-06, "loss": 0.8461, "step": 10543 }, { "epoch": 0.6296428997969664, "grad_norm": 1.9585663080215454, "learning_rate": 4.115851635591534e-06, "loss": 0.8696, "step": 10544 }, { "epoch": 0.6297026155499821, "grad_norm": 2.6245291233062744, "learning_rate": 4.115188109614492e-06, "loss": 0.8597, "step": 10545 }, { "epoch": 0.6297623313029977, "grad_norm": 2.251312732696533, "learning_rate": 4.11452458363745e-06, "loss": 0.8605, "step": 10546 }, { "epoch": 0.6298220470560134, "grad_norm": 3.9576377868652344, "learning_rate": 4.113861057660408e-06, "loss": 0.839, "step": 10547 }, { "epoch": 0.629881762809029, "grad_norm": 2.0033912658691406, "learning_rate": 4.113197531683365e-06, "loss": 0.8341, "step": 10548 }, { "epoch": 0.6299414785620446, "grad_norm": 2.6894748210906982, "learning_rate": 4.112534005706324e-06, "loss": 0.8526, "step": 10549 }, { "epoch": 0.6300011943150603, "grad_norm": 2.326488733291626, "learning_rate": 4.1118704797292815e-06, "loss": 0.8269, "step": 10550 }, { "epoch": 0.630060910068076, "grad_norm": 4.174574375152588, "learning_rate": 4.11120695375224e-06, "loss": 0.8459, "step": 10551 }, { "epoch": 0.6301206258210916, "grad_norm": 1.5439589023590088, "learning_rate": 4.110543427775198e-06, "loss": 0.8812, "step": 10552 }, { "epoch": 0.6301803415741073, "grad_norm": 1.7545697689056396, "learning_rate": 4.109879901798156e-06, "loss": 0.8398, "step": 10553 }, { "epoch": 0.6302400573271228, "grad_norm": 1.9529153108596802, "learning_rate": 4.109216375821114e-06, "loss": 0.8484, "step": 10554 }, { "epoch": 0.6302997730801385, "grad_norm": 1.749838948249817, "learning_rate": 4.108552849844072e-06, "loss": 0.811, "step": 10555 }, { "epoch": 0.6303594888331542, "grad_norm": 2.018538236618042, "learning_rate": 4.10788932386703e-06, "loss": 0.8009, "step": 10556 }, { "epoch": 0.6304192045861698, "grad_norm": 2.15903639793396, "learning_rate": 4.107225797889987e-06, "loss": 0.8241, "step": 10557 }, { "epoch": 0.6304789203391855, "grad_norm": 3.3498148918151855, "learning_rate": 4.1065622719129455e-06, "loss": 0.8692, "step": 10558 }, { "epoch": 0.6305386360922012, "grad_norm": 2.6322786808013916, "learning_rate": 4.105898745935904e-06, "loss": 0.8399, "step": 10559 }, { "epoch": 0.6305983518452167, "grad_norm": 2.082597017288208, "learning_rate": 4.105235219958862e-06, "loss": 0.857, "step": 10560 }, { "epoch": 0.6306580675982324, "grad_norm": 2.3151257038116455, "learning_rate": 4.10457169398182e-06, "loss": 0.8578, "step": 10561 }, { "epoch": 0.6307177833512481, "grad_norm": 2.2815799713134766, "learning_rate": 4.103908168004778e-06, "loss": 0.8566, "step": 10562 }, { "epoch": 0.6307774991042637, "grad_norm": 1.7795684337615967, "learning_rate": 4.103244642027735e-06, "loss": 0.8371, "step": 10563 }, { "epoch": 0.6308372148572794, "grad_norm": 1.9500579833984375, "learning_rate": 4.102581116050694e-06, "loss": 0.8428, "step": 10564 }, { "epoch": 0.630896930610295, "grad_norm": 2.685788869857788, "learning_rate": 4.101917590073652e-06, "loss": 0.8025, "step": 10565 }, { "epoch": 0.6309566463633106, "grad_norm": 2.0471763610839844, "learning_rate": 4.1012540640966094e-06, "loss": 0.839, "step": 10566 }, { "epoch": 0.6310163621163263, "grad_norm": 1.9406907558441162, "learning_rate": 4.1005905381195675e-06, "loss": 0.8366, "step": 10567 }, { "epoch": 0.631076077869342, "grad_norm": 1.8458256721496582, "learning_rate": 4.099927012142526e-06, "loss": 0.7998, "step": 10568 }, { "epoch": 0.6311357936223576, "grad_norm": 2.118964195251465, "learning_rate": 4.099263486165484e-06, "loss": 0.8712, "step": 10569 }, { "epoch": 0.6311955093753733, "grad_norm": 1.9284391403198242, "learning_rate": 4.098599960188442e-06, "loss": 0.8523, "step": 10570 }, { "epoch": 0.6312552251283888, "grad_norm": 2.37715220451355, "learning_rate": 4.0979364342114e-06, "loss": 0.855, "step": 10571 }, { "epoch": 0.6313149408814045, "grad_norm": 3.0120327472686768, "learning_rate": 4.097272908234358e-06, "loss": 0.8397, "step": 10572 }, { "epoch": 0.6313746566344202, "grad_norm": 3.2375519275665283, "learning_rate": 4.096609382257315e-06, "loss": 0.8585, "step": 10573 }, { "epoch": 0.6314343723874358, "grad_norm": 2.3925249576568604, "learning_rate": 4.095945856280274e-06, "loss": 0.8696, "step": 10574 }, { "epoch": 0.6314940881404515, "grad_norm": 2.087001085281372, "learning_rate": 4.0952823303032315e-06, "loss": 0.8319, "step": 10575 }, { "epoch": 0.631553803893467, "grad_norm": 3.3536460399627686, "learning_rate": 4.09461880432619e-06, "loss": 0.8512, "step": 10576 }, { "epoch": 0.6316135196464827, "grad_norm": 2.1363985538482666, "learning_rate": 4.093955278349148e-06, "loss": 0.8578, "step": 10577 }, { "epoch": 0.6316732353994984, "grad_norm": 2.154768466949463, "learning_rate": 4.093291752372106e-06, "loss": 0.8065, "step": 10578 }, { "epoch": 0.631732951152514, "grad_norm": 1.7318775653839111, "learning_rate": 4.092628226395064e-06, "loss": 0.8392, "step": 10579 }, { "epoch": 0.6317926669055297, "grad_norm": 1.9181402921676636, "learning_rate": 4.091964700418022e-06, "loss": 0.8606, "step": 10580 }, { "epoch": 0.6318523826585454, "grad_norm": 3.237844705581665, "learning_rate": 4.09130117444098e-06, "loss": 0.8767, "step": 10581 }, { "epoch": 0.6319120984115609, "grad_norm": 2.167768955230713, "learning_rate": 4.090637648463937e-06, "loss": 0.8534, "step": 10582 }, { "epoch": 0.6319718141645766, "grad_norm": 3.0800840854644775, "learning_rate": 4.0899741224868955e-06, "loss": 0.8352, "step": 10583 }, { "epoch": 0.6320315299175923, "grad_norm": 2.383291006088257, "learning_rate": 4.0893105965098536e-06, "loss": 0.8373, "step": 10584 }, { "epoch": 0.6320912456706079, "grad_norm": 1.9682084321975708, "learning_rate": 4.088647070532812e-06, "loss": 0.8306, "step": 10585 }, { "epoch": 0.6321509614236236, "grad_norm": 1.9850854873657227, "learning_rate": 4.08798354455577e-06, "loss": 0.8203, "step": 10586 }, { "epoch": 0.6322106771766391, "grad_norm": 3.3822743892669678, "learning_rate": 4.087320018578728e-06, "loss": 0.8718, "step": 10587 }, { "epoch": 0.6322703929296548, "grad_norm": 2.1669654846191406, "learning_rate": 4.086656492601685e-06, "loss": 0.835, "step": 10588 }, { "epoch": 0.6323301086826705, "grad_norm": 2.0414609909057617, "learning_rate": 4.085992966624644e-06, "loss": 0.8459, "step": 10589 }, { "epoch": 0.6323898244356861, "grad_norm": 2.2923691272735596, "learning_rate": 4.085329440647602e-06, "loss": 0.8682, "step": 10590 }, { "epoch": 0.6324495401887018, "grad_norm": 3.072848320007324, "learning_rate": 4.084665914670559e-06, "loss": 0.8776, "step": 10591 }, { "epoch": 0.6325092559417175, "grad_norm": 2.3337764739990234, "learning_rate": 4.0840023886935175e-06, "loss": 0.8403, "step": 10592 }, { "epoch": 0.632568971694733, "grad_norm": 2.3366165161132812, "learning_rate": 4.083338862716476e-06, "loss": 0.8473, "step": 10593 }, { "epoch": 0.6326286874477487, "grad_norm": 2.6935102939605713, "learning_rate": 4.082675336739434e-06, "loss": 0.8376, "step": 10594 }, { "epoch": 0.6326884032007644, "grad_norm": 1.8175877332687378, "learning_rate": 4.082011810762392e-06, "loss": 0.8315, "step": 10595 }, { "epoch": 0.63274811895378, "grad_norm": 1.5149556398391724, "learning_rate": 4.08134828478535e-06, "loss": 0.8467, "step": 10596 }, { "epoch": 0.6328078347067957, "grad_norm": 2.1186459064483643, "learning_rate": 4.080684758808308e-06, "loss": 0.8514, "step": 10597 }, { "epoch": 0.6328675504598112, "grad_norm": 1.5439631938934326, "learning_rate": 4.080021232831265e-06, "loss": 0.8494, "step": 10598 }, { "epoch": 0.6329272662128269, "grad_norm": 2.1586828231811523, "learning_rate": 4.079357706854224e-06, "loss": 0.8171, "step": 10599 }, { "epoch": 0.6329869819658426, "grad_norm": 2.2427690029144287, "learning_rate": 4.0786941808771815e-06, "loss": 0.8257, "step": 10600 }, { "epoch": 0.6329869819658426, "eval_text_loss": 0.903321385383606, "eval_text_runtime": 15.2513, "eval_text_samples_per_second": 262.272, "eval_text_steps_per_second": 0.525, "step": 10600 }, { "epoch": 0.6329869819658426, "eval_image_loss": 0.60946124792099, "eval_image_runtime": 4.9899, "eval_image_samples_per_second": 801.613, "eval_image_steps_per_second": 1.603, "step": 10600 }, { "epoch": 0.6329869819658426, "eval_video_loss": 1.046971082687378, "eval_video_runtime": 76.8141, "eval_video_samples_per_second": 52.074, "eval_video_steps_per_second": 0.104, "step": 10600 }, { "epoch": 0.6330466977188582, "grad_norm": 2.3492586612701416, "learning_rate": 4.0780306549001396e-06, "loss": 0.8077, "step": 10601 }, { "epoch": 0.6331064134718739, "grad_norm": 2.6377947330474854, "learning_rate": 4.077367128923098e-06, "loss": 0.8425, "step": 10602 }, { "epoch": 0.6331661292248896, "grad_norm": 4.198101043701172, "learning_rate": 4.076703602946056e-06, "loss": 0.8895, "step": 10603 }, { "epoch": 0.6332258449779051, "grad_norm": 1.6901353597640991, "learning_rate": 4.076040076969014e-06, "loss": 0.8409, "step": 10604 }, { "epoch": 0.6332855607309208, "grad_norm": 3.1368844509124756, "learning_rate": 4.075376550991972e-06, "loss": 0.8626, "step": 10605 }, { "epoch": 0.6333452764839365, "grad_norm": 2.018531084060669, "learning_rate": 4.07471302501493e-06, "loss": 0.8416, "step": 10606 }, { "epoch": 0.6334049922369521, "grad_norm": 4.1385273933410645, "learning_rate": 4.074049499037887e-06, "loss": 0.8168, "step": 10607 }, { "epoch": 0.6334647079899678, "grad_norm": 2.5100202560424805, "learning_rate": 4.0733859730608454e-06, "loss": 0.8212, "step": 10608 }, { "epoch": 0.6335244237429833, "grad_norm": 2.3310630321502686, "learning_rate": 4.0727224470838035e-06, "loss": 0.8186, "step": 10609 }, { "epoch": 0.633584139495999, "grad_norm": 1.9565153121948242, "learning_rate": 4.072058921106762e-06, "loss": 0.8584, "step": 10610 }, { "epoch": 0.6336438552490147, "grad_norm": 3.565291404724121, "learning_rate": 4.07139539512972e-06, "loss": 0.8403, "step": 10611 }, { "epoch": 0.6337035710020303, "grad_norm": 3.165919065475464, "learning_rate": 4.070731869152678e-06, "loss": 0.8229, "step": 10612 }, { "epoch": 0.633763286755046, "grad_norm": 1.7904638051986694, "learning_rate": 4.070068343175635e-06, "loss": 0.8287, "step": 10613 }, { "epoch": 0.6338230025080617, "grad_norm": 2.401303768157959, "learning_rate": 4.069404817198594e-06, "loss": 0.8545, "step": 10614 }, { "epoch": 0.6338827182610772, "grad_norm": 2.001451253890991, "learning_rate": 4.068741291221552e-06, "loss": 0.837, "step": 10615 }, { "epoch": 0.6339424340140929, "grad_norm": 2.3245115280151367, "learning_rate": 4.068077765244509e-06, "loss": 0.8268, "step": 10616 }, { "epoch": 0.6340021497671086, "grad_norm": 1.674902319908142, "learning_rate": 4.0674142392674675e-06, "loss": 0.8453, "step": 10617 }, { "epoch": 0.6340618655201242, "grad_norm": 1.9356000423431396, "learning_rate": 4.066750713290426e-06, "loss": 0.8398, "step": 10618 }, { "epoch": 0.6341215812731399, "grad_norm": 2.0024383068084717, "learning_rate": 4.066087187313384e-06, "loss": 0.8686, "step": 10619 }, { "epoch": 0.6341812970261556, "grad_norm": 1.8496427536010742, "learning_rate": 4.065423661336342e-06, "loss": 0.8287, "step": 10620 }, { "epoch": 0.6342410127791711, "grad_norm": 1.8099955320358276, "learning_rate": 4.0647601353593e-06, "loss": 0.8482, "step": 10621 }, { "epoch": 0.6343007285321868, "grad_norm": 2.3769073486328125, "learning_rate": 4.064096609382258e-06, "loss": 0.8538, "step": 10622 }, { "epoch": 0.6343604442852024, "grad_norm": 3.1074225902557373, "learning_rate": 4.063433083405215e-06, "loss": 0.8506, "step": 10623 }, { "epoch": 0.6344201600382181, "grad_norm": 2.4604077339172363, "learning_rate": 4.062769557428174e-06, "loss": 0.8587, "step": 10624 }, { "epoch": 0.6344798757912338, "grad_norm": 6.107905387878418, "learning_rate": 4.0621060314511314e-06, "loss": 0.8155, "step": 10625 }, { "epoch": 0.6345395915442493, "grad_norm": 2.262361764907837, "learning_rate": 4.0614425054740895e-06, "loss": 0.8456, "step": 10626 }, { "epoch": 0.634599307297265, "grad_norm": 1.869397759437561, "learning_rate": 4.060778979497048e-06, "loss": 0.8319, "step": 10627 }, { "epoch": 0.6346590230502807, "grad_norm": 2.2735259532928467, "learning_rate": 4.060115453520006e-06, "loss": 0.8633, "step": 10628 }, { "epoch": 0.6347187388032963, "grad_norm": 1.9578857421875, "learning_rate": 4.059451927542964e-06, "loss": 0.7886, "step": 10629 }, { "epoch": 0.634778454556312, "grad_norm": 1.6752676963806152, "learning_rate": 4.058788401565922e-06, "loss": 0.8927, "step": 10630 }, { "epoch": 0.6348381703093277, "grad_norm": 1.9624868631362915, "learning_rate": 4.05812487558888e-06, "loss": 0.8797, "step": 10631 }, { "epoch": 0.6348978860623432, "grad_norm": 3.457805633544922, "learning_rate": 4.057461349611837e-06, "loss": 0.8262, "step": 10632 }, { "epoch": 0.6349576018153589, "grad_norm": 2.3683433532714844, "learning_rate": 4.056797823634795e-06, "loss": 0.8409, "step": 10633 }, { "epoch": 0.6350173175683745, "grad_norm": 1.954936146736145, "learning_rate": 4.0561342976577535e-06, "loss": 0.8165, "step": 10634 }, { "epoch": 0.6350770333213902, "grad_norm": 2.3077869415283203, "learning_rate": 4.055470771680712e-06, "loss": 0.8278, "step": 10635 }, { "epoch": 0.6351367490744059, "grad_norm": 3.5415658950805664, "learning_rate": 4.05480724570367e-06, "loss": 0.8551, "step": 10636 }, { "epoch": 0.6351964648274214, "grad_norm": 2.041853666305542, "learning_rate": 4.054143719726628e-06, "loss": 0.8331, "step": 10637 }, { "epoch": 0.6352561805804371, "grad_norm": 2.434046745300293, "learning_rate": 4.053480193749585e-06, "loss": 0.8479, "step": 10638 }, { "epoch": 0.6353158963334528, "grad_norm": 2.8940773010253906, "learning_rate": 4.052816667772544e-06, "loss": 0.8178, "step": 10639 }, { "epoch": 0.6353756120864684, "grad_norm": 1.448257327079773, "learning_rate": 4.052153141795502e-06, "loss": 0.8269, "step": 10640 }, { "epoch": 0.6354353278394841, "grad_norm": 10.731263160705566, "learning_rate": 4.051489615818459e-06, "loss": 0.8175, "step": 10641 }, { "epoch": 0.6354950435924998, "grad_norm": 3.6728999614715576, "learning_rate": 4.0508260898414175e-06, "loss": 0.7924, "step": 10642 }, { "epoch": 0.6355547593455153, "grad_norm": 9.015380859375, "learning_rate": 4.0501625638643756e-06, "loss": 0.848, "step": 10643 }, { "epoch": 0.635614475098531, "grad_norm": 3.48547101020813, "learning_rate": 4.049499037887334e-06, "loss": 0.8266, "step": 10644 }, { "epoch": 0.6356741908515466, "grad_norm": 1.9857444763183594, "learning_rate": 4.048835511910292e-06, "loss": 0.8293, "step": 10645 }, { "epoch": 0.6357339066045623, "grad_norm": 2.222313404083252, "learning_rate": 4.04817198593325e-06, "loss": 0.8189, "step": 10646 }, { "epoch": 0.635793622357578, "grad_norm": 2.050400972366333, "learning_rate": 4.047508459956208e-06, "loss": 0.8439, "step": 10647 }, { "epoch": 0.6358533381105935, "grad_norm": 5.999031066894531, "learning_rate": 4.046844933979165e-06, "loss": 0.8434, "step": 10648 }, { "epoch": 0.6359130538636092, "grad_norm": 1.683782696723938, "learning_rate": 4.046181408002124e-06, "loss": 0.8762, "step": 10649 }, { "epoch": 0.6359727696166249, "grad_norm": 2.3030991554260254, "learning_rate": 4.045517882025081e-06, "loss": 0.819, "step": 10650 }, { "epoch": 0.6360324853696405, "grad_norm": 1.7787725925445557, "learning_rate": 4.0448543560480395e-06, "loss": 0.8674, "step": 10651 }, { "epoch": 0.6360922011226562, "grad_norm": 3.2375521659851074, "learning_rate": 4.044190830070998e-06, "loss": 0.8537, "step": 10652 }, { "epoch": 0.6361519168756719, "grad_norm": 1.6719437837600708, "learning_rate": 4.043527304093956e-06, "loss": 0.8087, "step": 10653 }, { "epoch": 0.6362116326286874, "grad_norm": 1.8611654043197632, "learning_rate": 4.042863778116914e-06, "loss": 0.868, "step": 10654 }, { "epoch": 0.6362713483817031, "grad_norm": 10.255297660827637, "learning_rate": 4.042200252139872e-06, "loss": 0.8378, "step": 10655 }, { "epoch": 0.6363310641347187, "grad_norm": 2.832887887954712, "learning_rate": 4.04153672616283e-06, "loss": 0.8411, "step": 10656 }, { "epoch": 0.6363907798877344, "grad_norm": 1.6738687753677368, "learning_rate": 4.040873200185787e-06, "loss": 0.8451, "step": 10657 }, { "epoch": 0.6364504956407501, "grad_norm": 2.322082757949829, "learning_rate": 4.040209674208745e-06, "loss": 0.8613, "step": 10658 }, { "epoch": 0.6365102113937656, "grad_norm": 2.0398008823394775, "learning_rate": 4.0395461482317035e-06, "loss": 0.825, "step": 10659 }, { "epoch": 0.6365699271467813, "grad_norm": 3.867980718612671, "learning_rate": 4.0388826222546616e-06, "loss": 0.8212, "step": 10660 }, { "epoch": 0.636629642899797, "grad_norm": 1.6401405334472656, "learning_rate": 4.03821909627762e-06, "loss": 0.8332, "step": 10661 }, { "epoch": 0.6366893586528126, "grad_norm": 2.1710596084594727, "learning_rate": 4.037555570300578e-06, "loss": 0.8393, "step": 10662 }, { "epoch": 0.6367490744058283, "grad_norm": 2.1048126220703125, "learning_rate": 4.036892044323535e-06, "loss": 0.7969, "step": 10663 }, { "epoch": 0.636808790158844, "grad_norm": 2.2889645099639893, "learning_rate": 4.036228518346493e-06, "loss": 0.851, "step": 10664 }, { "epoch": 0.6368685059118595, "grad_norm": 2.705446243286133, "learning_rate": 4.035564992369452e-06, "loss": 0.8255, "step": 10665 }, { "epoch": 0.6369282216648752, "grad_norm": 2.017112970352173, "learning_rate": 4.034901466392409e-06, "loss": 0.822, "step": 10666 }, { "epoch": 0.6369879374178908, "grad_norm": 3.408592939376831, "learning_rate": 4.0342379404153674e-06, "loss": 0.8166, "step": 10667 }, { "epoch": 0.6370476531709065, "grad_norm": 3.5947062969207764, "learning_rate": 4.0335744144383255e-06, "loss": 0.8327, "step": 10668 }, { "epoch": 0.6371073689239222, "grad_norm": 1.6825850009918213, "learning_rate": 4.032910888461284e-06, "loss": 0.8037, "step": 10669 }, { "epoch": 0.6371670846769377, "grad_norm": 1.8611055612564087, "learning_rate": 4.032247362484242e-06, "loss": 0.8397, "step": 10670 }, { "epoch": 0.6372268004299534, "grad_norm": 4.896934509277344, "learning_rate": 4.0315838365072e-06, "loss": 0.8801, "step": 10671 }, { "epoch": 0.637286516182969, "grad_norm": 2.2235300540924072, "learning_rate": 4.030920310530158e-06, "loss": 0.8336, "step": 10672 }, { "epoch": 0.6373462319359847, "grad_norm": 2.2613916397094727, "learning_rate": 4.030256784553115e-06, "loss": 0.8101, "step": 10673 }, { "epoch": 0.6374059476890004, "grad_norm": 1.7347227334976196, "learning_rate": 4.029593258576073e-06, "loss": 0.9079, "step": 10674 }, { "epoch": 0.637465663442016, "grad_norm": 2.3791463375091553, "learning_rate": 4.028929732599031e-06, "loss": 0.8491, "step": 10675 }, { "epoch": 0.6375253791950316, "grad_norm": 2.2024478912353516, "learning_rate": 4.0282662066219895e-06, "loss": 0.7964, "step": 10676 }, { "epoch": 0.6375850949480473, "grad_norm": 2.4997243881225586, "learning_rate": 4.027602680644948e-06, "loss": 0.8603, "step": 10677 }, { "epoch": 0.6376448107010629, "grad_norm": 1.4364315271377563, "learning_rate": 4.026939154667906e-06, "loss": 0.86, "step": 10678 }, { "epoch": 0.6377045264540786, "grad_norm": 2.775973320007324, "learning_rate": 4.026275628690863e-06, "loss": 0.8414, "step": 10679 }, { "epoch": 0.6377642422070943, "grad_norm": 3.569990634918213, "learning_rate": 4.025612102713822e-06, "loss": 0.8362, "step": 10680 }, { "epoch": 0.6378239579601098, "grad_norm": 1.7795382738113403, "learning_rate": 4.02494857673678e-06, "loss": 0.8611, "step": 10681 }, { "epoch": 0.6378836737131255, "grad_norm": 2.1183738708496094, "learning_rate": 4.024285050759737e-06, "loss": 0.8113, "step": 10682 }, { "epoch": 0.6379433894661412, "grad_norm": 2.1518750190734863, "learning_rate": 4.023621524782695e-06, "loss": 0.8815, "step": 10683 }, { "epoch": 0.6380031052191568, "grad_norm": 2.3716635704040527, "learning_rate": 4.0229579988056534e-06, "loss": 0.8462, "step": 10684 }, { "epoch": 0.6380628209721725, "grad_norm": 2.0604257583618164, "learning_rate": 4.0222944728286115e-06, "loss": 0.8521, "step": 10685 }, { "epoch": 0.6381225367251881, "grad_norm": 2.141364336013794, "learning_rate": 4.02163094685157e-06, "loss": 0.8058, "step": 10686 }, { "epoch": 0.6381822524782037, "grad_norm": 1.6762510538101196, "learning_rate": 4.020967420874528e-06, "loss": 0.8471, "step": 10687 }, { "epoch": 0.6382419682312194, "grad_norm": 1.8868043422698975, "learning_rate": 4.020303894897485e-06, "loss": 0.8359, "step": 10688 }, { "epoch": 0.638301683984235, "grad_norm": 3.09867787361145, "learning_rate": 4.019640368920443e-06, "loss": 0.8258, "step": 10689 }, { "epoch": 0.6383613997372507, "grad_norm": 3.558635711669922, "learning_rate": 4.018976842943402e-06, "loss": 0.8651, "step": 10690 }, { "epoch": 0.6384211154902664, "grad_norm": 1.4779127836227417, "learning_rate": 4.018313316966359e-06, "loss": 0.7757, "step": 10691 }, { "epoch": 0.638480831243282, "grad_norm": 1.9673463106155396, "learning_rate": 4.017649790989317e-06, "loss": 0.8176, "step": 10692 }, { "epoch": 0.6385405469962976, "grad_norm": 2.615478038787842, "learning_rate": 4.0169862650122755e-06, "loss": 0.8037, "step": 10693 }, { "epoch": 0.6386002627493133, "grad_norm": 2.030367136001587, "learning_rate": 4.016322739035234e-06, "loss": 0.8358, "step": 10694 }, { "epoch": 0.6386599785023289, "grad_norm": 2.503199338912964, "learning_rate": 4.015659213058192e-06, "loss": 0.842, "step": 10695 }, { "epoch": 0.6387196942553446, "grad_norm": 2.1176607608795166, "learning_rate": 4.01499568708115e-06, "loss": 0.8754, "step": 10696 }, { "epoch": 0.6387794100083602, "grad_norm": 2.872929573059082, "learning_rate": 4.014332161104108e-06, "loss": 0.8412, "step": 10697 }, { "epoch": 0.6388391257613758, "grad_norm": 2.2670092582702637, "learning_rate": 4.013668635127065e-06, "loss": 0.8627, "step": 10698 }, { "epoch": 0.6388988415143915, "grad_norm": 2.14477276802063, "learning_rate": 4.013005109150023e-06, "loss": 0.8483, "step": 10699 }, { "epoch": 0.6389585572674071, "grad_norm": 1.6578974723815918, "learning_rate": 4.012341583172981e-06, "loss": 0.8191, "step": 10700 }, { "epoch": 0.6390182730204228, "grad_norm": 2.028465747833252, "learning_rate": 4.0116780571959395e-06, "loss": 0.8256, "step": 10701 }, { "epoch": 0.6390779887734385, "grad_norm": 3.3450937271118164, "learning_rate": 4.0110145312188976e-06, "loss": 0.8653, "step": 10702 }, { "epoch": 0.6391377045264541, "grad_norm": 2.528332471847534, "learning_rate": 4.010351005241856e-06, "loss": 0.8028, "step": 10703 }, { "epoch": 0.6391974202794697, "grad_norm": 2.325103759765625, "learning_rate": 4.009687479264813e-06, "loss": 0.8664, "step": 10704 }, { "epoch": 0.6392571360324854, "grad_norm": 1.8037598133087158, "learning_rate": 4.009023953287772e-06, "loss": 0.8231, "step": 10705 }, { "epoch": 0.639316851785501, "grad_norm": 2.2222771644592285, "learning_rate": 4.00836042731073e-06, "loss": 0.8014, "step": 10706 }, { "epoch": 0.6393765675385167, "grad_norm": 1.8669466972351074, "learning_rate": 4.007696901333687e-06, "loss": 0.8259, "step": 10707 }, { "epoch": 0.6394362832915323, "grad_norm": 3.6779279708862305, "learning_rate": 4.007033375356645e-06, "loss": 0.8187, "step": 10708 }, { "epoch": 0.6394959990445479, "grad_norm": 3.4905049800872803, "learning_rate": 4.006369849379603e-06, "loss": 0.8445, "step": 10709 }, { "epoch": 0.6395557147975636, "grad_norm": 2.709465742111206, "learning_rate": 4.0057063234025615e-06, "loss": 0.802, "step": 10710 }, { "epoch": 0.6396154305505792, "grad_norm": 2.0635194778442383, "learning_rate": 4.00504279742552e-06, "loss": 0.8759, "step": 10711 }, { "epoch": 0.6396751463035949, "grad_norm": 2.2723255157470703, "learning_rate": 4.004379271448478e-06, "loss": 0.8459, "step": 10712 }, { "epoch": 0.6397348620566106, "grad_norm": 2.036583423614502, "learning_rate": 4.003715745471435e-06, "loss": 0.8323, "step": 10713 }, { "epoch": 0.6397945778096262, "grad_norm": 2.1193411350250244, "learning_rate": 4.003052219494393e-06, "loss": 0.8458, "step": 10714 }, { "epoch": 0.6398542935626418, "grad_norm": 3.201075315475464, "learning_rate": 4.002388693517352e-06, "loss": 0.8385, "step": 10715 }, { "epoch": 0.6399140093156575, "grad_norm": 2.0252718925476074, "learning_rate": 4.001725167540309e-06, "loss": 0.8279, "step": 10716 }, { "epoch": 0.6399737250686731, "grad_norm": 2.658827304840088, "learning_rate": 4.001061641563267e-06, "loss": 0.8369, "step": 10717 }, { "epoch": 0.6400334408216888, "grad_norm": 2.3016061782836914, "learning_rate": 4.0003981155862255e-06, "loss": 0.8483, "step": 10718 }, { "epoch": 0.6400931565747044, "grad_norm": 2.5098795890808105, "learning_rate": 3.9997345896091836e-06, "loss": 0.8158, "step": 10719 }, { "epoch": 0.64015287232772, "grad_norm": 2.6258270740509033, "learning_rate": 3.999071063632142e-06, "loss": 0.8284, "step": 10720 }, { "epoch": 0.6402125880807357, "grad_norm": 1.991445779800415, "learning_rate": 3.9984075376551e-06, "loss": 0.8463, "step": 10721 }, { "epoch": 0.6402723038337513, "grad_norm": 2.9510600566864014, "learning_rate": 3.997744011678058e-06, "loss": 0.8793, "step": 10722 }, { "epoch": 0.640332019586767, "grad_norm": 3.0157580375671387, "learning_rate": 3.997080485701015e-06, "loss": 0.8449, "step": 10723 }, { "epoch": 0.6403917353397827, "grad_norm": 2.9402737617492676, "learning_rate": 3.996416959723973e-06, "loss": 0.8393, "step": 10724 }, { "epoch": 0.6404514510927983, "grad_norm": 2.5876059532165527, "learning_rate": 3.995753433746931e-06, "loss": 0.8752, "step": 10725 }, { "epoch": 0.6405111668458139, "grad_norm": 1.7079938650131226, "learning_rate": 3.9950899077698894e-06, "loss": 0.8383, "step": 10726 }, { "epoch": 0.6405708825988295, "grad_norm": 3.421729564666748, "learning_rate": 3.9944263817928475e-06, "loss": 0.8481, "step": 10727 }, { "epoch": 0.6406305983518452, "grad_norm": 2.01735258102417, "learning_rate": 3.993762855815806e-06, "loss": 0.8304, "step": 10728 }, { "epoch": 0.6406903141048609, "grad_norm": 2.3561267852783203, "learning_rate": 3.993099329838763e-06, "loss": 0.8615, "step": 10729 }, { "epoch": 0.6407500298578765, "grad_norm": 1.8605060577392578, "learning_rate": 3.992435803861722e-06, "loss": 0.8591, "step": 10730 }, { "epoch": 0.6408097456108921, "grad_norm": 2.040118455886841, "learning_rate": 3.99177227788468e-06, "loss": 0.812, "step": 10731 }, { "epoch": 0.6408694613639078, "grad_norm": 1.9702545404434204, "learning_rate": 3.991108751907637e-06, "loss": 0.8246, "step": 10732 }, { "epoch": 0.6409291771169234, "grad_norm": 2.133880138397217, "learning_rate": 3.990445225930595e-06, "loss": 0.8351, "step": 10733 }, { "epoch": 0.6409888928699391, "grad_norm": 4.2866058349609375, "learning_rate": 3.989781699953553e-06, "loss": 0.851, "step": 10734 }, { "epoch": 0.6410486086229548, "grad_norm": 2.453596830368042, "learning_rate": 3.9891181739765115e-06, "loss": 0.8437, "step": 10735 }, { "epoch": 0.6411083243759704, "grad_norm": 2.1369664669036865, "learning_rate": 3.98845464799947e-06, "loss": 0.8508, "step": 10736 }, { "epoch": 0.641168040128986, "grad_norm": 2.7173826694488525, "learning_rate": 3.987791122022428e-06, "loss": 0.8123, "step": 10737 }, { "epoch": 0.6412277558820016, "grad_norm": 4.364511489868164, "learning_rate": 3.987127596045385e-06, "loss": 0.839, "step": 10738 }, { "epoch": 0.6412874716350173, "grad_norm": 5.281250953674316, "learning_rate": 3.986464070068343e-06, "loss": 0.8105, "step": 10739 }, { "epoch": 0.641347187388033, "grad_norm": 2.5839786529541016, "learning_rate": 3.985800544091302e-06, "loss": 0.8623, "step": 10740 }, { "epoch": 0.6414069031410486, "grad_norm": 2.7596397399902344, "learning_rate": 3.985137018114259e-06, "loss": 0.8402, "step": 10741 }, { "epoch": 0.6414666188940642, "grad_norm": 1.7261425256729126, "learning_rate": 3.984473492137217e-06, "loss": 0.8194, "step": 10742 }, { "epoch": 0.6415263346470799, "grad_norm": 2.4088551998138428, "learning_rate": 3.9838099661601754e-06, "loss": 0.7978, "step": 10743 }, { "epoch": 0.6415860504000955, "grad_norm": 1.9932132959365845, "learning_rate": 3.9831464401831335e-06, "loss": 0.8489, "step": 10744 }, { "epoch": 0.6416457661531112, "grad_norm": 2.229267120361328, "learning_rate": 3.982482914206092e-06, "loss": 0.8297, "step": 10745 }, { "epoch": 0.6417054819061269, "grad_norm": 2.101619243621826, "learning_rate": 3.98181938822905e-06, "loss": 0.8286, "step": 10746 }, { "epoch": 0.6417651976591425, "grad_norm": 3.279744863510132, "learning_rate": 3.981155862252008e-06, "loss": 0.8246, "step": 10747 }, { "epoch": 0.6418249134121581, "grad_norm": 2.855466365814209, "learning_rate": 3.980492336274965e-06, "loss": 0.8074, "step": 10748 }, { "epoch": 0.6418846291651737, "grad_norm": 1.8750948905944824, "learning_rate": 3.979828810297923e-06, "loss": 0.8389, "step": 10749 }, { "epoch": 0.6419443449181894, "grad_norm": 2.4356706142425537, "learning_rate": 3.979165284320881e-06, "loss": 0.8509, "step": 10750 }, { "epoch": 0.6420040606712051, "grad_norm": 2.2426633834838867, "learning_rate": 3.978501758343839e-06, "loss": 0.8164, "step": 10751 }, { "epoch": 0.6420637764242207, "grad_norm": 1.5572798252105713, "learning_rate": 3.9778382323667975e-06, "loss": 0.8236, "step": 10752 }, { "epoch": 0.6421234921772364, "grad_norm": 3.263425588607788, "learning_rate": 3.977174706389756e-06, "loss": 0.8477, "step": 10753 }, { "epoch": 0.642183207930252, "grad_norm": 2.2194299697875977, "learning_rate": 3.976511180412713e-06, "loss": 0.8262, "step": 10754 }, { "epoch": 0.6422429236832676, "grad_norm": 1.783465027809143, "learning_rate": 3.975847654435672e-06, "loss": 0.8165, "step": 10755 }, { "epoch": 0.6423026394362833, "grad_norm": 1.953360915184021, "learning_rate": 3.97518412845863e-06, "loss": 0.8529, "step": 10756 }, { "epoch": 0.642362355189299, "grad_norm": 1.8131383657455444, "learning_rate": 3.974520602481587e-06, "loss": 0.8651, "step": 10757 }, { "epoch": 0.6424220709423146, "grad_norm": 2.3264989852905273, "learning_rate": 3.973857076504545e-06, "loss": 0.8044, "step": 10758 }, { "epoch": 0.6424817866953302, "grad_norm": 2.2866930961608887, "learning_rate": 3.973193550527503e-06, "loss": 0.8379, "step": 10759 }, { "epoch": 0.6425415024483458, "grad_norm": 2.0995399951934814, "learning_rate": 3.9725300245504615e-06, "loss": 0.8756, "step": 10760 }, { "epoch": 0.6426012182013615, "grad_norm": 2.664485454559326, "learning_rate": 3.9718664985734196e-06, "loss": 0.8398, "step": 10761 }, { "epoch": 0.6426609339543772, "grad_norm": 1.8747459650039673, "learning_rate": 3.971202972596378e-06, "loss": 0.8717, "step": 10762 }, { "epoch": 0.6427206497073928, "grad_norm": 2.2154388427734375, "learning_rate": 3.970539446619335e-06, "loss": 0.8413, "step": 10763 }, { "epoch": 0.6427803654604085, "grad_norm": 2.619929552078247, "learning_rate": 3.969875920642293e-06, "loss": 0.876, "step": 10764 }, { "epoch": 0.6428400812134241, "grad_norm": 3.737070083618164, "learning_rate": 3.969212394665252e-06, "loss": 0.8727, "step": 10765 }, { "epoch": 0.6428997969664397, "grad_norm": 1.8556945323944092, "learning_rate": 3.968548868688209e-06, "loss": 0.8551, "step": 10766 }, { "epoch": 0.6429595127194554, "grad_norm": 3.022113084793091, "learning_rate": 3.967885342711167e-06, "loss": 0.8384, "step": 10767 }, { "epoch": 0.6430192284724711, "grad_norm": 3.762460708618164, "learning_rate": 3.967221816734125e-06, "loss": 0.8309, "step": 10768 }, { "epoch": 0.6430789442254867, "grad_norm": 2.010416030883789, "learning_rate": 3.9665582907570835e-06, "loss": 0.8154, "step": 10769 }, { "epoch": 0.6431386599785023, "grad_norm": 2.476607084274292, "learning_rate": 3.965894764780042e-06, "loss": 0.856, "step": 10770 }, { "epoch": 0.643198375731518, "grad_norm": 2.074626922607422, "learning_rate": 3.965231238803e-06, "loss": 0.8346, "step": 10771 }, { "epoch": 0.6432580914845336, "grad_norm": 2.2924327850341797, "learning_rate": 3.964567712825958e-06, "loss": 0.8343, "step": 10772 }, { "epoch": 0.6433178072375493, "grad_norm": 3.470289468765259, "learning_rate": 3.963904186848915e-06, "loss": 0.8371, "step": 10773 }, { "epoch": 0.6433775229905649, "grad_norm": 2.63261079788208, "learning_rate": 3.963240660871873e-06, "loss": 0.8616, "step": 10774 }, { "epoch": 0.6434372387435806, "grad_norm": 4.3086771965026855, "learning_rate": 3.962577134894831e-06, "loss": 0.8113, "step": 10775 }, { "epoch": 0.6434969544965962, "grad_norm": 2.0625131130218506, "learning_rate": 3.961913608917789e-06, "loss": 0.8373, "step": 10776 }, { "epoch": 0.6435566702496118, "grad_norm": 2.343742847442627, "learning_rate": 3.9612500829407475e-06, "loss": 0.8839, "step": 10777 }, { "epoch": 0.6436163860026275, "grad_norm": 1.9323614835739136, "learning_rate": 3.9605865569637056e-06, "loss": 0.8477, "step": 10778 }, { "epoch": 0.6436761017556432, "grad_norm": 3.022472381591797, "learning_rate": 3.959923030986663e-06, "loss": 0.8353, "step": 10779 }, { "epoch": 0.6437358175086588, "grad_norm": 2.5859148502349854, "learning_rate": 3.959259505009622e-06, "loss": 0.859, "step": 10780 }, { "epoch": 0.6437955332616744, "grad_norm": 2.0919580459594727, "learning_rate": 3.95859597903258e-06, "loss": 0.8157, "step": 10781 }, { "epoch": 0.64385524901469, "grad_norm": 2.9735400676727295, "learning_rate": 3.957932453055537e-06, "loss": 0.8395, "step": 10782 }, { "epoch": 0.6439149647677057, "grad_norm": 2.1267342567443848, "learning_rate": 3.957268927078495e-06, "loss": 0.8185, "step": 10783 }, { "epoch": 0.6439746805207214, "grad_norm": 1.9137097597122192, "learning_rate": 3.956605401101453e-06, "loss": 0.8489, "step": 10784 }, { "epoch": 0.644034396273737, "grad_norm": 2.1726322174072266, "learning_rate": 3.955941875124411e-06, "loss": 0.8174, "step": 10785 }, { "epoch": 0.6440941120267527, "grad_norm": 3.992891550064087, "learning_rate": 3.9552783491473695e-06, "loss": 0.8257, "step": 10786 }, { "epoch": 0.6441538277797683, "grad_norm": 1.7488148212432861, "learning_rate": 3.954614823170328e-06, "loss": 0.8166, "step": 10787 }, { "epoch": 0.6442135435327839, "grad_norm": 2.497321367263794, "learning_rate": 3.953951297193285e-06, "loss": 0.8161, "step": 10788 }, { "epoch": 0.6442732592857996, "grad_norm": 1.7178782224655151, "learning_rate": 3.953287771216243e-06, "loss": 0.8417, "step": 10789 }, { "epoch": 0.6443329750388153, "grad_norm": 2.2414627075195312, "learning_rate": 3.952624245239202e-06, "loss": 0.8475, "step": 10790 }, { "epoch": 0.6443926907918309, "grad_norm": 1.7687205076217651, "learning_rate": 3.951960719262159e-06, "loss": 0.8239, "step": 10791 }, { "epoch": 0.6444524065448465, "grad_norm": 1.9619616270065308, "learning_rate": 3.951297193285117e-06, "loss": 0.7986, "step": 10792 }, { "epoch": 0.6445121222978621, "grad_norm": 2.6058003902435303, "learning_rate": 3.950633667308075e-06, "loss": 0.8677, "step": 10793 }, { "epoch": 0.6445718380508778, "grad_norm": 3.9329094886779785, "learning_rate": 3.9499701413310335e-06, "loss": 0.8101, "step": 10794 }, { "epoch": 0.6446315538038935, "grad_norm": 5.044623851776123, "learning_rate": 3.949306615353992e-06, "loss": 0.8761, "step": 10795 }, { "epoch": 0.6446912695569091, "grad_norm": 2.5469181537628174, "learning_rate": 3.94864308937695e-06, "loss": 0.868, "step": 10796 }, { "epoch": 0.6447509853099248, "grad_norm": 2.259004592895508, "learning_rate": 3.947979563399908e-06, "loss": 0.8143, "step": 10797 }, { "epoch": 0.6448107010629404, "grad_norm": 2.073267698287964, "learning_rate": 3.947316037422865e-06, "loss": 0.8533, "step": 10798 }, { "epoch": 0.644870416815956, "grad_norm": 1.884318232536316, "learning_rate": 3.946652511445823e-06, "loss": 0.8218, "step": 10799 }, { "epoch": 0.6449301325689717, "grad_norm": 2.3767495155334473, "learning_rate": 3.945988985468781e-06, "loss": 0.8414, "step": 10800 }, { "epoch": 0.6449301325689717, "eval_text_loss": 0.9021971225738525, "eval_text_runtime": 15.1904, "eval_text_samples_per_second": 263.324, "eval_text_steps_per_second": 0.527, "step": 10800 }, { "epoch": 0.6449301325689717, "eval_image_loss": 0.6108888387680054, "eval_image_runtime": 5.0914, "eval_image_samples_per_second": 785.643, "eval_image_steps_per_second": 1.571, "step": 10800 }, { "epoch": 0.6449301325689717, "eval_video_loss": 1.0453180074691772, "eval_video_runtime": 76.2468, "eval_video_samples_per_second": 52.461, "eval_video_steps_per_second": 0.105, "step": 10800 }, { "epoch": 0.6449898483219874, "grad_norm": 1.9299025535583496, "learning_rate": 3.945325459491739e-06, "loss": 0.8451, "step": 10801 }, { "epoch": 0.645049564075003, "grad_norm": 1.95749831199646, "learning_rate": 3.9446619335146974e-06, "loss": 0.8687, "step": 10802 }, { "epoch": 0.6451092798280186, "grad_norm": 1.7395946979522705, "learning_rate": 3.9439984075376555e-06, "loss": 0.8302, "step": 10803 }, { "epoch": 0.6451689955810342, "grad_norm": 2.0707528591156006, "learning_rate": 3.943334881560613e-06, "loss": 0.823, "step": 10804 }, { "epoch": 0.6452287113340499, "grad_norm": 2.3434925079345703, "learning_rate": 3.942671355583572e-06, "loss": 0.8328, "step": 10805 }, { "epoch": 0.6452884270870656, "grad_norm": 2.3354663848876953, "learning_rate": 3.94200782960653e-06, "loss": 0.7983, "step": 10806 }, { "epoch": 0.6453481428400812, "grad_norm": 1.6830089092254639, "learning_rate": 3.941344303629487e-06, "loss": 0.8179, "step": 10807 }, { "epoch": 0.6454078585930969, "grad_norm": 1.8792120218276978, "learning_rate": 3.940680777652445e-06, "loss": 0.8332, "step": 10808 }, { "epoch": 0.6454675743461125, "grad_norm": 3.55832839012146, "learning_rate": 3.940017251675403e-06, "loss": 0.82, "step": 10809 }, { "epoch": 0.6455272900991281, "grad_norm": 2.203056812286377, "learning_rate": 3.939353725698361e-06, "loss": 0.8523, "step": 10810 }, { "epoch": 0.6455870058521438, "grad_norm": 2.3045012950897217, "learning_rate": 3.9386901997213195e-06, "loss": 0.8447, "step": 10811 }, { "epoch": 0.6456467216051595, "grad_norm": 7.092294692993164, "learning_rate": 3.938026673744278e-06, "loss": 0.8299, "step": 10812 }, { "epoch": 0.6457064373581751, "grad_norm": 1.789546012878418, "learning_rate": 3.937363147767236e-06, "loss": 0.8478, "step": 10813 }, { "epoch": 0.6457661531111907, "grad_norm": 2.343201160430908, "learning_rate": 3.936699621790193e-06, "loss": 0.8696, "step": 10814 }, { "epoch": 0.6458258688642063, "grad_norm": 3.8309035301208496, "learning_rate": 3.936036095813152e-06, "loss": 0.8292, "step": 10815 }, { "epoch": 0.645885584617222, "grad_norm": 1.6845602989196777, "learning_rate": 3.935372569836109e-06, "loss": 0.8199, "step": 10816 }, { "epoch": 0.6459453003702377, "grad_norm": 2.180727243423462, "learning_rate": 3.934709043859067e-06, "loss": 0.8309, "step": 10817 }, { "epoch": 0.6460050161232533, "grad_norm": 2.3727405071258545, "learning_rate": 3.934045517882025e-06, "loss": 0.8233, "step": 10818 }, { "epoch": 0.646064731876269, "grad_norm": 1.7655079364776611, "learning_rate": 3.9333819919049835e-06, "loss": 0.8627, "step": 10819 }, { "epoch": 0.6461244476292846, "grad_norm": 2.7652666568756104, "learning_rate": 3.9327184659279416e-06, "loss": 0.8512, "step": 10820 }, { "epoch": 0.6461841633823002, "grad_norm": 2.358515977859497, "learning_rate": 3.9320549399509e-06, "loss": 0.8249, "step": 10821 }, { "epoch": 0.6462438791353159, "grad_norm": 1.6904774904251099, "learning_rate": 3.931391413973858e-06, "loss": 0.8145, "step": 10822 }, { "epoch": 0.6463035948883316, "grad_norm": 2.272035837173462, "learning_rate": 3.930727887996815e-06, "loss": 0.793, "step": 10823 }, { "epoch": 0.6463633106413472, "grad_norm": 2.1025445461273193, "learning_rate": 3.930064362019773e-06, "loss": 0.8431, "step": 10824 }, { "epoch": 0.6464230263943629, "grad_norm": 3.058096408843994, "learning_rate": 3.929400836042731e-06, "loss": 0.8344, "step": 10825 }, { "epoch": 0.6464827421473784, "grad_norm": 2.090667724609375, "learning_rate": 3.928737310065689e-06, "loss": 0.851, "step": 10826 }, { "epoch": 0.6465424579003941, "grad_norm": 2.9318902492523193, "learning_rate": 3.928073784088647e-06, "loss": 0.8828, "step": 10827 }, { "epoch": 0.6466021736534098, "grad_norm": 2.873162269592285, "learning_rate": 3.9274102581116055e-06, "loss": 0.8388, "step": 10828 }, { "epoch": 0.6466618894064254, "grad_norm": 1.8396695852279663, "learning_rate": 3.926746732134563e-06, "loss": 0.8223, "step": 10829 }, { "epoch": 0.6467216051594411, "grad_norm": 1.8441381454467773, "learning_rate": 3.926083206157522e-06, "loss": 0.8349, "step": 10830 }, { "epoch": 0.6467813209124567, "grad_norm": 2.1882688999176025, "learning_rate": 3.92541968018048e-06, "loss": 0.8323, "step": 10831 }, { "epoch": 0.6468410366654723, "grad_norm": 2.8102023601531982, "learning_rate": 3.924756154203437e-06, "loss": 0.8483, "step": 10832 }, { "epoch": 0.646900752418488, "grad_norm": 2.081200122833252, "learning_rate": 3.924092628226395e-06, "loss": 0.8413, "step": 10833 }, { "epoch": 0.6469604681715037, "grad_norm": 1.8324692249298096, "learning_rate": 3.923429102249353e-06, "loss": 0.8471, "step": 10834 }, { "epoch": 0.6470201839245193, "grad_norm": 1.7679561376571655, "learning_rate": 3.922765576272311e-06, "loss": 0.8359, "step": 10835 }, { "epoch": 0.647079899677535, "grad_norm": 1.9572206735610962, "learning_rate": 3.9221020502952695e-06, "loss": 0.8653, "step": 10836 }, { "epoch": 0.6471396154305505, "grad_norm": 1.8806298971176147, "learning_rate": 3.9214385243182276e-06, "loss": 0.8679, "step": 10837 }, { "epoch": 0.6471993311835662, "grad_norm": 1.7458529472351074, "learning_rate": 3.920774998341186e-06, "loss": 0.8729, "step": 10838 }, { "epoch": 0.6472590469365819, "grad_norm": 2.0300452709198, "learning_rate": 3.920111472364143e-06, "loss": 0.8302, "step": 10839 }, { "epoch": 0.6473187626895975, "grad_norm": 2.0137553215026855, "learning_rate": 3.919447946387102e-06, "loss": 0.8577, "step": 10840 }, { "epoch": 0.6473784784426132, "grad_norm": 2.3862216472625732, "learning_rate": 3.918784420410059e-06, "loss": 0.8745, "step": 10841 }, { "epoch": 0.6474381941956288, "grad_norm": 2.23363995552063, "learning_rate": 3.918120894433017e-06, "loss": 0.8483, "step": 10842 }, { "epoch": 0.6474979099486444, "grad_norm": 2.0901525020599365, "learning_rate": 3.917457368455975e-06, "loss": 0.8663, "step": 10843 }, { "epoch": 0.6475576257016601, "grad_norm": 2.022092580795288, "learning_rate": 3.916793842478933e-06, "loss": 0.8277, "step": 10844 }, { "epoch": 0.6476173414546758, "grad_norm": 1.9569339752197266, "learning_rate": 3.9161303165018915e-06, "loss": 0.8637, "step": 10845 }, { "epoch": 0.6476770572076914, "grad_norm": 3.939504623413086, "learning_rate": 3.91546679052485e-06, "loss": 0.8002, "step": 10846 }, { "epoch": 0.6477367729607071, "grad_norm": 2.0997400283813477, "learning_rate": 3.914803264547808e-06, "loss": 0.8453, "step": 10847 }, { "epoch": 0.6477964887137226, "grad_norm": 2.392512083053589, "learning_rate": 3.914139738570765e-06, "loss": 0.8501, "step": 10848 }, { "epoch": 0.6478562044667383, "grad_norm": 1.9026918411254883, "learning_rate": 3.913476212593723e-06, "loss": 0.8257, "step": 10849 }, { "epoch": 0.647915920219754, "grad_norm": 3.8795933723449707, "learning_rate": 3.912812686616681e-06, "loss": 0.8286, "step": 10850 }, { "epoch": 0.6479756359727696, "grad_norm": 2.1270506381988525, "learning_rate": 3.912149160639639e-06, "loss": 0.8204, "step": 10851 }, { "epoch": 0.6480353517257853, "grad_norm": 2.0429418087005615, "learning_rate": 3.911485634662597e-06, "loss": 0.8401, "step": 10852 }, { "epoch": 0.6480950674788009, "grad_norm": 1.6079713106155396, "learning_rate": 3.9108221086855555e-06, "loss": 0.8265, "step": 10853 }, { "epoch": 0.6481547832318165, "grad_norm": 1.547249436378479, "learning_rate": 3.910158582708513e-06, "loss": 0.8428, "step": 10854 }, { "epoch": 0.6482144989848322, "grad_norm": 1.9880123138427734, "learning_rate": 3.909495056731472e-06, "loss": 0.8389, "step": 10855 }, { "epoch": 0.6482742147378479, "grad_norm": 1.5171701908111572, "learning_rate": 3.90883153075443e-06, "loss": 0.8366, "step": 10856 }, { "epoch": 0.6483339304908635, "grad_norm": 2.508031129837036, "learning_rate": 3.908168004777387e-06, "loss": 0.8404, "step": 10857 }, { "epoch": 0.6483936462438792, "grad_norm": 2.168226718902588, "learning_rate": 3.907504478800345e-06, "loss": 0.8097, "step": 10858 }, { "epoch": 0.6484533619968947, "grad_norm": 2.336869716644287, "learning_rate": 3.906840952823303e-06, "loss": 0.8327, "step": 10859 }, { "epoch": 0.6485130777499104, "grad_norm": 1.7521657943725586, "learning_rate": 3.906177426846261e-06, "loss": 0.8433, "step": 10860 }, { "epoch": 0.6485727935029261, "grad_norm": 2.214735984802246, "learning_rate": 3.9055139008692194e-06, "loss": 0.8319, "step": 10861 }, { "epoch": 0.6486325092559417, "grad_norm": 2.464240789413452, "learning_rate": 3.9048503748921775e-06, "loss": 0.8919, "step": 10862 }, { "epoch": 0.6486922250089574, "grad_norm": 2.0410947799682617, "learning_rate": 3.904186848915136e-06, "loss": 0.8365, "step": 10863 }, { "epoch": 0.648751940761973, "grad_norm": 2.190657615661621, "learning_rate": 3.903523322938093e-06, "loss": 0.8497, "step": 10864 }, { "epoch": 0.6488116565149886, "grad_norm": 1.838018774986267, "learning_rate": 3.902859796961052e-06, "loss": 0.8121, "step": 10865 }, { "epoch": 0.6488713722680043, "grad_norm": 1.8318711519241333, "learning_rate": 3.902196270984009e-06, "loss": 0.8136, "step": 10866 }, { "epoch": 0.64893108802102, "grad_norm": 2.5646815299987793, "learning_rate": 3.901532745006967e-06, "loss": 0.8228, "step": 10867 }, { "epoch": 0.6489908037740356, "grad_norm": 2.257636308670044, "learning_rate": 3.900869219029925e-06, "loss": 0.8214, "step": 10868 }, { "epoch": 0.6490505195270513, "grad_norm": 7.2919816970825195, "learning_rate": 3.900205693052883e-06, "loss": 0.8087, "step": 10869 }, { "epoch": 0.6491102352800668, "grad_norm": 2.3603758811950684, "learning_rate": 3.8995421670758415e-06, "loss": 0.8456, "step": 10870 }, { "epoch": 0.6491699510330825, "grad_norm": 2.1489782333374023, "learning_rate": 3.8988786410988e-06, "loss": 0.8391, "step": 10871 }, { "epoch": 0.6492296667860982, "grad_norm": 2.3832430839538574, "learning_rate": 3.898215115121758e-06, "loss": 0.8784, "step": 10872 }, { "epoch": 0.6492893825391138, "grad_norm": 2.1265177726745605, "learning_rate": 3.897551589144715e-06, "loss": 0.8712, "step": 10873 }, { "epoch": 0.6493490982921295, "grad_norm": 2.3180735111236572, "learning_rate": 3.896888063167673e-06, "loss": 0.8772, "step": 10874 }, { "epoch": 0.649408814045145, "grad_norm": 2.2404427528381348, "learning_rate": 3.896224537190631e-06, "loss": 0.8373, "step": 10875 }, { "epoch": 0.6494685297981607, "grad_norm": 1.9316900968551636, "learning_rate": 3.895561011213589e-06, "loss": 0.8793, "step": 10876 }, { "epoch": 0.6495282455511764, "grad_norm": 1.8974863290786743, "learning_rate": 3.894897485236547e-06, "loss": 0.8375, "step": 10877 }, { "epoch": 0.649587961304192, "grad_norm": 2.2620956897735596, "learning_rate": 3.8942339592595054e-06, "loss": 0.857, "step": 10878 }, { "epoch": 0.6496476770572077, "grad_norm": 4.217000961303711, "learning_rate": 3.893570433282463e-06, "loss": 0.8149, "step": 10879 }, { "epoch": 0.6497073928102234, "grad_norm": 2.2918906211853027, "learning_rate": 3.892906907305422e-06, "loss": 0.8296, "step": 10880 }, { "epoch": 0.6497671085632389, "grad_norm": 5.07397985458374, "learning_rate": 3.89224338132838e-06, "loss": 0.8532, "step": 10881 }, { "epoch": 0.6498268243162546, "grad_norm": 1.8998688459396362, "learning_rate": 3.891579855351337e-06, "loss": 0.8501, "step": 10882 }, { "epoch": 0.6498865400692703, "grad_norm": 1.90809965133667, "learning_rate": 3.890916329374295e-06, "loss": 0.8284, "step": 10883 }, { "epoch": 0.6499462558222859, "grad_norm": 3.875309705734253, "learning_rate": 3.890252803397253e-06, "loss": 0.783, "step": 10884 }, { "epoch": 0.6500059715753016, "grad_norm": 3.1311120986938477, "learning_rate": 3.889589277420211e-06, "loss": 0.864, "step": 10885 }, { "epoch": 0.6500656873283173, "grad_norm": 2.019725799560547, "learning_rate": 3.888925751443169e-06, "loss": 0.8287, "step": 10886 }, { "epoch": 0.6501254030813328, "grad_norm": 3.7261998653411865, "learning_rate": 3.8882622254661275e-06, "loss": 0.8464, "step": 10887 }, { "epoch": 0.6501851188343485, "grad_norm": 2.25516414642334, "learning_rate": 3.887598699489086e-06, "loss": 0.8574, "step": 10888 }, { "epoch": 0.6502448345873642, "grad_norm": 2.457650661468506, "learning_rate": 3.886935173512043e-06, "loss": 0.8198, "step": 10889 }, { "epoch": 0.6503045503403798, "grad_norm": 1.6017924547195435, "learning_rate": 3.886271647535002e-06, "loss": 0.8499, "step": 10890 }, { "epoch": 0.6503642660933955, "grad_norm": 1.643915057182312, "learning_rate": 3.885608121557959e-06, "loss": 0.8293, "step": 10891 }, { "epoch": 0.650423981846411, "grad_norm": 1.8385090827941895, "learning_rate": 3.884944595580917e-06, "loss": 0.8186, "step": 10892 }, { "epoch": 0.6504836975994267, "grad_norm": 1.9548579454421997, "learning_rate": 3.884281069603875e-06, "loss": 0.8326, "step": 10893 }, { "epoch": 0.6505434133524424, "grad_norm": 1.883702039718628, "learning_rate": 3.883617543626833e-06, "loss": 0.8811, "step": 10894 }, { "epoch": 0.650603129105458, "grad_norm": 2.594200372695923, "learning_rate": 3.8829540176497915e-06, "loss": 0.8425, "step": 10895 }, { "epoch": 0.6506628448584737, "grad_norm": 1.9747354984283447, "learning_rate": 3.8822904916727496e-06, "loss": 0.8393, "step": 10896 }, { "epoch": 0.6507225606114894, "grad_norm": 2.134714126586914, "learning_rate": 3.881626965695708e-06, "loss": 0.8361, "step": 10897 }, { "epoch": 0.6507822763645049, "grad_norm": 2.0496232509613037, "learning_rate": 3.880963439718665e-06, "loss": 0.8316, "step": 10898 }, { "epoch": 0.6508419921175206, "grad_norm": 1.9562129974365234, "learning_rate": 3.880299913741623e-06, "loss": 0.8096, "step": 10899 }, { "epoch": 0.6509017078705363, "grad_norm": 2.9947268962860107, "learning_rate": 3.879636387764581e-06, "loss": 0.832, "step": 10900 }, { "epoch": 0.6509614236235519, "grad_norm": 2.7683334350585938, "learning_rate": 3.878972861787539e-06, "loss": 0.8384, "step": 10901 }, { "epoch": 0.6510211393765676, "grad_norm": 1.775656819343567, "learning_rate": 3.878309335810497e-06, "loss": 0.8284, "step": 10902 }, { "epoch": 0.6510808551295831, "grad_norm": 3.7742693424224854, "learning_rate": 3.877645809833455e-06, "loss": 0.8278, "step": 10903 }, { "epoch": 0.6511405708825988, "grad_norm": 1.5871485471725464, "learning_rate": 3.876982283856413e-06, "loss": 0.8499, "step": 10904 }, { "epoch": 0.6512002866356145, "grad_norm": 2.8821675777435303, "learning_rate": 3.876318757879372e-06, "loss": 0.8231, "step": 10905 }, { "epoch": 0.6512600023886301, "grad_norm": 2.0910866260528564, "learning_rate": 3.87565523190233e-06, "loss": 0.828, "step": 10906 }, { "epoch": 0.6513197181416458, "grad_norm": 17.537944793701172, "learning_rate": 3.874991705925287e-06, "loss": 0.8541, "step": 10907 }, { "epoch": 0.6513794338946615, "grad_norm": 1.790144681930542, "learning_rate": 3.874328179948245e-06, "loss": 0.812, "step": 10908 }, { "epoch": 0.651439149647677, "grad_norm": 1.9876843690872192, "learning_rate": 3.873664653971203e-06, "loss": 0.8372, "step": 10909 }, { "epoch": 0.6514988654006927, "grad_norm": 2.5482804775238037, "learning_rate": 3.873001127994161e-06, "loss": 0.8186, "step": 10910 }, { "epoch": 0.6515585811537083, "grad_norm": 2.3805575370788574, "learning_rate": 3.872337602017119e-06, "loss": 0.8283, "step": 10911 }, { "epoch": 0.651618296906724, "grad_norm": 2.197115659713745, "learning_rate": 3.8716740760400775e-06, "loss": 0.8513, "step": 10912 }, { "epoch": 0.6516780126597397, "grad_norm": 2.0625417232513428, "learning_rate": 3.871010550063036e-06, "loss": 0.8364, "step": 10913 }, { "epoch": 0.6517377284127552, "grad_norm": 2.8953723907470703, "learning_rate": 3.870347024085993e-06, "loss": 0.8567, "step": 10914 }, { "epoch": 0.6517974441657709, "grad_norm": 1.6865382194519043, "learning_rate": 3.869683498108952e-06, "loss": 0.8181, "step": 10915 }, { "epoch": 0.6518571599187866, "grad_norm": 2.2898566722869873, "learning_rate": 3.869019972131909e-06, "loss": 0.8605, "step": 10916 }, { "epoch": 0.6519168756718022, "grad_norm": 2.3237507343292236, "learning_rate": 3.868356446154867e-06, "loss": 0.8567, "step": 10917 }, { "epoch": 0.6519765914248179, "grad_norm": 1.9656755924224854, "learning_rate": 3.867692920177825e-06, "loss": 0.8395, "step": 10918 }, { "epoch": 0.6520363071778336, "grad_norm": 1.61556875705719, "learning_rate": 3.867029394200783e-06, "loss": 0.8047, "step": 10919 }, { "epoch": 0.6520960229308491, "grad_norm": 1.8627991676330566, "learning_rate": 3.8663658682237414e-06, "loss": 0.8082, "step": 10920 }, { "epoch": 0.6521557386838648, "grad_norm": 1.6498429775238037, "learning_rate": 3.8657023422466995e-06, "loss": 0.8385, "step": 10921 }, { "epoch": 0.6522154544368804, "grad_norm": 2.0121617317199707, "learning_rate": 3.865038816269658e-06, "loss": 0.8302, "step": 10922 }, { "epoch": 0.6522751701898961, "grad_norm": 2.717346668243408, "learning_rate": 3.864375290292615e-06, "loss": 0.8815, "step": 10923 }, { "epoch": 0.6523348859429118, "grad_norm": 2.3995401859283447, "learning_rate": 3.863711764315573e-06, "loss": 0.8552, "step": 10924 }, { "epoch": 0.6523946016959273, "grad_norm": 3.171191930770874, "learning_rate": 3.863048238338531e-06, "loss": 0.861, "step": 10925 }, { "epoch": 0.652454317448943, "grad_norm": 2.2309234142303467, "learning_rate": 3.862384712361489e-06, "loss": 0.8253, "step": 10926 }, { "epoch": 0.6525140332019587, "grad_norm": 1.7999800443649292, "learning_rate": 3.861721186384447e-06, "loss": 0.8512, "step": 10927 }, { "epoch": 0.6525737489549743, "grad_norm": 1.7517516613006592, "learning_rate": 3.861057660407405e-06, "loss": 0.854, "step": 10928 }, { "epoch": 0.65263346470799, "grad_norm": 1.600000023841858, "learning_rate": 3.860394134430363e-06, "loss": 0.8486, "step": 10929 }, { "epoch": 0.6526931804610057, "grad_norm": 1.7281434535980225, "learning_rate": 3.859730608453322e-06, "loss": 0.8457, "step": 10930 }, { "epoch": 0.6527528962140212, "grad_norm": 2.6830310821533203, "learning_rate": 3.85906708247628e-06, "loss": 0.8353, "step": 10931 }, { "epoch": 0.6528126119670369, "grad_norm": 2.343904495239258, "learning_rate": 3.858403556499237e-06, "loss": 0.8368, "step": 10932 }, { "epoch": 0.6528723277200525, "grad_norm": 2.3032021522521973, "learning_rate": 3.857740030522195e-06, "loss": 0.836, "step": 10933 }, { "epoch": 0.6529320434730682, "grad_norm": 1.7262709140777588, "learning_rate": 3.857076504545153e-06, "loss": 0.8184, "step": 10934 }, { "epoch": 0.6529917592260839, "grad_norm": 2.034794330596924, "learning_rate": 3.856412978568111e-06, "loss": 0.818, "step": 10935 }, { "epoch": 0.6530514749790994, "grad_norm": 2.1417109966278076, "learning_rate": 3.855749452591069e-06, "loss": 0.8438, "step": 10936 }, { "epoch": 0.6531111907321151, "grad_norm": 2.048734426498413, "learning_rate": 3.8550859266140274e-06, "loss": 0.8529, "step": 10937 }, { "epoch": 0.6531709064851308, "grad_norm": 1.9109874963760376, "learning_rate": 3.8544224006369855e-06, "loss": 0.8424, "step": 10938 }, { "epoch": 0.6532306222381464, "grad_norm": 2.6685454845428467, "learning_rate": 3.853758874659943e-06, "loss": 0.8259, "step": 10939 }, { "epoch": 0.6532903379911621, "grad_norm": 2.1508963108062744, "learning_rate": 3.853095348682902e-06, "loss": 0.8171, "step": 10940 }, { "epoch": 0.6533500537441778, "grad_norm": 1.8718417882919312, "learning_rate": 3.852431822705859e-06, "loss": 0.8154, "step": 10941 }, { "epoch": 0.6534097694971933, "grad_norm": 2.4560818672180176, "learning_rate": 3.851768296728817e-06, "loss": 0.873, "step": 10942 }, { "epoch": 0.653469485250209, "grad_norm": 2.157055139541626, "learning_rate": 3.851104770751775e-06, "loss": 0.7941, "step": 10943 }, { "epoch": 0.6535292010032246, "grad_norm": 2.084564208984375, "learning_rate": 3.850441244774733e-06, "loss": 0.8628, "step": 10944 }, { "epoch": 0.6535889167562403, "grad_norm": 2.8966708183288574, "learning_rate": 3.849777718797691e-06, "loss": 0.8773, "step": 10945 }, { "epoch": 0.653648632509256, "grad_norm": 2.080798625946045, "learning_rate": 3.8491141928206495e-06, "loss": 0.8523, "step": 10946 }, { "epoch": 0.6537083482622715, "grad_norm": 1.4878343343734741, "learning_rate": 3.848450666843608e-06, "loss": 0.8722, "step": 10947 }, { "epoch": 0.6537680640152872, "grad_norm": 2.4676060676574707, "learning_rate": 3.847787140866565e-06, "loss": 0.8696, "step": 10948 }, { "epoch": 0.6538277797683029, "grad_norm": 2.6716067790985107, "learning_rate": 3.847123614889523e-06, "loss": 0.8243, "step": 10949 }, { "epoch": 0.6538874955213185, "grad_norm": 1.648087978363037, "learning_rate": 3.846460088912481e-06, "loss": 0.8016, "step": 10950 }, { "epoch": 0.6539472112743342, "grad_norm": 3.205090284347534, "learning_rate": 3.845796562935439e-06, "loss": 0.801, "step": 10951 }, { "epoch": 0.6540069270273499, "grad_norm": 2.954918384552002, "learning_rate": 3.845133036958397e-06, "loss": 0.8379, "step": 10952 }, { "epoch": 0.6540666427803654, "grad_norm": 4.162130832672119, "learning_rate": 3.844469510981355e-06, "loss": 0.844, "step": 10953 }, { "epoch": 0.6541263585333811, "grad_norm": 1.960520625114441, "learning_rate": 3.843805985004313e-06, "loss": 0.8322, "step": 10954 }, { "epoch": 0.6541860742863967, "grad_norm": 2.860900402069092, "learning_rate": 3.8431424590272716e-06, "loss": 0.8205, "step": 10955 }, { "epoch": 0.6542457900394124, "grad_norm": 9.838066101074219, "learning_rate": 3.84247893305023e-06, "loss": 0.8546, "step": 10956 }, { "epoch": 0.6543055057924281, "grad_norm": 1.893032431602478, "learning_rate": 3.841815407073187e-06, "loss": 0.8324, "step": 10957 }, { "epoch": 0.6543652215454437, "grad_norm": 1.9189382791519165, "learning_rate": 3.841151881096145e-06, "loss": 0.8226, "step": 10958 }, { "epoch": 0.6544249372984593, "grad_norm": 2.7319846153259277, "learning_rate": 3.840488355119103e-06, "loss": 0.8552, "step": 10959 }, { "epoch": 0.654484653051475, "grad_norm": 2.4377880096435547, "learning_rate": 3.839824829142061e-06, "loss": 0.8248, "step": 10960 }, { "epoch": 0.6545443688044906, "grad_norm": 4.2913126945495605, "learning_rate": 3.839161303165019e-06, "loss": 0.8493, "step": 10961 }, { "epoch": 0.6546040845575063, "grad_norm": 1.807714819908142, "learning_rate": 3.838497777187977e-06, "loss": 0.8376, "step": 10962 }, { "epoch": 0.654663800310522, "grad_norm": 2.0181806087493896, "learning_rate": 3.8378342512109355e-06, "loss": 0.8357, "step": 10963 }, { "epoch": 0.6547235160635375, "grad_norm": 1.7730281352996826, "learning_rate": 3.837170725233893e-06, "loss": 0.8387, "step": 10964 }, { "epoch": 0.6547832318165532, "grad_norm": 1.7560558319091797, "learning_rate": 3.836507199256852e-06, "loss": 0.8321, "step": 10965 }, { "epoch": 0.6548429475695688, "grad_norm": 1.6080878973007202, "learning_rate": 3.835843673279809e-06, "loss": 0.8419, "step": 10966 }, { "epoch": 0.6549026633225845, "grad_norm": 3.7049200534820557, "learning_rate": 3.835180147302767e-06, "loss": 0.8739, "step": 10967 }, { "epoch": 0.6549623790756002, "grad_norm": 2.798978328704834, "learning_rate": 3.834516621325725e-06, "loss": 0.85, "step": 10968 }, { "epoch": 0.6550220948286158, "grad_norm": 3.0323450565338135, "learning_rate": 3.833853095348683e-06, "loss": 0.8371, "step": 10969 }, { "epoch": 0.6550818105816314, "grad_norm": 1.9008182287216187, "learning_rate": 3.833189569371641e-06, "loss": 0.81, "step": 10970 }, { "epoch": 0.6551415263346471, "grad_norm": 1.743451476097107, "learning_rate": 3.8325260433945995e-06, "loss": 0.8681, "step": 10971 }, { "epoch": 0.6552012420876627, "grad_norm": 2.7629339694976807, "learning_rate": 3.831862517417558e-06, "loss": 0.854, "step": 10972 }, { "epoch": 0.6552609578406784, "grad_norm": 2.0250868797302246, "learning_rate": 3.831198991440515e-06, "loss": 0.8295, "step": 10973 }, { "epoch": 0.6553206735936941, "grad_norm": 1.6252299547195435, "learning_rate": 3.830535465463473e-06, "loss": 0.8123, "step": 10974 }, { "epoch": 0.6553803893467096, "grad_norm": 2.657519578933716, "learning_rate": 3.829871939486431e-06, "loss": 0.8391, "step": 10975 }, { "epoch": 0.6554401050997253, "grad_norm": 1.95203697681427, "learning_rate": 3.829208413509389e-06, "loss": 0.7882, "step": 10976 }, { "epoch": 0.6554998208527409, "grad_norm": 2.6567485332489014, "learning_rate": 3.828544887532347e-06, "loss": 0.85, "step": 10977 }, { "epoch": 0.6555595366057566, "grad_norm": 2.60830020904541, "learning_rate": 3.827881361555305e-06, "loss": 0.8256, "step": 10978 }, { "epoch": 0.6556192523587723, "grad_norm": 2.6452760696411133, "learning_rate": 3.827217835578263e-06, "loss": 0.8443, "step": 10979 }, { "epoch": 0.6556789681117879, "grad_norm": 2.186129331588745, "learning_rate": 3.8265543096012215e-06, "loss": 0.8051, "step": 10980 }, { "epoch": 0.6557386838648035, "grad_norm": 1.8170194625854492, "learning_rate": 3.82589078362418e-06, "loss": 0.8444, "step": 10981 }, { "epoch": 0.6557983996178192, "grad_norm": 2.0765328407287598, "learning_rate": 3.825227257647137e-06, "loss": 0.8194, "step": 10982 }, { "epoch": 0.6558581153708348, "grad_norm": 2.69486665725708, "learning_rate": 3.824563731670095e-06, "loss": 0.796, "step": 10983 }, { "epoch": 0.6559178311238505, "grad_norm": 2.7068324089050293, "learning_rate": 3.823900205693053e-06, "loss": 0.8685, "step": 10984 }, { "epoch": 0.6559775468768662, "grad_norm": 2.5185539722442627, "learning_rate": 3.823236679716011e-06, "loss": 0.8089, "step": 10985 }, { "epoch": 0.6560372626298817, "grad_norm": 2.468172311782837, "learning_rate": 3.822573153738969e-06, "loss": 0.8529, "step": 10986 }, { "epoch": 0.6560969783828974, "grad_norm": 1.8499441146850586, "learning_rate": 3.821909627761927e-06, "loss": 0.8236, "step": 10987 }, { "epoch": 0.656156694135913, "grad_norm": 2.340712547302246, "learning_rate": 3.8212461017848855e-06, "loss": 0.8177, "step": 10988 }, { "epoch": 0.6562164098889287, "grad_norm": 1.947309136390686, "learning_rate": 3.820582575807843e-06, "loss": 0.8421, "step": 10989 }, { "epoch": 0.6562761256419444, "grad_norm": 2.3451690673828125, "learning_rate": 3.819919049830802e-06, "loss": 0.862, "step": 10990 }, { "epoch": 0.65633584139496, "grad_norm": 1.8279837369918823, "learning_rate": 3.819255523853759e-06, "loss": 0.8392, "step": 10991 }, { "epoch": 0.6563955571479756, "grad_norm": 2.4435372352600098, "learning_rate": 3.818591997876717e-06, "loss": 0.8598, "step": 10992 }, { "epoch": 0.6564552729009913, "grad_norm": 2.50795841217041, "learning_rate": 3.817928471899675e-06, "loss": 0.8429, "step": 10993 }, { "epoch": 0.6565149886540069, "grad_norm": 2.1656854152679443, "learning_rate": 3.817264945922633e-06, "loss": 0.8138, "step": 10994 }, { "epoch": 0.6565747044070226, "grad_norm": 4.97219181060791, "learning_rate": 3.816601419945591e-06, "loss": 0.8299, "step": 10995 }, { "epoch": 0.6566344201600383, "grad_norm": 1.8973854780197144, "learning_rate": 3.8159378939685494e-06, "loss": 0.8178, "step": 10996 }, { "epoch": 0.6566941359130538, "grad_norm": 1.8645645380020142, "learning_rate": 3.8152743679915075e-06, "loss": 0.855, "step": 10997 }, { "epoch": 0.6567538516660695, "grad_norm": 1.641287922859192, "learning_rate": 3.8146108420144652e-06, "loss": 0.8404, "step": 10998 }, { "epoch": 0.6568135674190851, "grad_norm": 2.5182077884674072, "learning_rate": 3.813947316037423e-06, "loss": 0.8271, "step": 10999 }, { "epoch": 0.6568732831721008, "grad_norm": 2.362985610961914, "learning_rate": 3.8132837900603814e-06, "loss": 0.8259, "step": 11000 }, { "epoch": 0.6568732831721008, "eval_text_loss": 0.9013711214065552, "eval_text_runtime": 15.221, "eval_text_samples_per_second": 262.794, "eval_text_steps_per_second": 0.526, "step": 11000 }, { "epoch": 0.6568732831721008, "eval_image_loss": 0.6101874113082886, "eval_image_runtime": 5.0435, "eval_image_samples_per_second": 793.107, "eval_image_steps_per_second": 1.586, "step": 11000 }, { "epoch": 0.6568732831721008, "eval_video_loss": 1.0451054573059082, "eval_video_runtime": 76.4839, "eval_video_samples_per_second": 52.299, "eval_video_steps_per_second": 0.105, "step": 11000 }, { "epoch": 0.6569329989251165, "grad_norm": 1.9382108449935913, "learning_rate": 3.812620264083339e-06, "loss": 0.8099, "step": 11001 }, { "epoch": 0.6569927146781321, "grad_norm": 1.9955929517745972, "learning_rate": 3.811956738106297e-06, "loss": 0.8416, "step": 11002 }, { "epoch": 0.6570524304311477, "grad_norm": 1.9531320333480835, "learning_rate": 3.811293212129255e-06, "loss": 0.8234, "step": 11003 }, { "epoch": 0.6571121461841634, "grad_norm": 1.8319944143295288, "learning_rate": 3.810629686152213e-06, "loss": 0.8093, "step": 11004 }, { "epoch": 0.657171861937179, "grad_norm": 1.94112229347229, "learning_rate": 3.8099661601751715e-06, "loss": 0.8149, "step": 11005 }, { "epoch": 0.6572315776901947, "grad_norm": 3.5256989002227783, "learning_rate": 3.809302634198129e-06, "loss": 0.8921, "step": 11006 }, { "epoch": 0.6572912934432104, "grad_norm": 2.7971010208129883, "learning_rate": 3.8086391082210873e-06, "loss": 0.8695, "step": 11007 }, { "epoch": 0.6573510091962259, "grad_norm": 1.9444146156311035, "learning_rate": 3.807975582244045e-06, "loss": 0.7909, "step": 11008 }, { "epoch": 0.6574107249492416, "grad_norm": 2.4063727855682373, "learning_rate": 3.807312056267003e-06, "loss": 0.7913, "step": 11009 }, { "epoch": 0.6574704407022572, "grad_norm": 2.062913179397583, "learning_rate": 3.8066485302899616e-06, "loss": 0.8633, "step": 11010 }, { "epoch": 0.6575301564552729, "grad_norm": 2.006309986114502, "learning_rate": 3.8059850043129193e-06, "loss": 0.8151, "step": 11011 }, { "epoch": 0.6575898722082886, "grad_norm": 1.5897595882415771, "learning_rate": 3.8053214783358774e-06, "loss": 0.829, "step": 11012 }, { "epoch": 0.6576495879613042, "grad_norm": 3.288439989089966, "learning_rate": 3.804657952358835e-06, "loss": 0.822, "step": 11013 }, { "epoch": 0.6577093037143198, "grad_norm": 2.2086613178253174, "learning_rate": 3.803994426381793e-06, "loss": 0.8261, "step": 11014 }, { "epoch": 0.6577690194673355, "grad_norm": 1.816870093345642, "learning_rate": 3.8033309004047512e-06, "loss": 0.8427, "step": 11015 }, { "epoch": 0.6578287352203511, "grad_norm": 2.609588146209717, "learning_rate": 3.8026673744277093e-06, "loss": 0.8422, "step": 11016 }, { "epoch": 0.6578884509733668, "grad_norm": 2.202634811401367, "learning_rate": 3.802003848450667e-06, "loss": 0.8366, "step": 11017 }, { "epoch": 0.6579481667263825, "grad_norm": 3.4383363723754883, "learning_rate": 3.801340322473625e-06, "loss": 0.8266, "step": 11018 }, { "epoch": 0.6580078824793981, "grad_norm": 2.6185481548309326, "learning_rate": 3.800676796496583e-06, "loss": 0.8552, "step": 11019 }, { "epoch": 0.6580675982324137, "grad_norm": 2.4148337841033936, "learning_rate": 3.8000132705195413e-06, "loss": 0.8389, "step": 11020 }, { "epoch": 0.6581273139854293, "grad_norm": 2.2303061485290527, "learning_rate": 3.7993497445424994e-06, "loss": 0.8549, "step": 11021 }, { "epoch": 0.658187029738445, "grad_norm": 2.693984031677246, "learning_rate": 3.798686218565457e-06, "loss": 0.8655, "step": 11022 }, { "epoch": 0.6582467454914607, "grad_norm": 2.8284873962402344, "learning_rate": 3.798022692588415e-06, "loss": 0.8127, "step": 11023 }, { "epoch": 0.6583064612444763, "grad_norm": 1.8833014965057373, "learning_rate": 3.797359166611373e-06, "loss": 0.862, "step": 11024 }, { "epoch": 0.6583661769974919, "grad_norm": 4.270041465759277, "learning_rate": 3.7966956406343314e-06, "loss": 0.8676, "step": 11025 }, { "epoch": 0.6584258927505076, "grad_norm": 2.648954391479492, "learning_rate": 3.796032114657289e-06, "loss": 0.8656, "step": 11026 }, { "epoch": 0.6584856085035232, "grad_norm": 2.946538209915161, "learning_rate": 3.795368588680247e-06, "loss": 0.8226, "step": 11027 }, { "epoch": 0.6585453242565389, "grad_norm": 1.668766736984253, "learning_rate": 3.794705062703205e-06, "loss": 0.852, "step": 11028 }, { "epoch": 0.6586050400095546, "grad_norm": 2.04114031791687, "learning_rate": 3.794041536726163e-06, "loss": 0.8506, "step": 11029 }, { "epoch": 0.6586647557625702, "grad_norm": 2.961930513381958, "learning_rate": 3.7933780107491215e-06, "loss": 0.8899, "step": 11030 }, { "epoch": 0.6587244715155858, "grad_norm": 3.1664938926696777, "learning_rate": 3.792714484772079e-06, "loss": 0.8053, "step": 11031 }, { "epoch": 0.6587841872686014, "grad_norm": 1.8893520832061768, "learning_rate": 3.7920509587950373e-06, "loss": 0.8693, "step": 11032 }, { "epoch": 0.6588439030216171, "grad_norm": 2.394031047821045, "learning_rate": 3.791387432817995e-06, "loss": 0.8268, "step": 11033 }, { "epoch": 0.6589036187746328, "grad_norm": 2.192173957824707, "learning_rate": 3.790723906840953e-06, "loss": 0.8554, "step": 11034 }, { "epoch": 0.6589633345276484, "grad_norm": 3.3321115970611572, "learning_rate": 3.7900603808639116e-06, "loss": 0.8649, "step": 11035 }, { "epoch": 0.659023050280664, "grad_norm": 2.0620226860046387, "learning_rate": 3.7893968548868692e-06, "loss": 0.8741, "step": 11036 }, { "epoch": 0.6590827660336797, "grad_norm": 2.7236578464508057, "learning_rate": 3.7887333289098273e-06, "loss": 0.8738, "step": 11037 }, { "epoch": 0.6591424817866953, "grad_norm": 1.7045602798461914, "learning_rate": 3.788069802932785e-06, "loss": 0.8439, "step": 11038 }, { "epoch": 0.659202197539711, "grad_norm": 1.8324005603790283, "learning_rate": 3.787406276955743e-06, "loss": 0.8447, "step": 11039 }, { "epoch": 0.6592619132927267, "grad_norm": 1.6972358226776123, "learning_rate": 3.786742750978701e-06, "loss": 0.8365, "step": 11040 }, { "epoch": 0.6593216290457423, "grad_norm": 2.571500062942505, "learning_rate": 3.7860792250016593e-06, "loss": 0.8785, "step": 11041 }, { "epoch": 0.6593813447987579, "grad_norm": 1.7828091382980347, "learning_rate": 3.785415699024617e-06, "loss": 0.8315, "step": 11042 }, { "epoch": 0.6594410605517735, "grad_norm": 3.4476828575134277, "learning_rate": 3.784752173047575e-06, "loss": 0.842, "step": 11043 }, { "epoch": 0.6595007763047892, "grad_norm": 2.0033469200134277, "learning_rate": 3.7840886470705328e-06, "loss": 0.8404, "step": 11044 }, { "epoch": 0.6595604920578049, "grad_norm": 1.7015740871429443, "learning_rate": 3.7834251210934913e-06, "loss": 0.8305, "step": 11045 }, { "epoch": 0.6596202078108205, "grad_norm": 1.852559208869934, "learning_rate": 3.7827615951164494e-06, "loss": 0.8691, "step": 11046 }, { "epoch": 0.6596799235638361, "grad_norm": 5.305208683013916, "learning_rate": 3.782098069139407e-06, "loss": 0.8576, "step": 11047 }, { "epoch": 0.6597396393168518, "grad_norm": 19.356441497802734, "learning_rate": 3.781434543162365e-06, "loss": 0.8312, "step": 11048 }, { "epoch": 0.6597993550698674, "grad_norm": 2.5471909046173096, "learning_rate": 3.780771017185323e-06, "loss": 0.851, "step": 11049 }, { "epoch": 0.6598590708228831, "grad_norm": 4.190001964569092, "learning_rate": 3.7801074912082814e-06, "loss": 0.835, "step": 11050 }, { "epoch": 0.6599187865758988, "grad_norm": 2.7166507244110107, "learning_rate": 3.779443965231239e-06, "loss": 0.8094, "step": 11051 }, { "epoch": 0.6599785023289144, "grad_norm": 2.0865964889526367, "learning_rate": 3.778780439254197e-06, "loss": 0.8138, "step": 11052 }, { "epoch": 0.66003821808193, "grad_norm": 2.8993709087371826, "learning_rate": 3.778116913277155e-06, "loss": 0.862, "step": 11053 }, { "epoch": 0.6600979338349456, "grad_norm": 2.1451256275177, "learning_rate": 3.777453387300113e-06, "loss": 0.852, "step": 11054 }, { "epoch": 0.6601576495879613, "grad_norm": 2.1425960063934326, "learning_rate": 3.7767898613230714e-06, "loss": 0.8301, "step": 11055 }, { "epoch": 0.660217365340977, "grad_norm": 2.2069032192230225, "learning_rate": 3.776126335346029e-06, "loss": 0.8551, "step": 11056 }, { "epoch": 0.6602770810939926, "grad_norm": 1.8083362579345703, "learning_rate": 3.7754628093689872e-06, "loss": 0.8352, "step": 11057 }, { "epoch": 0.6603367968470082, "grad_norm": 1.6779555082321167, "learning_rate": 3.774799283391945e-06, "loss": 0.8354, "step": 11058 }, { "epoch": 0.6603965126000239, "grad_norm": 1.9703508615493774, "learning_rate": 3.774135757414903e-06, "loss": 0.809, "step": 11059 }, { "epoch": 0.6604562283530395, "grad_norm": 2.7193074226379395, "learning_rate": 3.7734722314378615e-06, "loss": 0.824, "step": 11060 }, { "epoch": 0.6605159441060552, "grad_norm": 2.644186496734619, "learning_rate": 3.772808705460819e-06, "loss": 0.8506, "step": 11061 }, { "epoch": 0.6605756598590709, "grad_norm": 1.992614507675171, "learning_rate": 3.7721451794837773e-06, "loss": 0.8362, "step": 11062 }, { "epoch": 0.6606353756120865, "grad_norm": 2.195693016052246, "learning_rate": 3.771481653506735e-06, "loss": 0.8369, "step": 11063 }, { "epoch": 0.6606950913651021, "grad_norm": 1.8659430742263794, "learning_rate": 3.770818127529693e-06, "loss": 0.8063, "step": 11064 }, { "epoch": 0.6607548071181177, "grad_norm": 1.6856167316436768, "learning_rate": 3.770154601552651e-06, "loss": 0.8491, "step": 11065 }, { "epoch": 0.6608145228711334, "grad_norm": 1.7999640703201294, "learning_rate": 3.7694910755756093e-06, "loss": 0.8708, "step": 11066 }, { "epoch": 0.6608742386241491, "grad_norm": 2.9844274520874023, "learning_rate": 3.768827549598567e-06, "loss": 0.8467, "step": 11067 }, { "epoch": 0.6609339543771647, "grad_norm": 2.068925380706787, "learning_rate": 3.768164023621525e-06, "loss": 0.8458, "step": 11068 }, { "epoch": 0.6609936701301803, "grad_norm": 2.548388957977295, "learning_rate": 3.7675004976444827e-06, "loss": 0.8427, "step": 11069 }, { "epoch": 0.661053385883196, "grad_norm": 2.5050466060638428, "learning_rate": 3.7668369716674413e-06, "loss": 0.8363, "step": 11070 }, { "epoch": 0.6611131016362116, "grad_norm": 2.791563034057617, "learning_rate": 3.7661734456903994e-06, "loss": 0.8222, "step": 11071 }, { "epoch": 0.6611728173892273, "grad_norm": 2.1257059574127197, "learning_rate": 3.765509919713357e-06, "loss": 0.813, "step": 11072 }, { "epoch": 0.661232533142243, "grad_norm": 2.060279130935669, "learning_rate": 3.764846393736315e-06, "loss": 0.8231, "step": 11073 }, { "epoch": 0.6612922488952586, "grad_norm": 2.4868335723876953, "learning_rate": 3.764182867759273e-06, "loss": 0.8342, "step": 11074 }, { "epoch": 0.6613519646482742, "grad_norm": 2.1073150634765625, "learning_rate": 3.7635193417822313e-06, "loss": 0.8319, "step": 11075 }, { "epoch": 0.6614116804012898, "grad_norm": 1.9802690744400024, "learning_rate": 3.762855815805189e-06, "loss": 0.8164, "step": 11076 }, { "epoch": 0.6614713961543055, "grad_norm": 3.17452073097229, "learning_rate": 3.762192289828147e-06, "loss": 0.808, "step": 11077 }, { "epoch": 0.6615311119073212, "grad_norm": 2.756338119506836, "learning_rate": 3.761528763851105e-06, "loss": 0.8461, "step": 11078 }, { "epoch": 0.6615908276603368, "grad_norm": 6.873748302459717, "learning_rate": 3.760865237874063e-06, "loss": 0.8268, "step": 11079 }, { "epoch": 0.6616505434133524, "grad_norm": 1.5000735521316528, "learning_rate": 3.7602017118970214e-06, "loss": 0.8327, "step": 11080 }, { "epoch": 0.661710259166368, "grad_norm": 2.4841482639312744, "learning_rate": 3.759538185919979e-06, "loss": 0.8437, "step": 11081 }, { "epoch": 0.6617699749193837, "grad_norm": 2.8923587799072266, "learning_rate": 3.758874659942937e-06, "loss": 0.8559, "step": 11082 }, { "epoch": 0.6618296906723994, "grad_norm": 2.264630079269409, "learning_rate": 3.758211133965895e-06, "loss": 0.809, "step": 11083 }, { "epoch": 0.661889406425415, "grad_norm": 3.4194037914276123, "learning_rate": 3.757547607988853e-06, "loss": 0.8233, "step": 11084 }, { "epoch": 0.6619491221784307, "grad_norm": 3.984147310256958, "learning_rate": 3.7568840820118115e-06, "loss": 0.8207, "step": 11085 }, { "epoch": 0.6620088379314463, "grad_norm": 2.0524516105651855, "learning_rate": 3.756220556034769e-06, "loss": 0.8159, "step": 11086 }, { "epoch": 0.6620685536844619, "grad_norm": 3.4797112941741943, "learning_rate": 3.7555570300577273e-06, "loss": 0.8675, "step": 11087 }, { "epoch": 0.6621282694374776, "grad_norm": 2.7849557399749756, "learning_rate": 3.754893504080685e-06, "loss": 0.8608, "step": 11088 }, { "epoch": 0.6621879851904933, "grad_norm": 2.393625259399414, "learning_rate": 3.754229978103643e-06, "loss": 0.8297, "step": 11089 }, { "epoch": 0.6622477009435089, "grad_norm": 2.5346553325653076, "learning_rate": 3.753566452126601e-06, "loss": 0.8597, "step": 11090 }, { "epoch": 0.6623074166965246, "grad_norm": 2.0733916759490967, "learning_rate": 3.7529029261495593e-06, "loss": 0.8458, "step": 11091 }, { "epoch": 0.6623671324495402, "grad_norm": 3.224064588546753, "learning_rate": 3.752239400172517e-06, "loss": 0.8172, "step": 11092 }, { "epoch": 0.6624268482025558, "grad_norm": 2.1373260021209717, "learning_rate": 3.751575874195475e-06, "loss": 0.7765, "step": 11093 }, { "epoch": 0.6624865639555715, "grad_norm": 2.297288656234741, "learning_rate": 3.7509123482184327e-06, "loss": 0.7885, "step": 11094 }, { "epoch": 0.6625462797085871, "grad_norm": 3.381619453430176, "learning_rate": 3.7502488222413912e-06, "loss": 0.7843, "step": 11095 }, { "epoch": 0.6626059954616028, "grad_norm": 1.7476470470428467, "learning_rate": 3.7495852962643493e-06, "loss": 0.8315, "step": 11096 }, { "epoch": 0.6626657112146184, "grad_norm": 2.10990834236145, "learning_rate": 3.748921770287307e-06, "loss": 0.8363, "step": 11097 }, { "epoch": 0.662725426967634, "grad_norm": 1.6315447092056274, "learning_rate": 3.748258244310265e-06, "loss": 0.7953, "step": 11098 }, { "epoch": 0.6627851427206497, "grad_norm": 2.0471928119659424, "learning_rate": 3.7475947183332228e-06, "loss": 0.8617, "step": 11099 }, { "epoch": 0.6628448584736654, "grad_norm": 1.8039828538894653, "learning_rate": 3.7469311923561813e-06, "loss": 0.8659, "step": 11100 }, { "epoch": 0.662904574226681, "grad_norm": 2.0518593788146973, "learning_rate": 3.746267666379139e-06, "loss": 0.8494, "step": 11101 }, { "epoch": 0.6629642899796967, "grad_norm": 2.3311798572540283, "learning_rate": 3.745604140402097e-06, "loss": 0.845, "step": 11102 }, { "epoch": 0.6630240057327123, "grad_norm": 1.9875291585922241, "learning_rate": 3.7449406144250548e-06, "loss": 0.8016, "step": 11103 }, { "epoch": 0.6630837214857279, "grad_norm": 2.483539581298828, "learning_rate": 3.744277088448013e-06, "loss": 0.8044, "step": 11104 }, { "epoch": 0.6631434372387436, "grad_norm": 2.2780494689941406, "learning_rate": 3.7436135624709714e-06, "loss": 0.8555, "step": 11105 }, { "epoch": 0.6632031529917592, "grad_norm": 2.3052642345428467, "learning_rate": 3.742950036493929e-06, "loss": 0.8063, "step": 11106 }, { "epoch": 0.6632628687447749, "grad_norm": 2.765316963195801, "learning_rate": 3.742286510516887e-06, "loss": 0.7917, "step": 11107 }, { "epoch": 0.6633225844977905, "grad_norm": 2.162879467010498, "learning_rate": 3.741622984539845e-06, "loss": 0.8291, "step": 11108 }, { "epoch": 0.6633823002508061, "grad_norm": 1.7362322807312012, "learning_rate": 3.740959458562803e-06, "loss": 0.8724, "step": 11109 }, { "epoch": 0.6634420160038218, "grad_norm": 2.79618763923645, "learning_rate": 3.7402959325857615e-06, "loss": 0.8149, "step": 11110 }, { "epoch": 0.6635017317568375, "grad_norm": 2.3361051082611084, "learning_rate": 3.739632406608719e-06, "loss": 0.8266, "step": 11111 }, { "epoch": 0.6635614475098531, "grad_norm": 2.2020411491394043, "learning_rate": 3.7389688806316772e-06, "loss": 0.8319, "step": 11112 }, { "epoch": 0.6636211632628688, "grad_norm": 3.66255784034729, "learning_rate": 3.738305354654635e-06, "loss": 0.8071, "step": 11113 }, { "epoch": 0.6636808790158844, "grad_norm": 2.026148557662964, "learning_rate": 3.737641828677593e-06, "loss": 0.871, "step": 11114 }, { "epoch": 0.6637405947689, "grad_norm": 2.138089418411255, "learning_rate": 3.736978302700551e-06, "loss": 0.827, "step": 11115 }, { "epoch": 0.6638003105219157, "grad_norm": 1.9641274213790894, "learning_rate": 3.7363147767235092e-06, "loss": 0.8444, "step": 11116 }, { "epoch": 0.6638600262749313, "grad_norm": 2.6256041526794434, "learning_rate": 3.735651250746467e-06, "loss": 0.8498, "step": 11117 }, { "epoch": 0.663919742027947, "grad_norm": 1.922645926475525, "learning_rate": 3.734987724769425e-06, "loss": 0.8157, "step": 11118 }, { "epoch": 0.6639794577809626, "grad_norm": 2.12050199508667, "learning_rate": 3.7343241987923827e-06, "loss": 0.8151, "step": 11119 }, { "epoch": 0.6640391735339782, "grad_norm": 1.6846541166305542, "learning_rate": 3.733660672815341e-06, "loss": 0.8269, "step": 11120 }, { "epoch": 0.6640988892869939, "grad_norm": 2.2094759941101074, "learning_rate": 3.7329971468382993e-06, "loss": 0.8274, "step": 11121 }, { "epoch": 0.6641586050400096, "grad_norm": 1.5304704904556274, "learning_rate": 3.732333620861257e-06, "loss": 0.8139, "step": 11122 }, { "epoch": 0.6642183207930252, "grad_norm": 2.379504680633545, "learning_rate": 3.731670094884215e-06, "loss": 0.8264, "step": 11123 }, { "epoch": 0.6642780365460409, "grad_norm": 2.0039212703704834, "learning_rate": 3.7310065689071728e-06, "loss": 0.8529, "step": 11124 }, { "epoch": 0.6643377522990564, "grad_norm": 2.699052333831787, "learning_rate": 3.7303430429301313e-06, "loss": 0.8101, "step": 11125 }, { "epoch": 0.6643974680520721, "grad_norm": 5.241832256317139, "learning_rate": 3.729679516953089e-06, "loss": 0.8153, "step": 11126 }, { "epoch": 0.6644571838050878, "grad_norm": 4.261483192443848, "learning_rate": 3.729015990976047e-06, "loss": 0.8378, "step": 11127 }, { "epoch": 0.6645168995581034, "grad_norm": 2.5378992557525635, "learning_rate": 3.7283524649990047e-06, "loss": 0.8408, "step": 11128 }, { "epoch": 0.6645766153111191, "grad_norm": 2.0601277351379395, "learning_rate": 3.727688939021963e-06, "loss": 0.857, "step": 11129 }, { "epoch": 0.6646363310641347, "grad_norm": 2.528679132461548, "learning_rate": 3.7270254130449214e-06, "loss": 0.856, "step": 11130 }, { "epoch": 0.6646960468171503, "grad_norm": 1.8280470371246338, "learning_rate": 3.726361887067879e-06, "loss": 0.857, "step": 11131 }, { "epoch": 0.664755762570166, "grad_norm": 1.8372448682785034, "learning_rate": 3.725698361090837e-06, "loss": 0.8714, "step": 11132 }, { "epoch": 0.6648154783231817, "grad_norm": 3.1242048740386963, "learning_rate": 3.725034835113795e-06, "loss": 0.844, "step": 11133 }, { "epoch": 0.6648751940761973, "grad_norm": 2.1847875118255615, "learning_rate": 3.724371309136753e-06, "loss": 0.89, "step": 11134 }, { "epoch": 0.664934909829213, "grad_norm": 5.427865028381348, "learning_rate": 3.7237077831597114e-06, "loss": 0.827, "step": 11135 }, { "epoch": 0.6649946255822285, "grad_norm": 2.10459041595459, "learning_rate": 3.723044257182669e-06, "loss": 0.8289, "step": 11136 }, { "epoch": 0.6650543413352442, "grad_norm": 2.133603811264038, "learning_rate": 3.7223807312056272e-06, "loss": 0.8094, "step": 11137 }, { "epoch": 0.6651140570882599, "grad_norm": 3.871936559677124, "learning_rate": 3.721717205228585e-06, "loss": 0.8702, "step": 11138 }, { "epoch": 0.6651737728412755, "grad_norm": 2.1181089878082275, "learning_rate": 3.721053679251543e-06, "loss": 0.8153, "step": 11139 }, { "epoch": 0.6652334885942912, "grad_norm": 2.762040615081787, "learning_rate": 3.720390153274501e-06, "loss": 0.8244, "step": 11140 }, { "epoch": 0.6652932043473068, "grad_norm": 2.8496477603912354, "learning_rate": 3.719726627297459e-06, "loss": 0.8315, "step": 11141 }, { "epoch": 0.6653529201003224, "grad_norm": 2.305396318435669, "learning_rate": 3.719063101320417e-06, "loss": 0.8502, "step": 11142 }, { "epoch": 0.6654126358533381, "grad_norm": 2.2578797340393066, "learning_rate": 3.718399575343375e-06, "loss": 0.8611, "step": 11143 }, { "epoch": 0.6654723516063538, "grad_norm": 3.741881847381592, "learning_rate": 3.7177360493663326e-06, "loss": 0.8115, "step": 11144 }, { "epoch": 0.6655320673593694, "grad_norm": 2.9683785438537598, "learning_rate": 3.717072523389291e-06, "loss": 0.8245, "step": 11145 }, { "epoch": 0.6655917831123851, "grad_norm": 2.502685785293579, "learning_rate": 3.7164089974122493e-06, "loss": 0.8116, "step": 11146 }, { "epoch": 0.6656514988654006, "grad_norm": 1.831334114074707, "learning_rate": 3.715745471435207e-06, "loss": 0.8256, "step": 11147 }, { "epoch": 0.6657112146184163, "grad_norm": 3.61962628364563, "learning_rate": 3.715081945458165e-06, "loss": 0.8173, "step": 11148 }, { "epoch": 0.665770930371432, "grad_norm": 2.6072564125061035, "learning_rate": 3.7144184194811227e-06, "loss": 0.8494, "step": 11149 }, { "epoch": 0.6658306461244476, "grad_norm": 2.4582555294036865, "learning_rate": 3.7137548935040813e-06, "loss": 0.8419, "step": 11150 }, { "epoch": 0.6658903618774633, "grad_norm": 2.1208882331848145, "learning_rate": 3.713091367527039e-06, "loss": 0.8234, "step": 11151 }, { "epoch": 0.6659500776304789, "grad_norm": 1.9488334655761719, "learning_rate": 3.712427841549997e-06, "loss": 0.8894, "step": 11152 }, { "epoch": 0.6660097933834945, "grad_norm": 2.8094000816345215, "learning_rate": 3.7117643155729547e-06, "loss": 0.843, "step": 11153 }, { "epoch": 0.6660695091365102, "grad_norm": 2.2963101863861084, "learning_rate": 3.711100789595913e-06, "loss": 0.8187, "step": 11154 }, { "epoch": 0.6661292248895259, "grad_norm": 1.9968502521514893, "learning_rate": 3.7104372636188713e-06, "loss": 0.8698, "step": 11155 }, { "epoch": 0.6661889406425415, "grad_norm": 1.9888616800308228, "learning_rate": 3.709773737641829e-06, "loss": 0.8041, "step": 11156 }, { "epoch": 0.6662486563955572, "grad_norm": 4.710199356079102, "learning_rate": 3.709110211664787e-06, "loss": 0.8226, "step": 11157 }, { "epoch": 0.6663083721485727, "grad_norm": 1.5516624450683594, "learning_rate": 3.7084466856877448e-06, "loss": 0.8294, "step": 11158 }, { "epoch": 0.6663680879015884, "grad_norm": 1.6784617900848389, "learning_rate": 3.707783159710703e-06, "loss": 0.8031, "step": 11159 }, { "epoch": 0.6664278036546041, "grad_norm": 2.1970274448394775, "learning_rate": 3.7071196337336614e-06, "loss": 0.8558, "step": 11160 }, { "epoch": 0.6664875194076197, "grad_norm": 1.671022891998291, "learning_rate": 3.706456107756619e-06, "loss": 0.8231, "step": 11161 }, { "epoch": 0.6665472351606354, "grad_norm": 2.619309902191162, "learning_rate": 3.705792581779577e-06, "loss": 0.8495, "step": 11162 }, { "epoch": 0.6666069509136511, "grad_norm": 2.244048833847046, "learning_rate": 3.705129055802535e-06, "loss": 0.811, "step": 11163 }, { "epoch": 0.6666666666666666, "grad_norm": 1.6500086784362793, "learning_rate": 3.704465529825493e-06, "loss": 0.8295, "step": 11164 }, { "epoch": 0.6667263824196823, "grad_norm": 1.9894590377807617, "learning_rate": 3.703802003848451e-06, "loss": 0.8218, "step": 11165 }, { "epoch": 0.666786098172698, "grad_norm": 2.875612497329712, "learning_rate": 3.703138477871409e-06, "loss": 0.83, "step": 11166 }, { "epoch": 0.6668458139257136, "grad_norm": 2.6549313068389893, "learning_rate": 3.702474951894367e-06, "loss": 0.8217, "step": 11167 }, { "epoch": 0.6669055296787293, "grad_norm": 1.8335096836090088, "learning_rate": 3.701811425917325e-06, "loss": 0.8336, "step": 11168 }, { "epoch": 0.6669652454317448, "grad_norm": 1.903005838394165, "learning_rate": 3.7011478999402826e-06, "loss": 0.8571, "step": 11169 }, { "epoch": 0.6670249611847605, "grad_norm": 2.0909721851348877, "learning_rate": 3.700484373963241e-06, "loss": 0.8854, "step": 11170 }, { "epoch": 0.6670846769377762, "grad_norm": 1.9617620706558228, "learning_rate": 3.6998208479861992e-06, "loss": 0.8344, "step": 11171 }, { "epoch": 0.6671443926907918, "grad_norm": 2.1307263374328613, "learning_rate": 3.699157322009157e-06, "loss": 0.8636, "step": 11172 }, { "epoch": 0.6672041084438075, "grad_norm": 1.764329433441162, "learning_rate": 3.698493796032115e-06, "loss": 0.8356, "step": 11173 }, { "epoch": 0.6672638241968232, "grad_norm": 2.4993813037872314, "learning_rate": 3.6978302700550727e-06, "loss": 0.8538, "step": 11174 }, { "epoch": 0.6673235399498387, "grad_norm": 2.994019031524658, "learning_rate": 3.6971667440780312e-06, "loss": 0.8439, "step": 11175 }, { "epoch": 0.6673832557028544, "grad_norm": 2.3491199016571045, "learning_rate": 3.696503218100989e-06, "loss": 0.8675, "step": 11176 }, { "epoch": 0.6674429714558701, "grad_norm": 1.7772899866104126, "learning_rate": 3.695839692123947e-06, "loss": 0.8518, "step": 11177 }, { "epoch": 0.6675026872088857, "grad_norm": 2.0411791801452637, "learning_rate": 3.6951761661469047e-06, "loss": 0.7936, "step": 11178 }, { "epoch": 0.6675624029619014, "grad_norm": 2.0506231784820557, "learning_rate": 3.6945126401698628e-06, "loss": 0.8488, "step": 11179 }, { "epoch": 0.667622118714917, "grad_norm": 2.5815656185150146, "learning_rate": 3.6938491141928213e-06, "loss": 0.8421, "step": 11180 }, { "epoch": 0.6676818344679326, "grad_norm": 1.8784276247024536, "learning_rate": 3.693185588215779e-06, "loss": 0.8303, "step": 11181 }, { "epoch": 0.6677415502209483, "grad_norm": 2.208298921585083, "learning_rate": 3.692522062238737e-06, "loss": 0.8332, "step": 11182 }, { "epoch": 0.6678012659739639, "grad_norm": 2.2001867294311523, "learning_rate": 3.6918585362616948e-06, "loss": 0.8824, "step": 11183 }, { "epoch": 0.6678609817269796, "grad_norm": 2.453402519226074, "learning_rate": 3.691195010284653e-06, "loss": 0.8293, "step": 11184 }, { "epoch": 0.6679206974799953, "grad_norm": 2.3291118144989014, "learning_rate": 3.6905314843076114e-06, "loss": 0.7975, "step": 11185 }, { "epoch": 0.6679804132330108, "grad_norm": 2.4586851596832275, "learning_rate": 3.689867958330569e-06, "loss": 0.8474, "step": 11186 }, { "epoch": 0.6680401289860265, "grad_norm": 2.0888264179229736, "learning_rate": 3.689204432353527e-06, "loss": 0.8316, "step": 11187 }, { "epoch": 0.6680998447390422, "grad_norm": 1.7462286949157715, "learning_rate": 3.688540906376485e-06, "loss": 0.8392, "step": 11188 }, { "epoch": 0.6681595604920578, "grad_norm": 1.8998823165893555, "learning_rate": 3.687877380399443e-06, "loss": 0.8756, "step": 11189 }, { "epoch": 0.6682192762450735, "grad_norm": 1.7406253814697266, "learning_rate": 3.687213854422401e-06, "loss": 0.8155, "step": 11190 }, { "epoch": 0.668278991998089, "grad_norm": 2.5408945083618164, "learning_rate": 3.686550328445359e-06, "loss": 0.8291, "step": 11191 }, { "epoch": 0.6683387077511047, "grad_norm": 2.4046971797943115, "learning_rate": 3.685886802468317e-06, "loss": 0.8649, "step": 11192 }, { "epoch": 0.6683984235041204, "grad_norm": 2.080446243286133, "learning_rate": 3.685223276491275e-06, "loss": 0.8491, "step": 11193 }, { "epoch": 0.668458139257136, "grad_norm": 2.9526684284210205, "learning_rate": 3.6845597505142326e-06, "loss": 0.8354, "step": 11194 }, { "epoch": 0.6685178550101517, "grad_norm": 2.8320016860961914, "learning_rate": 3.683896224537191e-06, "loss": 0.847, "step": 11195 }, { "epoch": 0.6685775707631674, "grad_norm": 2.7183313369750977, "learning_rate": 3.6832326985601492e-06, "loss": 0.8355, "step": 11196 }, { "epoch": 0.6686372865161829, "grad_norm": 2.5862491130828857, "learning_rate": 3.682569172583107e-06, "loss": 0.8165, "step": 11197 }, { "epoch": 0.6686970022691986, "grad_norm": 2.977513074874878, "learning_rate": 3.681905646606065e-06, "loss": 0.806, "step": 11198 }, { "epoch": 0.6687567180222143, "grad_norm": 2.0643553733825684, "learning_rate": 3.6812421206290227e-06, "loss": 0.8446, "step": 11199 }, { "epoch": 0.6688164337752299, "grad_norm": 2.1807312965393066, "learning_rate": 3.680578594651981e-06, "loss": 0.8277, "step": 11200 }, { "epoch": 0.6688164337752299, "eval_text_loss": 0.9004031419754028, "eval_text_runtime": 15.1826, "eval_text_samples_per_second": 263.459, "eval_text_steps_per_second": 0.527, "step": 11200 }, { "epoch": 0.6688164337752299, "eval_image_loss": 0.6090208292007446, "eval_image_runtime": 5.0112, "eval_image_samples_per_second": 798.212, "eval_image_steps_per_second": 1.596, "step": 11200 }, { "epoch": 0.6688164337752299, "eval_video_loss": 1.0423805713653564, "eval_video_runtime": 77.4728, "eval_video_samples_per_second": 51.631, "eval_video_steps_per_second": 0.103, "step": 11200 }, { "epoch": 0.6688761495282456, "grad_norm": 2.0839688777923584, "learning_rate": 3.6799150686749393e-06, "loss": 0.8478, "step": 11201 }, { "epoch": 0.6689358652812611, "grad_norm": 3.960095167160034, "learning_rate": 3.679251542697897e-06, "loss": 0.8373, "step": 11202 }, { "epoch": 0.6689955810342768, "grad_norm": 2.6088333129882812, "learning_rate": 3.678588016720855e-06, "loss": 0.8282, "step": 11203 }, { "epoch": 0.6690552967872925, "grad_norm": 1.9626883268356323, "learning_rate": 3.6779244907438127e-06, "loss": 0.8705, "step": 11204 }, { "epoch": 0.6691150125403081, "grad_norm": 2.5101442337036133, "learning_rate": 3.6772609647667713e-06, "loss": 0.8078, "step": 11205 }, { "epoch": 0.6691747282933238, "grad_norm": 2.3498358726501465, "learning_rate": 3.676597438789729e-06, "loss": 0.8174, "step": 11206 }, { "epoch": 0.6692344440463395, "grad_norm": 2.359222412109375, "learning_rate": 3.675933912812687e-06, "loss": 0.7795, "step": 11207 }, { "epoch": 0.669294159799355, "grad_norm": 1.9571748971939087, "learning_rate": 3.6752703868356447e-06, "loss": 0.8731, "step": 11208 }, { "epoch": 0.6693538755523707, "grad_norm": 4.256694793701172, "learning_rate": 3.674606860858603e-06, "loss": 0.8386, "step": 11209 }, { "epoch": 0.6694135913053864, "grad_norm": 2.329432249069214, "learning_rate": 3.6739433348815614e-06, "loss": 0.8193, "step": 11210 }, { "epoch": 0.669473307058402, "grad_norm": 1.560355544090271, "learning_rate": 3.673279808904519e-06, "loss": 0.8405, "step": 11211 }, { "epoch": 0.6695330228114177, "grad_norm": 2.397714853286743, "learning_rate": 3.672616282927477e-06, "loss": 0.8523, "step": 11212 }, { "epoch": 0.6695927385644332, "grad_norm": 2.169400453567505, "learning_rate": 3.671952756950435e-06, "loss": 0.8207, "step": 11213 }, { "epoch": 0.6696524543174489, "grad_norm": 2.492759943008423, "learning_rate": 3.671289230973393e-06, "loss": 0.8607, "step": 11214 }, { "epoch": 0.6697121700704646, "grad_norm": 1.8246781826019287, "learning_rate": 3.670625704996351e-06, "loss": 0.8319, "step": 11215 }, { "epoch": 0.6697718858234802, "grad_norm": 3.371448516845703, "learning_rate": 3.669962179019309e-06, "loss": 0.8673, "step": 11216 }, { "epoch": 0.6698316015764959, "grad_norm": 7.16314697265625, "learning_rate": 3.6692986530422668e-06, "loss": 0.822, "step": 11217 }, { "epoch": 0.6698913173295116, "grad_norm": 3.705852746963501, "learning_rate": 3.668635127065225e-06, "loss": 0.8596, "step": 11218 }, { "epoch": 0.6699510330825271, "grad_norm": 1.973604440689087, "learning_rate": 3.6679716010881826e-06, "loss": 0.8132, "step": 11219 }, { "epoch": 0.6700107488355428, "grad_norm": 2.6645944118499756, "learning_rate": 3.667308075111141e-06, "loss": 0.8344, "step": 11220 }, { "epoch": 0.6700704645885585, "grad_norm": 1.9873719215393066, "learning_rate": 3.666644549134099e-06, "loss": 0.8182, "step": 11221 }, { "epoch": 0.6701301803415741, "grad_norm": 3.6386523246765137, "learning_rate": 3.665981023157057e-06, "loss": 0.8189, "step": 11222 }, { "epoch": 0.6701898960945898, "grad_norm": 1.958212971687317, "learning_rate": 3.665317497180015e-06, "loss": 0.8342, "step": 11223 }, { "epoch": 0.6702496118476055, "grad_norm": 3.1856491565704346, "learning_rate": 3.6646539712029726e-06, "loss": 0.8353, "step": 11224 }, { "epoch": 0.670309327600621, "grad_norm": 3.2882025241851807, "learning_rate": 3.663990445225931e-06, "loss": 0.8246, "step": 11225 }, { "epoch": 0.6703690433536367, "grad_norm": 1.9260900020599365, "learning_rate": 3.6633269192488893e-06, "loss": 0.8138, "step": 11226 }, { "epoch": 0.6704287591066523, "grad_norm": 2.040329933166504, "learning_rate": 3.662663393271847e-06, "loss": 0.8041, "step": 11227 }, { "epoch": 0.670488474859668, "grad_norm": 2.1299054622650146, "learning_rate": 3.661999867294805e-06, "loss": 0.8387, "step": 11228 }, { "epoch": 0.6705481906126837, "grad_norm": 2.8825480937957764, "learning_rate": 3.6613363413177627e-06, "loss": 0.8306, "step": 11229 }, { "epoch": 0.6706079063656992, "grad_norm": 1.92241632938385, "learning_rate": 3.6606728153407212e-06, "loss": 0.8051, "step": 11230 }, { "epoch": 0.6706676221187149, "grad_norm": 1.8522003889083862, "learning_rate": 3.660009289363679e-06, "loss": 0.8412, "step": 11231 }, { "epoch": 0.6707273378717306, "grad_norm": 2.331397294998169, "learning_rate": 3.659345763386637e-06, "loss": 0.8425, "step": 11232 }, { "epoch": 0.6707870536247462, "grad_norm": 6.509199619293213, "learning_rate": 3.6586822374095947e-06, "loss": 0.8112, "step": 11233 }, { "epoch": 0.6708467693777619, "grad_norm": 2.791517496109009, "learning_rate": 3.658018711432553e-06, "loss": 0.8235, "step": 11234 }, { "epoch": 0.6709064851307776, "grad_norm": 3.893967390060425, "learning_rate": 3.6573551854555113e-06, "loss": 0.798, "step": 11235 }, { "epoch": 0.6709662008837931, "grad_norm": 1.952445387840271, "learning_rate": 3.656691659478469e-06, "loss": 0.8228, "step": 11236 }, { "epoch": 0.6710259166368088, "grad_norm": 1.9050309658050537, "learning_rate": 3.656028133501427e-06, "loss": 0.8088, "step": 11237 }, { "epoch": 0.6710856323898244, "grad_norm": 2.9208340644836426, "learning_rate": 3.6553646075243848e-06, "loss": 0.831, "step": 11238 }, { "epoch": 0.6711453481428401, "grad_norm": 2.1678450107574463, "learning_rate": 3.654701081547343e-06, "loss": 0.8417, "step": 11239 }, { "epoch": 0.6712050638958558, "grad_norm": 1.7347378730773926, "learning_rate": 3.654037555570301e-06, "loss": 0.7902, "step": 11240 }, { "epoch": 0.6712647796488713, "grad_norm": 1.9651925563812256, "learning_rate": 3.653374029593259e-06, "loss": 0.8103, "step": 11241 }, { "epoch": 0.671324495401887, "grad_norm": 1.8730829954147339, "learning_rate": 3.6527105036162168e-06, "loss": 0.8389, "step": 11242 }, { "epoch": 0.6713842111549027, "grad_norm": 2.0863327980041504, "learning_rate": 3.652046977639175e-06, "loss": 0.8208, "step": 11243 }, { "epoch": 0.6714439269079183, "grad_norm": 3.0342392921447754, "learning_rate": 3.6513834516621325e-06, "loss": 0.8405, "step": 11244 }, { "epoch": 0.671503642660934, "grad_norm": 2.6800246238708496, "learning_rate": 3.6507199256850906e-06, "loss": 0.8361, "step": 11245 }, { "epoch": 0.6715633584139497, "grad_norm": 2.0751001834869385, "learning_rate": 3.650056399708049e-06, "loss": 0.8422, "step": 11246 }, { "epoch": 0.6716230741669652, "grad_norm": 3.6629626750946045, "learning_rate": 3.649392873731007e-06, "loss": 0.8166, "step": 11247 }, { "epoch": 0.6716827899199809, "grad_norm": 2.1314890384674072, "learning_rate": 3.648729347753965e-06, "loss": 0.871, "step": 11248 }, { "epoch": 0.6717425056729965, "grad_norm": 3.4887869358062744, "learning_rate": 3.6480658217769226e-06, "loss": 0.8896, "step": 11249 }, { "epoch": 0.6718022214260122, "grad_norm": 2.1317033767700195, "learning_rate": 3.6474022957998807e-06, "loss": 0.831, "step": 11250 }, { "epoch": 0.6718619371790279, "grad_norm": 2.5677032470703125, "learning_rate": 3.6467387698228392e-06, "loss": 0.8433, "step": 11251 }, { "epoch": 0.6719216529320434, "grad_norm": 2.242436408996582, "learning_rate": 3.646075243845797e-06, "loss": 0.8191, "step": 11252 }, { "epoch": 0.6719813686850591, "grad_norm": 1.9203968048095703, "learning_rate": 3.645411717868755e-06, "loss": 0.8328, "step": 11253 }, { "epoch": 0.6720410844380748, "grad_norm": 1.6278387308120728, "learning_rate": 3.6447481918917127e-06, "loss": 0.8183, "step": 11254 }, { "epoch": 0.6721008001910904, "grad_norm": 3.291308641433716, "learning_rate": 3.644084665914671e-06, "loss": 0.8389, "step": 11255 }, { "epoch": 0.6721605159441061, "grad_norm": 2.438469886779785, "learning_rate": 3.643421139937629e-06, "loss": 0.8052, "step": 11256 }, { "epoch": 0.6722202316971218, "grad_norm": 2.4717857837677, "learning_rate": 3.642757613960587e-06, "loss": 0.791, "step": 11257 }, { "epoch": 0.6722799474501373, "grad_norm": 2.030949592590332, "learning_rate": 3.6420940879835447e-06, "loss": 0.8311, "step": 11258 }, { "epoch": 0.672339663203153, "grad_norm": 1.8907313346862793, "learning_rate": 3.6414305620065028e-06, "loss": 0.8043, "step": 11259 }, { "epoch": 0.6723993789561686, "grad_norm": 2.7929837703704834, "learning_rate": 3.6407670360294604e-06, "loss": 0.8436, "step": 11260 }, { "epoch": 0.6724590947091843, "grad_norm": 2.1888105869293213, "learning_rate": 3.640103510052419e-06, "loss": 0.8521, "step": 11261 }, { "epoch": 0.6725188104622, "grad_norm": 3.4818973541259766, "learning_rate": 3.639439984075377e-06, "loss": 0.8681, "step": 11262 }, { "epoch": 0.6725785262152155, "grad_norm": 2.8458452224731445, "learning_rate": 3.6387764580983347e-06, "loss": 0.8243, "step": 11263 }, { "epoch": 0.6726382419682312, "grad_norm": 2.6725964546203613, "learning_rate": 3.638112932121293e-06, "loss": 0.8189, "step": 11264 }, { "epoch": 0.6726979577212469, "grad_norm": 1.768910527229309, "learning_rate": 3.6374494061442505e-06, "loss": 0.7888, "step": 11265 }, { "epoch": 0.6727576734742625, "grad_norm": 2.302309513092041, "learning_rate": 3.636785880167209e-06, "loss": 0.8167, "step": 11266 }, { "epoch": 0.6728173892272782, "grad_norm": 2.674485206604004, "learning_rate": 3.6361223541901667e-06, "loss": 0.8632, "step": 11267 }, { "epoch": 0.6728771049802938, "grad_norm": 2.2140142917633057, "learning_rate": 3.635458828213125e-06, "loss": 0.8257, "step": 11268 }, { "epoch": 0.6729368207333094, "grad_norm": 1.8838001489639282, "learning_rate": 3.6347953022360825e-06, "loss": 0.8098, "step": 11269 }, { "epoch": 0.6729965364863251, "grad_norm": 2.581256151199341, "learning_rate": 3.6341317762590406e-06, "loss": 0.7964, "step": 11270 }, { "epoch": 0.6730562522393407, "grad_norm": 1.674407958984375, "learning_rate": 3.633468250281999e-06, "loss": 0.8399, "step": 11271 }, { "epoch": 0.6731159679923564, "grad_norm": 3.1932544708251953, "learning_rate": 3.632804724304957e-06, "loss": 0.8143, "step": 11272 }, { "epoch": 0.6731756837453721, "grad_norm": 1.7905960083007812, "learning_rate": 3.632141198327915e-06, "loss": 0.8161, "step": 11273 }, { "epoch": 0.6732353994983876, "grad_norm": 2.026987314224243, "learning_rate": 3.6314776723508726e-06, "loss": 0.8302, "step": 11274 }, { "epoch": 0.6732951152514033, "grad_norm": 3.396221160888672, "learning_rate": 3.6308141463738307e-06, "loss": 0.8389, "step": 11275 }, { "epoch": 0.673354831004419, "grad_norm": 2.9326589107513428, "learning_rate": 3.630150620396789e-06, "loss": 0.8462, "step": 11276 }, { "epoch": 0.6734145467574346, "grad_norm": 2.484912872314453, "learning_rate": 3.629487094419747e-06, "loss": 0.8197, "step": 11277 }, { "epoch": 0.6734742625104503, "grad_norm": 1.899137020111084, "learning_rate": 3.628823568442705e-06, "loss": 0.8449, "step": 11278 }, { "epoch": 0.673533978263466, "grad_norm": 1.5727006196975708, "learning_rate": 3.6281600424656627e-06, "loss": 0.8417, "step": 11279 }, { "epoch": 0.6735936940164815, "grad_norm": 2.6445159912109375, "learning_rate": 3.6274965164886208e-06, "loss": 0.8581, "step": 11280 }, { "epoch": 0.6736534097694972, "grad_norm": 2.707995653152466, "learning_rate": 3.626832990511579e-06, "loss": 0.8536, "step": 11281 }, { "epoch": 0.6737131255225128, "grad_norm": 2.2352402210235596, "learning_rate": 3.626169464534537e-06, "loss": 0.8514, "step": 11282 }, { "epoch": 0.6737728412755285, "grad_norm": 1.8460427522659302, "learning_rate": 3.6255059385574946e-06, "loss": 0.8283, "step": 11283 }, { "epoch": 0.6738325570285442, "grad_norm": 1.5869219303131104, "learning_rate": 3.6248424125804527e-06, "loss": 0.8405, "step": 11284 }, { "epoch": 0.6738922727815597, "grad_norm": 1.7849518060684204, "learning_rate": 3.6241788866034104e-06, "loss": 0.8587, "step": 11285 }, { "epoch": 0.6739519885345754, "grad_norm": 2.0099377632141113, "learning_rate": 3.623515360626369e-06, "loss": 0.7984, "step": 11286 }, { "epoch": 0.674011704287591, "grad_norm": 3.57908296585083, "learning_rate": 3.622851834649327e-06, "loss": 0.8855, "step": 11287 }, { "epoch": 0.6740714200406067, "grad_norm": 2.3929944038391113, "learning_rate": 3.6221883086722847e-06, "loss": 0.8246, "step": 11288 }, { "epoch": 0.6741311357936224, "grad_norm": 2.2107126712799072, "learning_rate": 3.621524782695243e-06, "loss": 0.8364, "step": 11289 }, { "epoch": 0.674190851546638, "grad_norm": 2.00669002532959, "learning_rate": 3.6208612567182005e-06, "loss": 0.8189, "step": 11290 }, { "epoch": 0.6742505672996536, "grad_norm": 2.047480583190918, "learning_rate": 3.620197730741159e-06, "loss": 0.8516, "step": 11291 }, { "epoch": 0.6743102830526693, "grad_norm": 2.383610725402832, "learning_rate": 3.6195342047641167e-06, "loss": 0.8046, "step": 11292 }, { "epoch": 0.6743699988056849, "grad_norm": 1.702143907546997, "learning_rate": 3.618870678787075e-06, "loss": 0.8633, "step": 11293 }, { "epoch": 0.6744297145587006, "grad_norm": 3.096906900405884, "learning_rate": 3.6182071528100325e-06, "loss": 0.823, "step": 11294 }, { "epoch": 0.6744894303117163, "grad_norm": 3.580705165863037, "learning_rate": 3.6175436268329906e-06, "loss": 0.8561, "step": 11295 }, { "epoch": 0.6745491460647319, "grad_norm": 1.7108186483383179, "learning_rate": 3.616880100855949e-06, "loss": 0.8375, "step": 11296 }, { "epoch": 0.6746088618177475, "grad_norm": 1.8203167915344238, "learning_rate": 3.6162165748789068e-06, "loss": 0.8512, "step": 11297 }, { "epoch": 0.6746685775707632, "grad_norm": 3.2279319763183594, "learning_rate": 3.615553048901865e-06, "loss": 0.8653, "step": 11298 }, { "epoch": 0.6747282933237788, "grad_norm": 1.632843017578125, "learning_rate": 3.6148895229248226e-06, "loss": 0.8343, "step": 11299 }, { "epoch": 0.6747880090767945, "grad_norm": 3.1413419246673584, "learning_rate": 3.6142259969477807e-06, "loss": 0.8852, "step": 11300 }, { "epoch": 0.6748477248298101, "grad_norm": 2.7130260467529297, "learning_rate": 3.613562470970739e-06, "loss": 0.8298, "step": 11301 }, { "epoch": 0.6749074405828257, "grad_norm": 2.150702714920044, "learning_rate": 3.612898944993697e-06, "loss": 0.8558, "step": 11302 }, { "epoch": 0.6749671563358414, "grad_norm": 2.2729713916778564, "learning_rate": 3.612235419016655e-06, "loss": 0.8715, "step": 11303 }, { "epoch": 0.675026872088857, "grad_norm": 2.5585198402404785, "learning_rate": 3.6115718930396126e-06, "loss": 0.8596, "step": 11304 }, { "epoch": 0.6750865878418727, "grad_norm": 1.7167713642120361, "learning_rate": 3.6109083670625707e-06, "loss": 0.8267, "step": 11305 }, { "epoch": 0.6751463035948884, "grad_norm": 2.823798656463623, "learning_rate": 3.610244841085529e-06, "loss": 0.8367, "step": 11306 }, { "epoch": 0.675206019347904, "grad_norm": 2.8375043869018555, "learning_rate": 3.609581315108487e-06, "loss": 0.8228, "step": 11307 }, { "epoch": 0.6752657351009196, "grad_norm": 1.8791539669036865, "learning_rate": 3.6089177891314446e-06, "loss": 0.8381, "step": 11308 }, { "epoch": 0.6753254508539352, "grad_norm": 2.1357803344726562, "learning_rate": 3.6082542631544027e-06, "loss": 0.8061, "step": 11309 }, { "epoch": 0.6753851666069509, "grad_norm": 2.055785655975342, "learning_rate": 3.6075907371773604e-06, "loss": 0.852, "step": 11310 }, { "epoch": 0.6754448823599666, "grad_norm": 2.04215407371521, "learning_rate": 3.606927211200319e-06, "loss": 0.8475, "step": 11311 }, { "epoch": 0.6755045981129822, "grad_norm": 1.8073896169662476, "learning_rate": 3.606263685223277e-06, "loss": 0.8492, "step": 11312 }, { "epoch": 0.6755643138659978, "grad_norm": 1.7008408308029175, "learning_rate": 3.6056001592462347e-06, "loss": 0.8381, "step": 11313 }, { "epoch": 0.6756240296190135, "grad_norm": 2.2127673625946045, "learning_rate": 3.604936633269193e-06, "loss": 0.8337, "step": 11314 }, { "epoch": 0.6756837453720291, "grad_norm": 2.1629858016967773, "learning_rate": 3.6042731072921505e-06, "loss": 0.8261, "step": 11315 }, { "epoch": 0.6757434611250448, "grad_norm": 1.9882524013519287, "learning_rate": 3.603609581315109e-06, "loss": 0.8184, "step": 11316 }, { "epoch": 0.6758031768780605, "grad_norm": 1.8149731159210205, "learning_rate": 3.6029460553380667e-06, "loss": 0.8096, "step": 11317 }, { "epoch": 0.6758628926310761, "grad_norm": 1.7314260005950928, "learning_rate": 3.6022825293610248e-06, "loss": 0.8448, "step": 11318 }, { "epoch": 0.6759226083840917, "grad_norm": 2.089718818664551, "learning_rate": 3.6016190033839824e-06, "loss": 0.8607, "step": 11319 }, { "epoch": 0.6759823241371073, "grad_norm": 3.3332512378692627, "learning_rate": 3.6009554774069405e-06, "loss": 0.8382, "step": 11320 }, { "epoch": 0.676042039890123, "grad_norm": 1.8225692510604858, "learning_rate": 3.600291951429899e-06, "loss": 0.8578, "step": 11321 }, { "epoch": 0.6761017556431387, "grad_norm": 2.0288422107696533, "learning_rate": 3.5996284254528567e-06, "loss": 0.8421, "step": 11322 }, { "epoch": 0.6761614713961543, "grad_norm": 2.750622510910034, "learning_rate": 3.598964899475815e-06, "loss": 0.8317, "step": 11323 }, { "epoch": 0.6762211871491699, "grad_norm": 2.5906565189361572, "learning_rate": 3.5983013734987725e-06, "loss": 0.8543, "step": 11324 }, { "epoch": 0.6762809029021856, "grad_norm": 2.2329487800598145, "learning_rate": 3.5976378475217306e-06, "loss": 0.8347, "step": 11325 }, { "epoch": 0.6763406186552012, "grad_norm": 1.9084936380386353, "learning_rate": 3.596974321544689e-06, "loss": 0.8258, "step": 11326 }, { "epoch": 0.6764003344082169, "grad_norm": 2.114697217941284, "learning_rate": 3.596310795567647e-06, "loss": 0.8423, "step": 11327 }, { "epoch": 0.6764600501612326, "grad_norm": 2.598986864089966, "learning_rate": 3.595647269590605e-06, "loss": 0.8012, "step": 11328 }, { "epoch": 0.6765197659142482, "grad_norm": 2.0216007232666016, "learning_rate": 3.5949837436135626e-06, "loss": 0.8535, "step": 11329 }, { "epoch": 0.6765794816672638, "grad_norm": 1.7113909721374512, "learning_rate": 3.5943202176365207e-06, "loss": 0.8423, "step": 11330 }, { "epoch": 0.6766391974202794, "grad_norm": 2.020803213119507, "learning_rate": 3.593656691659479e-06, "loss": 0.8423, "step": 11331 }, { "epoch": 0.6766989131732951, "grad_norm": 2.644975185394287, "learning_rate": 3.592993165682437e-06, "loss": 0.8884, "step": 11332 }, { "epoch": 0.6767586289263108, "grad_norm": 2.0559475421905518, "learning_rate": 3.5923296397053946e-06, "loss": 0.8387, "step": 11333 }, { "epoch": 0.6768183446793264, "grad_norm": 1.7583400011062622, "learning_rate": 3.5916661137283527e-06, "loss": 0.8475, "step": 11334 }, { "epoch": 0.676878060432342, "grad_norm": 1.9111111164093018, "learning_rate": 3.5910025877513104e-06, "loss": 0.8183, "step": 11335 }, { "epoch": 0.6769377761853577, "grad_norm": 1.8013601303100586, "learning_rate": 3.590339061774269e-06, "loss": 0.8849, "step": 11336 }, { "epoch": 0.6769974919383733, "grad_norm": 2.6640701293945312, "learning_rate": 3.589675535797227e-06, "loss": 0.8669, "step": 11337 }, { "epoch": 0.677057207691389, "grad_norm": 2.0944013595581055, "learning_rate": 3.5890120098201847e-06, "loss": 0.8443, "step": 11338 }, { "epoch": 0.6771169234444047, "grad_norm": 1.9200565814971924, "learning_rate": 3.5883484838431428e-06, "loss": 0.849, "step": 11339 }, { "epoch": 0.6771766391974203, "grad_norm": 1.9202171564102173, "learning_rate": 3.5876849578661004e-06, "loss": 0.8716, "step": 11340 }, { "epoch": 0.6772363549504359, "grad_norm": 2.561528205871582, "learning_rate": 3.587021431889059e-06, "loss": 0.8218, "step": 11341 }, { "epoch": 0.6772960707034515, "grad_norm": 1.8576858043670654, "learning_rate": 3.5863579059120166e-06, "loss": 0.7746, "step": 11342 }, { "epoch": 0.6773557864564672, "grad_norm": 2.77923321723938, "learning_rate": 3.5856943799349747e-06, "loss": 0.8259, "step": 11343 }, { "epoch": 0.6774155022094829, "grad_norm": 2.5528979301452637, "learning_rate": 3.5850308539579324e-06, "loss": 0.8222, "step": 11344 }, { "epoch": 0.6774752179624985, "grad_norm": 1.8974940776824951, "learning_rate": 3.5843673279808905e-06, "loss": 0.8249, "step": 11345 }, { "epoch": 0.6775349337155141, "grad_norm": 1.5301167964935303, "learning_rate": 3.583703802003849e-06, "loss": 0.8617, "step": 11346 }, { "epoch": 0.6775946494685298, "grad_norm": 3.7053534984588623, "learning_rate": 3.5830402760268067e-06, "loss": 0.8658, "step": 11347 }, { "epoch": 0.6776543652215454, "grad_norm": 2.090740919113159, "learning_rate": 3.582376750049765e-06, "loss": 0.8666, "step": 11348 }, { "epoch": 0.6777140809745611, "grad_norm": 1.5613716840744019, "learning_rate": 3.5817132240727225e-06, "loss": 0.8713, "step": 11349 }, { "epoch": 0.6777737967275768, "grad_norm": 2.4816324710845947, "learning_rate": 3.5810496980956806e-06, "loss": 0.9065, "step": 11350 }, { "epoch": 0.6778335124805924, "grad_norm": 3.039036989212036, "learning_rate": 3.580386172118639e-06, "loss": 0.8135, "step": 11351 }, { "epoch": 0.677893228233608, "grad_norm": 1.5110663175582886, "learning_rate": 3.579722646141597e-06, "loss": 0.828, "step": 11352 }, { "epoch": 0.6779529439866236, "grad_norm": 1.9105113744735718, "learning_rate": 3.579059120164555e-06, "loss": 0.86, "step": 11353 }, { "epoch": 0.6780126597396393, "grad_norm": 2.2500717639923096, "learning_rate": 3.5783955941875126e-06, "loss": 0.8657, "step": 11354 }, { "epoch": 0.678072375492655, "grad_norm": 1.7676465511322021, "learning_rate": 3.5777320682104707e-06, "loss": 0.8393, "step": 11355 }, { "epoch": 0.6781320912456706, "grad_norm": 1.80845046043396, "learning_rate": 3.5770685422334288e-06, "loss": 0.8173, "step": 11356 }, { "epoch": 0.6781918069986863, "grad_norm": 2.7645139694213867, "learning_rate": 3.576405016256387e-06, "loss": 0.82, "step": 11357 }, { "epoch": 0.6782515227517019, "grad_norm": 2.728102922439575, "learning_rate": 3.5757414902793446e-06, "loss": 0.8445, "step": 11358 }, { "epoch": 0.6783112385047175, "grad_norm": 2.8044674396514893, "learning_rate": 3.5750779643023027e-06, "loss": 0.8653, "step": 11359 }, { "epoch": 0.6783709542577332, "grad_norm": 1.9664580821990967, "learning_rate": 3.5744144383252603e-06, "loss": 0.8242, "step": 11360 }, { "epoch": 0.6784306700107489, "grad_norm": 2.7309226989746094, "learning_rate": 3.573750912348219e-06, "loss": 0.8426, "step": 11361 }, { "epoch": 0.6784903857637645, "grad_norm": 2.1382434368133545, "learning_rate": 3.573087386371177e-06, "loss": 0.8285, "step": 11362 }, { "epoch": 0.6785501015167801, "grad_norm": 1.5387611389160156, "learning_rate": 3.5724238603941346e-06, "loss": 0.8223, "step": 11363 }, { "epoch": 0.6786098172697957, "grad_norm": 1.9966845512390137, "learning_rate": 3.5717603344170927e-06, "loss": 0.7844, "step": 11364 }, { "epoch": 0.6786695330228114, "grad_norm": 1.6907527446746826, "learning_rate": 3.5710968084400504e-06, "loss": 0.8602, "step": 11365 }, { "epoch": 0.6787292487758271, "grad_norm": 1.774430751800537, "learning_rate": 3.570433282463009e-06, "loss": 0.8283, "step": 11366 }, { "epoch": 0.6787889645288427, "grad_norm": 2.143026828765869, "learning_rate": 3.5697697564859666e-06, "loss": 0.8269, "step": 11367 }, { "epoch": 0.6788486802818584, "grad_norm": 2.378164291381836, "learning_rate": 3.5691062305089247e-06, "loss": 0.8564, "step": 11368 }, { "epoch": 0.678908396034874, "grad_norm": 2.835230588912964, "learning_rate": 3.5684427045318824e-06, "loss": 0.8052, "step": 11369 }, { "epoch": 0.6789681117878896, "grad_norm": 2.5613327026367188, "learning_rate": 3.5677791785548405e-06, "loss": 0.8374, "step": 11370 }, { "epoch": 0.6790278275409053, "grad_norm": 2.814434051513672, "learning_rate": 3.567115652577799e-06, "loss": 0.8327, "step": 11371 }, { "epoch": 0.679087543293921, "grad_norm": 2.197117328643799, "learning_rate": 3.5664521266007567e-06, "loss": 0.8486, "step": 11372 }, { "epoch": 0.6791472590469366, "grad_norm": 1.7517447471618652, "learning_rate": 3.565788600623715e-06, "loss": 0.795, "step": 11373 }, { "epoch": 0.6792069747999522, "grad_norm": 1.9185842275619507, "learning_rate": 3.5651250746466725e-06, "loss": 0.8045, "step": 11374 }, { "epoch": 0.6792666905529678, "grad_norm": 2.1545026302337646, "learning_rate": 3.5644615486696306e-06, "loss": 0.8191, "step": 11375 }, { "epoch": 0.6793264063059835, "grad_norm": 2.274726629257202, "learning_rate": 3.563798022692589e-06, "loss": 0.8248, "step": 11376 }, { "epoch": 0.6793861220589992, "grad_norm": 1.5553791522979736, "learning_rate": 3.5631344967155468e-06, "loss": 0.8291, "step": 11377 }, { "epoch": 0.6794458378120148, "grad_norm": 3.2826578617095947, "learning_rate": 3.562470970738505e-06, "loss": 0.8524, "step": 11378 }, { "epoch": 0.6795055535650305, "grad_norm": 1.6989160776138306, "learning_rate": 3.5618074447614625e-06, "loss": 0.85, "step": 11379 }, { "epoch": 0.6795652693180461, "grad_norm": 2.331829309463501, "learning_rate": 3.5611439187844206e-06, "loss": 0.8459, "step": 11380 }, { "epoch": 0.6796249850710617, "grad_norm": 2.1436660289764404, "learning_rate": 3.5604803928073787e-06, "loss": 0.8283, "step": 11381 }, { "epoch": 0.6796847008240774, "grad_norm": 2.3330931663513184, "learning_rate": 3.559816866830337e-06, "loss": 0.8398, "step": 11382 }, { "epoch": 0.6797444165770931, "grad_norm": 1.6609548330307007, "learning_rate": 3.5591533408532945e-06, "loss": 0.8446, "step": 11383 }, { "epoch": 0.6798041323301087, "grad_norm": 2.1996195316314697, "learning_rate": 3.5584898148762526e-06, "loss": 0.836, "step": 11384 }, { "epoch": 0.6798638480831243, "grad_norm": 1.857980728149414, "learning_rate": 3.5578262888992103e-06, "loss": 0.8036, "step": 11385 }, { "epoch": 0.6799235638361399, "grad_norm": 2.2445614337921143, "learning_rate": 3.557162762922169e-06, "loss": 0.8204, "step": 11386 }, { "epoch": 0.6799832795891556, "grad_norm": 2.5469703674316406, "learning_rate": 3.556499236945127e-06, "loss": 0.8333, "step": 11387 }, { "epoch": 0.6800429953421713, "grad_norm": 2.2196288108825684, "learning_rate": 3.5558357109680846e-06, "loss": 0.8606, "step": 11388 }, { "epoch": 0.6801027110951869, "grad_norm": 7.6112284660339355, "learning_rate": 3.5551721849910427e-06, "loss": 0.872, "step": 11389 }, { "epoch": 0.6801624268482026, "grad_norm": 1.876097321510315, "learning_rate": 3.5545086590140004e-06, "loss": 0.8499, "step": 11390 }, { "epoch": 0.6802221426012182, "grad_norm": 3.917872428894043, "learning_rate": 3.553845133036959e-06, "loss": 0.8527, "step": 11391 }, { "epoch": 0.6802818583542338, "grad_norm": 4.436947345733643, "learning_rate": 3.5531816070599166e-06, "loss": 0.8458, "step": 11392 }, { "epoch": 0.6803415741072495, "grad_norm": 6.752603054046631, "learning_rate": 3.5525180810828747e-06, "loss": 0.8184, "step": 11393 }, { "epoch": 0.6804012898602652, "grad_norm": 1.8730391263961792, "learning_rate": 3.5518545551058324e-06, "loss": 0.8276, "step": 11394 }, { "epoch": 0.6804610056132808, "grad_norm": 2.0033061504364014, "learning_rate": 3.5511910291287905e-06, "loss": 0.8331, "step": 11395 }, { "epoch": 0.6805207213662964, "grad_norm": 2.789588212966919, "learning_rate": 3.550527503151749e-06, "loss": 0.8177, "step": 11396 }, { "epoch": 0.680580437119312, "grad_norm": 2.582425594329834, "learning_rate": 3.5498639771747067e-06, "loss": 0.8325, "step": 11397 }, { "epoch": 0.6806401528723277, "grad_norm": 2.4171907901763916, "learning_rate": 3.5492004511976648e-06, "loss": 0.8517, "step": 11398 }, { "epoch": 0.6806998686253434, "grad_norm": 2.566495656967163, "learning_rate": 3.5485369252206224e-06, "loss": 0.8521, "step": 11399 }, { "epoch": 0.680759584378359, "grad_norm": 1.9066060781478882, "learning_rate": 3.5478733992435805e-06, "loss": 0.8379, "step": 11400 }, { "epoch": 0.680759584378359, "eval_text_loss": 0.8989037871360779, "eval_text_runtime": 15.1927, "eval_text_samples_per_second": 263.284, "eval_text_steps_per_second": 0.527, "step": 11400 }, { "epoch": 0.680759584378359, "eval_image_loss": 0.6083500385284424, "eval_image_runtime": 5.0126, "eval_image_samples_per_second": 797.986, "eval_image_steps_per_second": 1.596, "step": 11400 }, { "epoch": 0.680759584378359, "eval_video_loss": 1.041046142578125, "eval_video_runtime": 77.3005, "eval_video_samples_per_second": 51.746, "eval_video_steps_per_second": 0.103, "step": 11400 }, { "epoch": 0.6808193001313747, "grad_norm": 1.8237483501434326, "learning_rate": 3.547209873266539e-06, "loss": 0.7957, "step": 11401 }, { "epoch": 0.6808790158843903, "grad_norm": 2.8386595249176025, "learning_rate": 3.5465463472894967e-06, "loss": 0.8648, "step": 11402 }, { "epoch": 0.6809387316374059, "grad_norm": 1.7839900255203247, "learning_rate": 3.545882821312455e-06, "loss": 0.7902, "step": 11403 }, { "epoch": 0.6809984473904216, "grad_norm": 2.0094637870788574, "learning_rate": 3.5452192953354125e-06, "loss": 0.7952, "step": 11404 }, { "epoch": 0.6810581631434373, "grad_norm": 2.0842370986938477, "learning_rate": 3.5445557693583706e-06, "loss": 0.8586, "step": 11405 }, { "epoch": 0.6811178788964529, "grad_norm": 2.0794782638549805, "learning_rate": 3.5438922433813287e-06, "loss": 0.8543, "step": 11406 }, { "epoch": 0.6811775946494685, "grad_norm": 2.120617151260376, "learning_rate": 3.543228717404287e-06, "loss": 0.8274, "step": 11407 }, { "epoch": 0.6812373104024841, "grad_norm": 2.9449920654296875, "learning_rate": 3.5425651914272445e-06, "loss": 0.8362, "step": 11408 }, { "epoch": 0.6812970261554998, "grad_norm": 2.5498085021972656, "learning_rate": 3.5419016654502026e-06, "loss": 0.8822, "step": 11409 }, { "epoch": 0.6813567419085155, "grad_norm": 2.0978384017944336, "learning_rate": 3.5412381394731603e-06, "loss": 0.7914, "step": 11410 }, { "epoch": 0.6814164576615311, "grad_norm": 4.803315162658691, "learning_rate": 3.540574613496119e-06, "loss": 0.8284, "step": 11411 }, { "epoch": 0.6814761734145468, "grad_norm": 2.3770992755889893, "learning_rate": 3.539911087519077e-06, "loss": 0.8677, "step": 11412 }, { "epoch": 0.6815358891675624, "grad_norm": 1.9704304933547974, "learning_rate": 3.5392475615420346e-06, "loss": 0.8275, "step": 11413 }, { "epoch": 0.681595604920578, "grad_norm": 2.606947898864746, "learning_rate": 3.5385840355649927e-06, "loss": 0.8581, "step": 11414 }, { "epoch": 0.6816553206735937, "grad_norm": 1.618511438369751, "learning_rate": 3.5379205095879503e-06, "loss": 0.8221, "step": 11415 }, { "epoch": 0.6817150364266094, "grad_norm": 1.863160490989685, "learning_rate": 3.537256983610909e-06, "loss": 0.8381, "step": 11416 }, { "epoch": 0.681774752179625, "grad_norm": 3.193430185317993, "learning_rate": 3.5365934576338666e-06, "loss": 0.8307, "step": 11417 }, { "epoch": 0.6818344679326406, "grad_norm": 1.7238233089447021, "learning_rate": 3.5359299316568247e-06, "loss": 0.8622, "step": 11418 }, { "epoch": 0.6818941836856562, "grad_norm": 3.1853103637695312, "learning_rate": 3.5352664056797823e-06, "loss": 0.8223, "step": 11419 }, { "epoch": 0.6819538994386719, "grad_norm": 3.285806655883789, "learning_rate": 3.5346028797027404e-06, "loss": 0.8253, "step": 11420 }, { "epoch": 0.6820136151916876, "grad_norm": 1.5353456735610962, "learning_rate": 3.533939353725699e-06, "loss": 0.8035, "step": 11421 }, { "epoch": 0.6820733309447032, "grad_norm": 2.786959171295166, "learning_rate": 3.5332758277486566e-06, "loss": 0.8571, "step": 11422 }, { "epoch": 0.6821330466977189, "grad_norm": 1.8529651165008545, "learning_rate": 3.5326123017716147e-06, "loss": 0.7867, "step": 11423 }, { "epoch": 0.6821927624507345, "grad_norm": 2.1191725730895996, "learning_rate": 3.5319487757945724e-06, "loss": 0.8418, "step": 11424 }, { "epoch": 0.6822524782037501, "grad_norm": 3.2685587406158447, "learning_rate": 3.5312852498175305e-06, "loss": 0.8257, "step": 11425 }, { "epoch": 0.6823121939567658, "grad_norm": 2.991593837738037, "learning_rate": 3.530621723840489e-06, "loss": 0.8371, "step": 11426 }, { "epoch": 0.6823719097097815, "grad_norm": 2.765165090560913, "learning_rate": 3.5299581978634467e-06, "loss": 0.8279, "step": 11427 }, { "epoch": 0.6824316254627971, "grad_norm": 1.598623514175415, "learning_rate": 3.529294671886405e-06, "loss": 0.8443, "step": 11428 }, { "epoch": 0.6824913412158128, "grad_norm": 2.1689252853393555, "learning_rate": 3.5286311459093625e-06, "loss": 0.8518, "step": 11429 }, { "epoch": 0.6825510569688283, "grad_norm": 2.199514389038086, "learning_rate": 3.5279676199323206e-06, "loss": 0.8316, "step": 11430 }, { "epoch": 0.682610772721844, "grad_norm": 1.9618009328842163, "learning_rate": 3.5273040939552787e-06, "loss": 0.8404, "step": 11431 }, { "epoch": 0.6826704884748597, "grad_norm": 2.41020131111145, "learning_rate": 3.5266405679782368e-06, "loss": 0.8348, "step": 11432 }, { "epoch": 0.6827302042278753, "grad_norm": 2.1849100589752197, "learning_rate": 3.5259770420011945e-06, "loss": 0.8248, "step": 11433 }, { "epoch": 0.682789919980891, "grad_norm": 1.6312527656555176, "learning_rate": 3.5253135160241526e-06, "loss": 0.8227, "step": 11434 }, { "epoch": 0.6828496357339066, "grad_norm": 2.5365793704986572, "learning_rate": 3.5246499900471102e-06, "loss": 0.8831, "step": 11435 }, { "epoch": 0.6829093514869222, "grad_norm": 2.084789276123047, "learning_rate": 3.5239864640700688e-06, "loss": 0.8191, "step": 11436 }, { "epoch": 0.6829690672399379, "grad_norm": 3.9737789630889893, "learning_rate": 3.523322938093027e-06, "loss": 0.8301, "step": 11437 }, { "epoch": 0.6830287829929536, "grad_norm": 2.951754093170166, "learning_rate": 3.5226594121159845e-06, "loss": 0.8883, "step": 11438 }, { "epoch": 0.6830884987459692, "grad_norm": 2.047732353210449, "learning_rate": 3.5219958861389426e-06, "loss": 0.8208, "step": 11439 }, { "epoch": 0.6831482144989849, "grad_norm": 2.090337038040161, "learning_rate": 3.5213323601619003e-06, "loss": 0.8344, "step": 11440 }, { "epoch": 0.6832079302520004, "grad_norm": 1.6272064447402954, "learning_rate": 3.520668834184859e-06, "loss": 0.8147, "step": 11441 }, { "epoch": 0.6832676460050161, "grad_norm": 2.5908734798431396, "learning_rate": 3.5200053082078165e-06, "loss": 0.8403, "step": 11442 }, { "epoch": 0.6833273617580318, "grad_norm": 2.0889952182769775, "learning_rate": 3.5193417822307746e-06, "loss": 0.7837, "step": 11443 }, { "epoch": 0.6833870775110474, "grad_norm": 2.2386324405670166, "learning_rate": 3.5186782562537323e-06, "loss": 0.8385, "step": 11444 }, { "epoch": 0.6834467932640631, "grad_norm": 1.9159798622131348, "learning_rate": 3.5180147302766904e-06, "loss": 0.8193, "step": 11445 }, { "epoch": 0.6835065090170787, "grad_norm": 2.0368077754974365, "learning_rate": 3.517351204299649e-06, "loss": 0.8474, "step": 11446 }, { "epoch": 0.6835662247700943, "grad_norm": 1.8995410203933716, "learning_rate": 3.5166876783226066e-06, "loss": 0.8429, "step": 11447 }, { "epoch": 0.68362594052311, "grad_norm": 1.9541276693344116, "learning_rate": 3.5160241523455647e-06, "loss": 0.8464, "step": 11448 }, { "epoch": 0.6836856562761257, "grad_norm": 1.9589335918426514, "learning_rate": 3.5153606263685224e-06, "loss": 0.8189, "step": 11449 }, { "epoch": 0.6837453720291413, "grad_norm": 2.6600165367126465, "learning_rate": 3.5146971003914805e-06, "loss": 0.8238, "step": 11450 }, { "epoch": 0.683805087782157, "grad_norm": 2.29481840133667, "learning_rate": 3.514033574414439e-06, "loss": 0.8216, "step": 11451 }, { "epoch": 0.6838648035351725, "grad_norm": 2.1824960708618164, "learning_rate": 3.5133700484373967e-06, "loss": 0.8632, "step": 11452 }, { "epoch": 0.6839245192881882, "grad_norm": 2.449564218521118, "learning_rate": 3.5127065224603548e-06, "loss": 0.8504, "step": 11453 }, { "epoch": 0.6839842350412039, "grad_norm": 1.7042498588562012, "learning_rate": 3.5120429964833125e-06, "loss": 0.8738, "step": 11454 }, { "epoch": 0.6840439507942195, "grad_norm": 2.8787460327148438, "learning_rate": 3.5113794705062706e-06, "loss": 0.8136, "step": 11455 }, { "epoch": 0.6841036665472352, "grad_norm": 2.5388331413269043, "learning_rate": 3.5107159445292287e-06, "loss": 0.8307, "step": 11456 }, { "epoch": 0.6841633823002508, "grad_norm": 1.9268431663513184, "learning_rate": 3.5100524185521868e-06, "loss": 0.8288, "step": 11457 }, { "epoch": 0.6842230980532664, "grad_norm": 2.9609084129333496, "learning_rate": 3.5093888925751444e-06, "loss": 0.8514, "step": 11458 }, { "epoch": 0.6842828138062821, "grad_norm": 3.4991888999938965, "learning_rate": 3.5087253665981025e-06, "loss": 0.8269, "step": 11459 }, { "epoch": 0.6843425295592978, "grad_norm": 1.969591736793518, "learning_rate": 3.5080618406210602e-06, "loss": 0.8351, "step": 11460 }, { "epoch": 0.6844022453123134, "grad_norm": 1.6287111043930054, "learning_rate": 3.5073983146440187e-06, "loss": 0.8443, "step": 11461 }, { "epoch": 0.6844619610653291, "grad_norm": 3.269139051437378, "learning_rate": 3.506734788666977e-06, "loss": 0.8455, "step": 11462 }, { "epoch": 0.6845216768183446, "grad_norm": 2.0592174530029297, "learning_rate": 3.5060712626899345e-06, "loss": 0.8454, "step": 11463 }, { "epoch": 0.6845813925713603, "grad_norm": 2.3401548862457275, "learning_rate": 3.5054077367128926e-06, "loss": 0.8239, "step": 11464 }, { "epoch": 0.684641108324376, "grad_norm": 2.129852771759033, "learning_rate": 3.5047442107358503e-06, "loss": 0.8214, "step": 11465 }, { "epoch": 0.6847008240773916, "grad_norm": 2.1249425411224365, "learning_rate": 3.504080684758809e-06, "loss": 0.7934, "step": 11466 }, { "epoch": 0.6847605398304073, "grad_norm": 1.8698312044143677, "learning_rate": 3.5034171587817665e-06, "loss": 0.7849, "step": 11467 }, { "epoch": 0.6848202555834229, "grad_norm": 2.3682215213775635, "learning_rate": 3.5027536328047246e-06, "loss": 0.835, "step": 11468 }, { "epoch": 0.6848799713364385, "grad_norm": 2.0932624340057373, "learning_rate": 3.5020901068276823e-06, "loss": 0.8456, "step": 11469 }, { "epoch": 0.6849396870894542, "grad_norm": 1.9794647693634033, "learning_rate": 3.5014265808506404e-06, "loss": 0.843, "step": 11470 }, { "epoch": 0.6849994028424699, "grad_norm": 2.362168550491333, "learning_rate": 3.500763054873599e-06, "loss": 0.8325, "step": 11471 }, { "epoch": 0.6850591185954855, "grad_norm": 2.159771680831909, "learning_rate": 3.5000995288965566e-06, "loss": 0.8641, "step": 11472 }, { "epoch": 0.6851188343485012, "grad_norm": 2.5613389015197754, "learning_rate": 3.4994360029195147e-06, "loss": 0.8421, "step": 11473 }, { "epoch": 0.6851785501015167, "grad_norm": 2.8023109436035156, "learning_rate": 3.4987724769424723e-06, "loss": 0.8179, "step": 11474 }, { "epoch": 0.6852382658545324, "grad_norm": 2.167414665222168, "learning_rate": 3.4981089509654304e-06, "loss": 0.8263, "step": 11475 }, { "epoch": 0.6852979816075481, "grad_norm": 2.7780086994171143, "learning_rate": 3.497445424988389e-06, "loss": 0.8516, "step": 11476 }, { "epoch": 0.6853576973605637, "grad_norm": 2.1682536602020264, "learning_rate": 3.4967818990113467e-06, "loss": 0.8299, "step": 11477 }, { "epoch": 0.6854174131135794, "grad_norm": 1.9868252277374268, "learning_rate": 3.4961183730343048e-06, "loss": 0.8075, "step": 11478 }, { "epoch": 0.685477128866595, "grad_norm": 2.5255377292633057, "learning_rate": 3.4954548470572624e-06, "loss": 0.8263, "step": 11479 }, { "epoch": 0.6855368446196106, "grad_norm": 2.103367328643799, "learning_rate": 3.4947913210802205e-06, "loss": 0.8223, "step": 11480 }, { "epoch": 0.6855965603726263, "grad_norm": 3.0732460021972656, "learning_rate": 3.4941277951031786e-06, "loss": 0.8458, "step": 11481 }, { "epoch": 0.685656276125642, "grad_norm": 3.467684507369995, "learning_rate": 3.4934642691261367e-06, "loss": 0.8415, "step": 11482 }, { "epoch": 0.6857159918786576, "grad_norm": 4.855781555175781, "learning_rate": 3.4928007431490944e-06, "loss": 0.8317, "step": 11483 }, { "epoch": 0.6857757076316733, "grad_norm": 1.77099609375, "learning_rate": 3.4921372171720525e-06, "loss": 0.8444, "step": 11484 }, { "epoch": 0.6858354233846888, "grad_norm": 2.711683988571167, "learning_rate": 3.49147369119501e-06, "loss": 0.8147, "step": 11485 }, { "epoch": 0.6858951391377045, "grad_norm": 1.9816116094589233, "learning_rate": 3.4908101652179687e-06, "loss": 0.8039, "step": 11486 }, { "epoch": 0.6859548548907202, "grad_norm": 1.5228420495986938, "learning_rate": 3.490146639240927e-06, "loss": 0.854, "step": 11487 }, { "epoch": 0.6860145706437358, "grad_norm": 2.488670825958252, "learning_rate": 3.4894831132638845e-06, "loss": 0.8139, "step": 11488 }, { "epoch": 0.6860742863967515, "grad_norm": 2.612806797027588, "learning_rate": 3.4888195872868426e-06, "loss": 0.8014, "step": 11489 }, { "epoch": 0.6861340021497672, "grad_norm": 2.4711644649505615, "learning_rate": 3.4881560613098003e-06, "loss": 0.8542, "step": 11490 }, { "epoch": 0.6861937179027827, "grad_norm": 2.9686059951782227, "learning_rate": 3.4874925353327588e-06, "loss": 0.8205, "step": 11491 }, { "epoch": 0.6862534336557984, "grad_norm": 2.382932662963867, "learning_rate": 3.4868290093557165e-06, "loss": 0.8479, "step": 11492 }, { "epoch": 0.686313149408814, "grad_norm": 1.8762069940567017, "learning_rate": 3.4861654833786746e-06, "loss": 0.8204, "step": 11493 }, { "epoch": 0.6863728651618297, "grad_norm": 1.332428216934204, "learning_rate": 3.4855019574016322e-06, "loss": 0.8403, "step": 11494 }, { "epoch": 0.6864325809148454, "grad_norm": 2.4982447624206543, "learning_rate": 3.4848384314245903e-06, "loss": 0.8461, "step": 11495 }, { "epoch": 0.6864922966678609, "grad_norm": 1.8301939964294434, "learning_rate": 3.484174905447549e-06, "loss": 0.8598, "step": 11496 }, { "epoch": 0.6865520124208766, "grad_norm": 2.3722970485687256, "learning_rate": 3.4835113794705065e-06, "loss": 0.838, "step": 11497 }, { "epoch": 0.6866117281738923, "grad_norm": 1.875637173652649, "learning_rate": 3.4828478534934646e-06, "loss": 0.8163, "step": 11498 }, { "epoch": 0.6866714439269079, "grad_norm": 2.79758358001709, "learning_rate": 3.4821843275164223e-06, "loss": 0.8353, "step": 11499 }, { "epoch": 0.6867311596799236, "grad_norm": 3.2584309577941895, "learning_rate": 3.4815208015393804e-06, "loss": 0.8186, "step": 11500 }, { "epoch": 0.6867908754329393, "grad_norm": 2.8706576824188232, "learning_rate": 3.480857275562339e-06, "loss": 0.817, "step": 11501 }, { "epoch": 0.6868505911859548, "grad_norm": 2.068058729171753, "learning_rate": 3.4801937495852966e-06, "loss": 0.8196, "step": 11502 }, { "epoch": 0.6869103069389705, "grad_norm": 1.8515706062316895, "learning_rate": 3.4795302236082547e-06, "loss": 0.8419, "step": 11503 }, { "epoch": 0.6869700226919861, "grad_norm": 1.7632578611373901, "learning_rate": 3.4788666976312124e-06, "loss": 0.8036, "step": 11504 }, { "epoch": 0.6870297384450018, "grad_norm": 2.2873754501342773, "learning_rate": 3.4782031716541705e-06, "loss": 0.8473, "step": 11505 }, { "epoch": 0.6870894541980175, "grad_norm": 2.571437120437622, "learning_rate": 3.4775396456771286e-06, "loss": 0.8224, "step": 11506 }, { "epoch": 0.687149169951033, "grad_norm": 3.8857250213623047, "learning_rate": 3.4768761197000867e-06, "loss": 0.8351, "step": 11507 }, { "epoch": 0.6872088857040487, "grad_norm": 2.893629550933838, "learning_rate": 3.4762125937230444e-06, "loss": 0.8246, "step": 11508 }, { "epoch": 0.6872686014570644, "grad_norm": 4.18152379989624, "learning_rate": 3.4755490677460025e-06, "loss": 0.8253, "step": 11509 }, { "epoch": 0.68732831721008, "grad_norm": 1.760279655456543, "learning_rate": 3.47488554176896e-06, "loss": 0.8136, "step": 11510 }, { "epoch": 0.6873880329630957, "grad_norm": 1.8852070569992065, "learning_rate": 3.4742220157919187e-06, "loss": 0.8544, "step": 11511 }, { "epoch": 0.6874477487161114, "grad_norm": 1.860707402229309, "learning_rate": 3.4735584898148768e-06, "loss": 0.7998, "step": 11512 }, { "epoch": 0.6875074644691269, "grad_norm": 1.9441263675689697, "learning_rate": 3.4728949638378345e-06, "loss": 0.7999, "step": 11513 }, { "epoch": 0.6875671802221426, "grad_norm": 2.9314827919006348, "learning_rate": 3.4722314378607926e-06, "loss": 0.8335, "step": 11514 }, { "epoch": 0.6876268959751582, "grad_norm": 4.9754414558410645, "learning_rate": 3.4715679118837502e-06, "loss": 0.8395, "step": 11515 }, { "epoch": 0.6876866117281739, "grad_norm": 2.2536938190460205, "learning_rate": 3.4709043859067088e-06, "loss": 0.8393, "step": 11516 }, { "epoch": 0.6877463274811896, "grad_norm": 1.76628577709198, "learning_rate": 3.4702408599296664e-06, "loss": 0.8461, "step": 11517 }, { "epoch": 0.6878060432342051, "grad_norm": 2.030266284942627, "learning_rate": 3.4695773339526245e-06, "loss": 0.8484, "step": 11518 }, { "epoch": 0.6878657589872208, "grad_norm": 2.5634162425994873, "learning_rate": 3.468913807975582e-06, "loss": 0.8621, "step": 11519 }, { "epoch": 0.6879254747402365, "grad_norm": 1.8106673955917358, "learning_rate": 3.4682502819985403e-06, "loss": 0.8544, "step": 11520 }, { "epoch": 0.6879851904932521, "grad_norm": 1.7067252397537231, "learning_rate": 3.467586756021499e-06, "loss": 0.7918, "step": 11521 }, { "epoch": 0.6880449062462678, "grad_norm": 1.6393985748291016, "learning_rate": 3.4669232300444565e-06, "loss": 0.8327, "step": 11522 }, { "epoch": 0.6881046219992835, "grad_norm": 1.9452465772628784, "learning_rate": 3.4662597040674146e-06, "loss": 0.8437, "step": 11523 }, { "epoch": 0.688164337752299, "grad_norm": 3.206599712371826, "learning_rate": 3.4655961780903723e-06, "loss": 0.8157, "step": 11524 }, { "epoch": 0.6882240535053147, "grad_norm": 2.1893391609191895, "learning_rate": 3.4649326521133304e-06, "loss": 0.8257, "step": 11525 }, { "epoch": 0.6882837692583303, "grad_norm": 2.5995709896087646, "learning_rate": 3.464269126136289e-06, "loss": 0.8777, "step": 11526 }, { "epoch": 0.688343485011346, "grad_norm": 3.6334891319274902, "learning_rate": 3.4636056001592466e-06, "loss": 0.8416, "step": 11527 }, { "epoch": 0.6884032007643617, "grad_norm": 4.0431647300720215, "learning_rate": 3.4629420741822047e-06, "loss": 0.8346, "step": 11528 }, { "epoch": 0.6884629165173772, "grad_norm": 2.389207601547241, "learning_rate": 3.4622785482051624e-06, "loss": 0.838, "step": 11529 }, { "epoch": 0.6885226322703929, "grad_norm": 3.3842215538024902, "learning_rate": 3.4616150222281205e-06, "loss": 0.8471, "step": 11530 }, { "epoch": 0.6885823480234086, "grad_norm": 2.4358863830566406, "learning_rate": 3.4609514962510786e-06, "loss": 0.8559, "step": 11531 }, { "epoch": 0.6886420637764242, "grad_norm": 1.6714532375335693, "learning_rate": 3.4602879702740367e-06, "loss": 0.8564, "step": 11532 }, { "epoch": 0.6887017795294399, "grad_norm": 1.9438965320587158, "learning_rate": 3.4596244442969943e-06, "loss": 0.8383, "step": 11533 }, { "epoch": 0.6887614952824556, "grad_norm": 1.8609375953674316, "learning_rate": 3.4589609183199524e-06, "loss": 0.8223, "step": 11534 }, { "epoch": 0.6888212110354711, "grad_norm": 4.441301345825195, "learning_rate": 3.45829739234291e-06, "loss": 0.8557, "step": 11535 }, { "epoch": 0.6888809267884868, "grad_norm": 1.774365782737732, "learning_rate": 3.4576338663658686e-06, "loss": 0.8642, "step": 11536 }, { "epoch": 0.6889406425415024, "grad_norm": 3.152479648590088, "learning_rate": 3.4569703403888267e-06, "loss": 0.8554, "step": 11537 }, { "epoch": 0.6890003582945181, "grad_norm": 1.9973641633987427, "learning_rate": 3.4563068144117844e-06, "loss": 0.8495, "step": 11538 }, { "epoch": 0.6890600740475338, "grad_norm": 1.8769598007202148, "learning_rate": 3.4556432884347425e-06, "loss": 0.8073, "step": 11539 }, { "epoch": 0.6891197898005493, "grad_norm": 2.6841230392456055, "learning_rate": 3.4549797624577e-06, "loss": 0.8503, "step": 11540 }, { "epoch": 0.689179505553565, "grad_norm": 4.7852091789245605, "learning_rate": 3.4543162364806587e-06, "loss": 0.8291, "step": 11541 }, { "epoch": 0.6892392213065807, "grad_norm": 2.8361053466796875, "learning_rate": 3.4536527105036164e-06, "loss": 0.8735, "step": 11542 }, { "epoch": 0.6892989370595963, "grad_norm": 2.019035816192627, "learning_rate": 3.4529891845265745e-06, "loss": 0.8219, "step": 11543 }, { "epoch": 0.689358652812612, "grad_norm": 2.00763201713562, "learning_rate": 3.452325658549532e-06, "loss": 0.7914, "step": 11544 }, { "epoch": 0.6894183685656277, "grad_norm": 2.5114850997924805, "learning_rate": 3.4516621325724903e-06, "loss": 0.823, "step": 11545 }, { "epoch": 0.6894780843186432, "grad_norm": 1.6185003519058228, "learning_rate": 3.450998606595449e-06, "loss": 0.8165, "step": 11546 }, { "epoch": 0.6895378000716589, "grad_norm": 1.9742082357406616, "learning_rate": 3.4503350806184065e-06, "loss": 0.7474, "step": 11547 }, { "epoch": 0.6895975158246745, "grad_norm": 2.340890645980835, "learning_rate": 3.4496715546413646e-06, "loss": 0.8287, "step": 11548 }, { "epoch": 0.6896572315776902, "grad_norm": 2.5725841522216797, "learning_rate": 3.4490080286643223e-06, "loss": 0.8125, "step": 11549 }, { "epoch": 0.6897169473307059, "grad_norm": 2.2446508407592773, "learning_rate": 3.4483445026872804e-06, "loss": 0.844, "step": 11550 }, { "epoch": 0.6897766630837214, "grad_norm": 2.63923978805542, "learning_rate": 3.447680976710239e-06, "loss": 0.8345, "step": 11551 }, { "epoch": 0.6898363788367371, "grad_norm": 3.3651039600372314, "learning_rate": 3.4470174507331966e-06, "loss": 0.8286, "step": 11552 }, { "epoch": 0.6898960945897528, "grad_norm": 2.3284878730773926, "learning_rate": 3.4463539247561547e-06, "loss": 0.8564, "step": 11553 }, { "epoch": 0.6899558103427684, "grad_norm": 8.025605201721191, "learning_rate": 3.4456903987791123e-06, "loss": 0.8511, "step": 11554 }, { "epoch": 0.6900155260957841, "grad_norm": 2.131800889968872, "learning_rate": 3.4450268728020704e-06, "loss": 0.8459, "step": 11555 }, { "epoch": 0.6900752418487998, "grad_norm": 2.0866942405700684, "learning_rate": 3.4443633468250285e-06, "loss": 0.8008, "step": 11556 }, { "epoch": 0.6901349576018153, "grad_norm": 2.195557117462158, "learning_rate": 3.4436998208479866e-06, "loss": 0.8441, "step": 11557 }, { "epoch": 0.690194673354831, "grad_norm": 2.909575939178467, "learning_rate": 3.4430362948709443e-06, "loss": 0.8042, "step": 11558 }, { "epoch": 0.6902543891078466, "grad_norm": 2.320291519165039, "learning_rate": 3.4423727688939024e-06, "loss": 0.8393, "step": 11559 }, { "epoch": 0.6903141048608623, "grad_norm": 4.279641628265381, "learning_rate": 3.44170924291686e-06, "loss": 0.8399, "step": 11560 }, { "epoch": 0.690373820613878, "grad_norm": 1.9906206130981445, "learning_rate": 3.4410457169398186e-06, "loss": 0.8092, "step": 11561 }, { "epoch": 0.6904335363668936, "grad_norm": 1.8427964448928833, "learning_rate": 3.4403821909627767e-06, "loss": 0.8523, "step": 11562 }, { "epoch": 0.6904932521199092, "grad_norm": 3.835787057876587, "learning_rate": 3.4397186649857344e-06, "loss": 0.8515, "step": 11563 }, { "epoch": 0.6905529678729249, "grad_norm": 2.9438376426696777, "learning_rate": 3.4390551390086925e-06, "loss": 0.8373, "step": 11564 }, { "epoch": 0.6906126836259405, "grad_norm": 2.5584213733673096, "learning_rate": 3.43839161303165e-06, "loss": 0.8326, "step": 11565 }, { "epoch": 0.6906723993789562, "grad_norm": 1.5170669555664062, "learning_rate": 3.4377280870546087e-06, "loss": 0.7969, "step": 11566 }, { "epoch": 0.6907321151319719, "grad_norm": 2.6256561279296875, "learning_rate": 3.4370645610775664e-06, "loss": 0.8607, "step": 11567 }, { "epoch": 0.6907918308849874, "grad_norm": 4.0344157218933105, "learning_rate": 3.4364010351005245e-06, "loss": 0.8272, "step": 11568 }, { "epoch": 0.6908515466380031, "grad_norm": 2.5376200675964355, "learning_rate": 3.435737509123482e-06, "loss": 0.8137, "step": 11569 }, { "epoch": 0.6909112623910187, "grad_norm": 3.3005831241607666, "learning_rate": 3.4350739831464403e-06, "loss": 0.842, "step": 11570 }, { "epoch": 0.6909709781440344, "grad_norm": 2.160853624343872, "learning_rate": 3.4344104571693988e-06, "loss": 0.8328, "step": 11571 }, { "epoch": 0.6910306938970501, "grad_norm": 2.2327051162719727, "learning_rate": 3.4337469311923565e-06, "loss": 0.8446, "step": 11572 }, { "epoch": 0.6910904096500657, "grad_norm": 2.253676176071167, "learning_rate": 3.4330834052153146e-06, "loss": 0.8269, "step": 11573 }, { "epoch": 0.6911501254030813, "grad_norm": 2.6700713634490967, "learning_rate": 3.4324198792382722e-06, "loss": 0.8301, "step": 11574 }, { "epoch": 0.691209841156097, "grad_norm": 1.9165092706680298, "learning_rate": 3.4317563532612303e-06, "loss": 0.8377, "step": 11575 }, { "epoch": 0.6912695569091126, "grad_norm": 1.8725451231002808, "learning_rate": 3.431092827284189e-06, "loss": 0.8392, "step": 11576 }, { "epoch": 0.6913292726621283, "grad_norm": 2.021359443664551, "learning_rate": 3.4304293013071465e-06, "loss": 0.8246, "step": 11577 }, { "epoch": 0.691388988415144, "grad_norm": 8.566399574279785, "learning_rate": 3.4297657753301046e-06, "loss": 0.8211, "step": 11578 }, { "epoch": 0.6914487041681595, "grad_norm": 3.0193369388580322, "learning_rate": 3.4291022493530623e-06, "loss": 0.8427, "step": 11579 }, { "epoch": 0.6915084199211752, "grad_norm": 1.6984199285507202, "learning_rate": 3.4284387233760204e-06, "loss": 0.88, "step": 11580 }, { "epoch": 0.6915681356741908, "grad_norm": 2.4341418743133545, "learning_rate": 3.4277751973989785e-06, "loss": 0.854, "step": 11581 }, { "epoch": 0.6916278514272065, "grad_norm": 1.7715801000595093, "learning_rate": 3.4271116714219366e-06, "loss": 0.8661, "step": 11582 }, { "epoch": 0.6916875671802222, "grad_norm": 27.768150329589844, "learning_rate": 3.4264481454448943e-06, "loss": 0.8414, "step": 11583 }, { "epoch": 0.6917472829332378, "grad_norm": 4.092485427856445, "learning_rate": 3.4257846194678524e-06, "loss": 0.8691, "step": 11584 }, { "epoch": 0.6918069986862534, "grad_norm": 3.6852657794952393, "learning_rate": 3.42512109349081e-06, "loss": 0.8331, "step": 11585 }, { "epoch": 0.6918667144392691, "grad_norm": 1.6469441652297974, "learning_rate": 3.4244575675137686e-06, "loss": 0.8263, "step": 11586 }, { "epoch": 0.6919264301922847, "grad_norm": 1.5501377582550049, "learning_rate": 3.4237940415367267e-06, "loss": 0.8101, "step": 11587 }, { "epoch": 0.6919861459453004, "grad_norm": 1.7373664379119873, "learning_rate": 3.4231305155596844e-06, "loss": 0.8417, "step": 11588 }, { "epoch": 0.6920458616983161, "grad_norm": 2.246598720550537, "learning_rate": 3.4224669895826425e-06, "loss": 0.8096, "step": 11589 }, { "epoch": 0.6921055774513316, "grad_norm": 7.476629734039307, "learning_rate": 3.4218034636056e-06, "loss": 0.8541, "step": 11590 }, { "epoch": 0.6921652932043473, "grad_norm": 2.0975229740142822, "learning_rate": 3.4211399376285587e-06, "loss": 0.8346, "step": 11591 }, { "epoch": 0.6922250089573629, "grad_norm": 1.7277016639709473, "learning_rate": 3.4204764116515168e-06, "loss": 0.8472, "step": 11592 }, { "epoch": 0.6922847247103786, "grad_norm": 2.063720464706421, "learning_rate": 3.4198128856744744e-06, "loss": 0.812, "step": 11593 }, { "epoch": 0.6923444404633943, "grad_norm": 1.9108573198318481, "learning_rate": 3.419149359697432e-06, "loss": 0.8002, "step": 11594 }, { "epoch": 0.6924041562164099, "grad_norm": 3.386432647705078, "learning_rate": 3.4184858337203902e-06, "loss": 0.8154, "step": 11595 }, { "epoch": 0.6924638719694255, "grad_norm": 2.5679285526275635, "learning_rate": 3.4178223077433487e-06, "loss": 0.7985, "step": 11596 }, { "epoch": 0.6925235877224412, "grad_norm": 3.1727821826934814, "learning_rate": 3.4171587817663064e-06, "loss": 0.7999, "step": 11597 }, { "epoch": 0.6925833034754568, "grad_norm": 4.516024112701416, "learning_rate": 3.4164952557892645e-06, "loss": 0.8648, "step": 11598 }, { "epoch": 0.6926430192284725, "grad_norm": 1.6520332098007202, "learning_rate": 3.415831729812222e-06, "loss": 0.8147, "step": 11599 }, { "epoch": 0.6927027349814882, "grad_norm": 1.684389591217041, "learning_rate": 3.4151682038351803e-06, "loss": 0.808, "step": 11600 }, { "epoch": 0.6927027349814882, "eval_text_loss": 0.8980914354324341, "eval_text_runtime": 15.2346, "eval_text_samples_per_second": 262.56, "eval_text_steps_per_second": 0.525, "step": 11600 }, { "epoch": 0.6927027349814882, "eval_image_loss": 0.6052571535110474, "eval_image_runtime": 4.9698, "eval_image_samples_per_second": 804.855, "eval_image_steps_per_second": 1.61, "step": 11600 }, { "epoch": 0.6927027349814882, "eval_video_loss": 1.0392177104949951, "eval_video_runtime": 76.2762, "eval_video_samples_per_second": 52.441, "eval_video_steps_per_second": 0.105, "step": 11600 }, { "epoch": 0.6927624507345037, "grad_norm": 1.9938186407089233, "learning_rate": 3.414504677858139e-06, "loss": 0.8512, "step": 11601 }, { "epoch": 0.6928221664875194, "grad_norm": 1.5207350254058838, "learning_rate": 3.4138411518810965e-06, "loss": 0.8195, "step": 11602 }, { "epoch": 0.692881882240535, "grad_norm": 2.204638957977295, "learning_rate": 3.4131776259040546e-06, "loss": 0.86, "step": 11603 }, { "epoch": 0.6929415979935507, "grad_norm": 1.7392157316207886, "learning_rate": 3.4125140999270123e-06, "loss": 0.8441, "step": 11604 }, { "epoch": 0.6930013137465664, "grad_norm": 2.050368309020996, "learning_rate": 3.4118505739499704e-06, "loss": 0.8076, "step": 11605 }, { "epoch": 0.693061029499582, "grad_norm": 2.2375476360321045, "learning_rate": 3.4111870479729285e-06, "loss": 0.8639, "step": 11606 }, { "epoch": 0.6931207452525976, "grad_norm": 4.491366386413574, "learning_rate": 3.4105235219958866e-06, "loss": 0.8102, "step": 11607 }, { "epoch": 0.6931804610056133, "grad_norm": 3.4695303440093994, "learning_rate": 3.4098599960188443e-06, "loss": 0.8193, "step": 11608 }, { "epoch": 0.6932401767586289, "grad_norm": 2.256593942642212, "learning_rate": 3.4091964700418024e-06, "loss": 0.8549, "step": 11609 }, { "epoch": 0.6932998925116446, "grad_norm": 2.1124284267425537, "learning_rate": 3.40853294406476e-06, "loss": 0.8607, "step": 11610 }, { "epoch": 0.6933596082646603, "grad_norm": 1.9141048192977905, "learning_rate": 3.4078694180877186e-06, "loss": 0.8353, "step": 11611 }, { "epoch": 0.6934193240176758, "grad_norm": 1.7881077527999878, "learning_rate": 3.4072058921106767e-06, "loss": 0.8113, "step": 11612 }, { "epoch": 0.6934790397706915, "grad_norm": 4.197149753570557, "learning_rate": 3.4065423661336343e-06, "loss": 0.8401, "step": 11613 }, { "epoch": 0.6935387555237071, "grad_norm": 2.7377851009368896, "learning_rate": 3.4058788401565924e-06, "loss": 0.8099, "step": 11614 }, { "epoch": 0.6935984712767228, "grad_norm": 2.0624449253082275, "learning_rate": 3.40521531417955e-06, "loss": 0.8434, "step": 11615 }, { "epoch": 0.6936581870297385, "grad_norm": 2.3014914989471436, "learning_rate": 3.4045517882025086e-06, "loss": 0.847, "step": 11616 }, { "epoch": 0.6937179027827541, "grad_norm": 2.553556442260742, "learning_rate": 3.4038882622254667e-06, "loss": 0.8292, "step": 11617 }, { "epoch": 0.6937776185357697, "grad_norm": 2.558368682861328, "learning_rate": 3.4032247362484244e-06, "loss": 0.8331, "step": 11618 }, { "epoch": 0.6938373342887854, "grad_norm": 1.7694010734558105, "learning_rate": 3.4025612102713825e-06, "loss": 0.8312, "step": 11619 }, { "epoch": 0.693897050041801, "grad_norm": 1.9577020406723022, "learning_rate": 3.40189768429434e-06, "loss": 0.8573, "step": 11620 }, { "epoch": 0.6939567657948167, "grad_norm": 1.7750357389450073, "learning_rate": 3.4012341583172987e-06, "loss": 0.8511, "step": 11621 }, { "epoch": 0.6940164815478324, "grad_norm": 3.417048931121826, "learning_rate": 3.4005706323402564e-06, "loss": 0.8183, "step": 11622 }, { "epoch": 0.694076197300848, "grad_norm": 1.7239018678665161, "learning_rate": 3.3999071063632145e-06, "loss": 0.8456, "step": 11623 }, { "epoch": 0.6941359130538636, "grad_norm": 2.297584056854248, "learning_rate": 3.399243580386172e-06, "loss": 0.822, "step": 11624 }, { "epoch": 0.6941956288068792, "grad_norm": 2.3148391246795654, "learning_rate": 3.3985800544091303e-06, "loss": 0.8124, "step": 11625 }, { "epoch": 0.6942553445598949, "grad_norm": 1.5230931043624878, "learning_rate": 3.397916528432089e-06, "loss": 0.8324, "step": 11626 }, { "epoch": 0.6943150603129106, "grad_norm": 6.04342794418335, "learning_rate": 3.3972530024550465e-06, "loss": 0.8171, "step": 11627 }, { "epoch": 0.6943747760659262, "grad_norm": 1.7967798709869385, "learning_rate": 3.3965894764780046e-06, "loss": 0.8722, "step": 11628 }, { "epoch": 0.6944344918189418, "grad_norm": 2.2822535037994385, "learning_rate": 3.3959259505009623e-06, "loss": 0.8098, "step": 11629 }, { "epoch": 0.6944942075719575, "grad_norm": 2.349348783493042, "learning_rate": 3.3952624245239204e-06, "loss": 0.8096, "step": 11630 }, { "epoch": 0.6945539233249731, "grad_norm": 2.444061756134033, "learning_rate": 3.3945988985468785e-06, "loss": 0.8457, "step": 11631 }, { "epoch": 0.6946136390779888, "grad_norm": 1.8011696338653564, "learning_rate": 3.3939353725698366e-06, "loss": 0.8572, "step": 11632 }, { "epoch": 0.6946733548310045, "grad_norm": 2.4139719009399414, "learning_rate": 3.3932718465927942e-06, "loss": 0.8417, "step": 11633 }, { "epoch": 0.6947330705840201, "grad_norm": 2.595478057861328, "learning_rate": 3.3926083206157523e-06, "loss": 0.8334, "step": 11634 }, { "epoch": 0.6947927863370357, "grad_norm": 1.7243791818618774, "learning_rate": 3.39194479463871e-06, "loss": 0.8253, "step": 11635 }, { "epoch": 0.6948525020900513, "grad_norm": 2.389559268951416, "learning_rate": 3.3912812686616685e-06, "loss": 0.8255, "step": 11636 }, { "epoch": 0.694912217843067, "grad_norm": 1.8437800407409668, "learning_rate": 3.3906177426846266e-06, "loss": 0.8062, "step": 11637 }, { "epoch": 0.6949719335960827, "grad_norm": 2.51804256439209, "learning_rate": 3.3899542167075843e-06, "loss": 0.8597, "step": 11638 }, { "epoch": 0.6950316493490983, "grad_norm": 1.879050612449646, "learning_rate": 3.3892906907305424e-06, "loss": 0.7858, "step": 11639 }, { "epoch": 0.6950913651021139, "grad_norm": 2.576409101486206, "learning_rate": 3.3886271647535e-06, "loss": 0.8351, "step": 11640 }, { "epoch": 0.6951510808551296, "grad_norm": 2.0935800075531006, "learning_rate": 3.3879636387764586e-06, "loss": 0.8328, "step": 11641 }, { "epoch": 0.6952107966081452, "grad_norm": 1.7147750854492188, "learning_rate": 3.3873001127994167e-06, "loss": 0.8315, "step": 11642 }, { "epoch": 0.6952705123611609, "grad_norm": 1.6260842084884644, "learning_rate": 3.3866365868223744e-06, "loss": 0.8319, "step": 11643 }, { "epoch": 0.6953302281141766, "grad_norm": 1.9889031648635864, "learning_rate": 3.3859730608453325e-06, "loss": 0.8208, "step": 11644 }, { "epoch": 0.6953899438671922, "grad_norm": 1.8078583478927612, "learning_rate": 3.38530953486829e-06, "loss": 0.8538, "step": 11645 }, { "epoch": 0.6954496596202078, "grad_norm": 2.529557704925537, "learning_rate": 3.3846460088912487e-06, "loss": 0.8601, "step": 11646 }, { "epoch": 0.6955093753732234, "grad_norm": 2.107954740524292, "learning_rate": 3.3839824829142064e-06, "loss": 0.8397, "step": 11647 }, { "epoch": 0.6955690911262391, "grad_norm": 2.611626625061035, "learning_rate": 3.3833189569371645e-06, "loss": 0.8277, "step": 11648 }, { "epoch": 0.6956288068792548, "grad_norm": 2.7741782665252686, "learning_rate": 3.382655430960122e-06, "loss": 0.8026, "step": 11649 }, { "epoch": 0.6956885226322704, "grad_norm": 1.8793513774871826, "learning_rate": 3.3819919049830802e-06, "loss": 0.8463, "step": 11650 }, { "epoch": 0.695748238385286, "grad_norm": 1.581977128982544, "learning_rate": 3.3813283790060388e-06, "loss": 0.833, "step": 11651 }, { "epoch": 0.6958079541383017, "grad_norm": 1.908708095550537, "learning_rate": 3.3806648530289964e-06, "loss": 0.802, "step": 11652 }, { "epoch": 0.6958676698913173, "grad_norm": 2.726731538772583, "learning_rate": 3.3800013270519545e-06, "loss": 0.8617, "step": 11653 }, { "epoch": 0.695927385644333, "grad_norm": 3.2709412574768066, "learning_rate": 3.3793378010749122e-06, "loss": 0.8383, "step": 11654 }, { "epoch": 0.6959871013973487, "grad_norm": 3.5730857849121094, "learning_rate": 3.3786742750978703e-06, "loss": 0.8421, "step": 11655 }, { "epoch": 0.6960468171503643, "grad_norm": 1.9253156185150146, "learning_rate": 3.3780107491208284e-06, "loss": 0.7853, "step": 11656 }, { "epoch": 0.6961065329033799, "grad_norm": 1.985835313796997, "learning_rate": 3.3773472231437865e-06, "loss": 0.813, "step": 11657 }, { "epoch": 0.6961662486563955, "grad_norm": 1.8453360795974731, "learning_rate": 3.376683697166744e-06, "loss": 0.8177, "step": 11658 }, { "epoch": 0.6962259644094112, "grad_norm": 2.0547988414764404, "learning_rate": 3.3760201711897023e-06, "loss": 0.8436, "step": 11659 }, { "epoch": 0.6962856801624269, "grad_norm": 2.987480640411377, "learning_rate": 3.37535664521266e-06, "loss": 0.8315, "step": 11660 }, { "epoch": 0.6963453959154425, "grad_norm": 1.7085487842559814, "learning_rate": 3.3746931192356185e-06, "loss": 0.8624, "step": 11661 }, { "epoch": 0.6964051116684581, "grad_norm": 1.9694160223007202, "learning_rate": 3.3740295932585766e-06, "loss": 0.851, "step": 11662 }, { "epoch": 0.6964648274214738, "grad_norm": 2.127645969390869, "learning_rate": 3.3733660672815343e-06, "loss": 0.8238, "step": 11663 }, { "epoch": 0.6965245431744894, "grad_norm": 2.072037935256958, "learning_rate": 3.3727025413044924e-06, "loss": 0.7791, "step": 11664 }, { "epoch": 0.6965842589275051, "grad_norm": 3.2143824100494385, "learning_rate": 3.37203901532745e-06, "loss": 0.8584, "step": 11665 }, { "epoch": 0.6966439746805208, "grad_norm": 3.249861001968384, "learning_rate": 3.3713754893504086e-06, "loss": 0.8227, "step": 11666 }, { "epoch": 0.6967036904335364, "grad_norm": 1.978689193725586, "learning_rate": 3.3707119633733667e-06, "loss": 0.8511, "step": 11667 }, { "epoch": 0.696763406186552, "grad_norm": 1.913812279701233, "learning_rate": 3.3700484373963244e-06, "loss": 0.8066, "step": 11668 }, { "epoch": 0.6968231219395676, "grad_norm": 1.7990399599075317, "learning_rate": 3.3693849114192825e-06, "loss": 0.8457, "step": 11669 }, { "epoch": 0.6968828376925833, "grad_norm": 2.549562692642212, "learning_rate": 3.36872138544224e-06, "loss": 0.8618, "step": 11670 }, { "epoch": 0.696942553445599, "grad_norm": 2.3897273540496826, "learning_rate": 3.3680578594651987e-06, "loss": 0.8729, "step": 11671 }, { "epoch": 0.6970022691986146, "grad_norm": 1.7351042032241821, "learning_rate": 3.3673943334881563e-06, "loss": 0.8255, "step": 11672 }, { "epoch": 0.6970619849516302, "grad_norm": 1.6568963527679443, "learning_rate": 3.3667308075111144e-06, "loss": 0.818, "step": 11673 }, { "epoch": 0.6971217007046459, "grad_norm": 1.6248934268951416, "learning_rate": 3.366067281534072e-06, "loss": 0.8339, "step": 11674 }, { "epoch": 0.6971814164576615, "grad_norm": 2.236323356628418, "learning_rate": 3.3654037555570302e-06, "loss": 0.845, "step": 11675 }, { "epoch": 0.6972411322106772, "grad_norm": 2.076451063156128, "learning_rate": 3.3647402295799887e-06, "loss": 0.8273, "step": 11676 }, { "epoch": 0.6973008479636928, "grad_norm": 3.151331901550293, "learning_rate": 3.3640767036029464e-06, "loss": 0.8114, "step": 11677 }, { "epoch": 0.6973605637167085, "grad_norm": 3.98673152923584, "learning_rate": 3.3634131776259045e-06, "loss": 0.7999, "step": 11678 }, { "epoch": 0.6974202794697241, "grad_norm": 2.8057992458343506, "learning_rate": 3.362749651648862e-06, "loss": 0.8305, "step": 11679 }, { "epoch": 0.6974799952227397, "grad_norm": 2.183135747909546, "learning_rate": 3.3620861256718203e-06, "loss": 0.8219, "step": 11680 }, { "epoch": 0.6975397109757554, "grad_norm": 1.90086030960083, "learning_rate": 3.3614225996947784e-06, "loss": 0.854, "step": 11681 }, { "epoch": 0.6975994267287711, "grad_norm": 1.8351001739501953, "learning_rate": 3.3607590737177365e-06, "loss": 0.8199, "step": 11682 }, { "epoch": 0.6976591424817867, "grad_norm": 2.2148847579956055, "learning_rate": 3.360095547740694e-06, "loss": 0.8346, "step": 11683 }, { "epoch": 0.6977188582348023, "grad_norm": 3.251887321472168, "learning_rate": 3.3594320217636523e-06, "loss": 0.7822, "step": 11684 }, { "epoch": 0.697778573987818, "grad_norm": 3.0225932598114014, "learning_rate": 3.35876849578661e-06, "loss": 0.8817, "step": 11685 }, { "epoch": 0.6978382897408336, "grad_norm": 2.655247449874878, "learning_rate": 3.3581049698095685e-06, "loss": 0.7936, "step": 11686 }, { "epoch": 0.6978980054938493, "grad_norm": 1.5927541255950928, "learning_rate": 3.3574414438325266e-06, "loss": 0.8541, "step": 11687 }, { "epoch": 0.697957721246865, "grad_norm": 2.6557881832122803, "learning_rate": 3.3567779178554843e-06, "loss": 0.8345, "step": 11688 }, { "epoch": 0.6980174369998806, "grad_norm": 2.8461947441101074, "learning_rate": 3.3561143918784424e-06, "loss": 0.8356, "step": 11689 }, { "epoch": 0.6980771527528962, "grad_norm": 2.2333717346191406, "learning_rate": 3.3554508659014e-06, "loss": 0.8246, "step": 11690 }, { "epoch": 0.6981368685059118, "grad_norm": 1.9295812845230103, "learning_rate": 3.3547873399243586e-06, "loss": 0.818, "step": 11691 }, { "epoch": 0.6981965842589275, "grad_norm": 2.8551831245422363, "learning_rate": 3.3541238139473167e-06, "loss": 0.8454, "step": 11692 }, { "epoch": 0.6982563000119432, "grad_norm": 6.157182693481445, "learning_rate": 3.3534602879702743e-06, "loss": 0.8407, "step": 11693 }, { "epoch": 0.6983160157649588, "grad_norm": 2.3599414825439453, "learning_rate": 3.3527967619932324e-06, "loss": 0.8488, "step": 11694 }, { "epoch": 0.6983757315179745, "grad_norm": 2.833204746246338, "learning_rate": 3.35213323601619e-06, "loss": 0.8257, "step": 11695 }, { "epoch": 0.69843544727099, "grad_norm": 3.30678653717041, "learning_rate": 3.3514697100391486e-06, "loss": 0.8597, "step": 11696 }, { "epoch": 0.6984951630240057, "grad_norm": 2.221147060394287, "learning_rate": 3.3508061840621063e-06, "loss": 0.8426, "step": 11697 }, { "epoch": 0.6985548787770214, "grad_norm": 1.9555429220199585, "learning_rate": 3.3501426580850644e-06, "loss": 0.8282, "step": 11698 }, { "epoch": 0.698614594530037, "grad_norm": 2.56115460395813, "learning_rate": 3.349479132108022e-06, "loss": 0.8376, "step": 11699 }, { "epoch": 0.6986743102830527, "grad_norm": 1.7402857542037964, "learning_rate": 3.34881560613098e-06, "loss": 0.8328, "step": 11700 }, { "epoch": 0.6987340260360683, "grad_norm": 3.304492235183716, "learning_rate": 3.3481520801539387e-06, "loss": 0.8149, "step": 11701 }, { "epoch": 0.6987937417890839, "grad_norm": 2.3173627853393555, "learning_rate": 3.3474885541768964e-06, "loss": 0.8757, "step": 11702 }, { "epoch": 0.6988534575420996, "grad_norm": 1.6732734441757202, "learning_rate": 3.3468250281998545e-06, "loss": 0.8167, "step": 11703 }, { "epoch": 0.6989131732951153, "grad_norm": 1.7165617942810059, "learning_rate": 3.346161502222812e-06, "loss": 0.8225, "step": 11704 }, { "epoch": 0.6989728890481309, "grad_norm": 2.1136679649353027, "learning_rate": 3.3454979762457703e-06, "loss": 0.8172, "step": 11705 }, { "epoch": 0.6990326048011466, "grad_norm": 2.191530466079712, "learning_rate": 3.3448344502687284e-06, "loss": 0.793, "step": 11706 }, { "epoch": 0.6990923205541621, "grad_norm": 2.1795029640197754, "learning_rate": 3.3441709242916865e-06, "loss": 0.8319, "step": 11707 }, { "epoch": 0.6991520363071778, "grad_norm": 3.400506019592285, "learning_rate": 3.343507398314644e-06, "loss": 0.8332, "step": 11708 }, { "epoch": 0.6992117520601935, "grad_norm": 2.1093907356262207, "learning_rate": 3.3428438723376022e-06, "loss": 0.8237, "step": 11709 }, { "epoch": 0.6992714678132091, "grad_norm": 2.3540120124816895, "learning_rate": 3.34218034636056e-06, "loss": 0.8317, "step": 11710 }, { "epoch": 0.6993311835662248, "grad_norm": 2.6033833026885986, "learning_rate": 3.3415168203835184e-06, "loss": 0.8398, "step": 11711 }, { "epoch": 0.6993908993192404, "grad_norm": 1.6776257753372192, "learning_rate": 3.3408532944064765e-06, "loss": 0.7943, "step": 11712 }, { "epoch": 0.699450615072256, "grad_norm": 3.225595235824585, "learning_rate": 3.3401897684294342e-06, "loss": 0.8117, "step": 11713 }, { "epoch": 0.6995103308252717, "grad_norm": 2.256316661834717, "learning_rate": 3.3395262424523923e-06, "loss": 0.8126, "step": 11714 }, { "epoch": 0.6995700465782874, "grad_norm": 2.7740743160247803, "learning_rate": 3.33886271647535e-06, "loss": 0.8452, "step": 11715 }, { "epoch": 0.699629762331303, "grad_norm": 1.9542264938354492, "learning_rate": 3.3381991904983085e-06, "loss": 0.8421, "step": 11716 }, { "epoch": 0.6996894780843187, "grad_norm": 1.6707743406295776, "learning_rate": 3.3375356645212666e-06, "loss": 0.8635, "step": 11717 }, { "epoch": 0.6997491938373342, "grad_norm": 2.3304529190063477, "learning_rate": 3.3368721385442243e-06, "loss": 0.8615, "step": 11718 }, { "epoch": 0.6998089095903499, "grad_norm": 2.587550401687622, "learning_rate": 3.3362086125671824e-06, "loss": 0.8192, "step": 11719 }, { "epoch": 0.6998686253433656, "grad_norm": 7.247949123382568, "learning_rate": 3.33554508659014e-06, "loss": 0.8476, "step": 11720 }, { "epoch": 0.6999283410963812, "grad_norm": 4.3424201011657715, "learning_rate": 3.3348815606130986e-06, "loss": 0.8683, "step": 11721 }, { "epoch": 0.6999880568493969, "grad_norm": 4.0888471603393555, "learning_rate": 3.3342180346360563e-06, "loss": 0.8567, "step": 11722 }, { "epoch": 0.7000477726024125, "grad_norm": 1.7826576232910156, "learning_rate": 3.3335545086590144e-06, "loss": 0.8202, "step": 11723 }, { "epoch": 0.7001074883554281, "grad_norm": 2.1458749771118164, "learning_rate": 3.332890982681972e-06, "loss": 0.8153, "step": 11724 }, { "epoch": 0.7001672041084438, "grad_norm": 3.4002020359039307, "learning_rate": 3.33222745670493e-06, "loss": 0.8367, "step": 11725 }, { "epoch": 0.7002269198614595, "grad_norm": 9.027114868164062, "learning_rate": 3.3315639307278887e-06, "loss": 0.8204, "step": 11726 }, { "epoch": 0.7002866356144751, "grad_norm": 2.2314674854278564, "learning_rate": 3.3309004047508464e-06, "loss": 0.7993, "step": 11727 }, { "epoch": 0.7003463513674908, "grad_norm": 1.7260634899139404, "learning_rate": 3.3302368787738045e-06, "loss": 0.7853, "step": 11728 }, { "epoch": 0.7004060671205063, "grad_norm": 2.0953330993652344, "learning_rate": 3.329573352796762e-06, "loss": 0.8121, "step": 11729 }, { "epoch": 0.700465782873522, "grad_norm": 3.19260573387146, "learning_rate": 3.3289098268197202e-06, "loss": 0.8186, "step": 11730 }, { "epoch": 0.7005254986265377, "grad_norm": 1.9632813930511475, "learning_rate": 3.3282463008426783e-06, "loss": 0.8067, "step": 11731 }, { "epoch": 0.7005852143795533, "grad_norm": 2.4083445072174072, "learning_rate": 3.3275827748656364e-06, "loss": 0.8193, "step": 11732 }, { "epoch": 0.700644930132569, "grad_norm": 2.050631523132324, "learning_rate": 3.326919248888594e-06, "loss": 0.8215, "step": 11733 }, { "epoch": 0.7007046458855846, "grad_norm": 2.515289545059204, "learning_rate": 3.3262557229115522e-06, "loss": 0.7927, "step": 11734 }, { "epoch": 0.7007643616386002, "grad_norm": 2.3263800144195557, "learning_rate": 3.32559219693451e-06, "loss": 0.829, "step": 11735 }, { "epoch": 0.7008240773916159, "grad_norm": 2.203192949295044, "learning_rate": 3.3249286709574684e-06, "loss": 0.8302, "step": 11736 }, { "epoch": 0.7008837931446316, "grad_norm": 1.7629220485687256, "learning_rate": 3.3242651449804265e-06, "loss": 0.8403, "step": 11737 }, { "epoch": 0.7009435088976472, "grad_norm": 2.5143449306488037, "learning_rate": 3.323601619003384e-06, "loss": 0.8253, "step": 11738 }, { "epoch": 0.7010032246506629, "grad_norm": 3.5978593826293945, "learning_rate": 3.3229380930263423e-06, "loss": 0.8593, "step": 11739 }, { "epoch": 0.7010629404036784, "grad_norm": 3.022719621658325, "learning_rate": 3.3222745670493e-06, "loss": 0.8139, "step": 11740 }, { "epoch": 0.7011226561566941, "grad_norm": 1.8113560676574707, "learning_rate": 3.3216110410722585e-06, "loss": 0.831, "step": 11741 }, { "epoch": 0.7011823719097098, "grad_norm": 3.0532774925231934, "learning_rate": 3.3209475150952166e-06, "loss": 0.8113, "step": 11742 }, { "epoch": 0.7012420876627254, "grad_norm": 1.9688960313796997, "learning_rate": 3.3202839891181743e-06, "loss": 0.8415, "step": 11743 }, { "epoch": 0.7013018034157411, "grad_norm": 2.516594171524048, "learning_rate": 3.3196204631411324e-06, "loss": 0.8087, "step": 11744 }, { "epoch": 0.7013615191687567, "grad_norm": 1.8575624227523804, "learning_rate": 3.31895693716409e-06, "loss": 0.8207, "step": 11745 }, { "epoch": 0.7014212349217723, "grad_norm": 2.622112274169922, "learning_rate": 3.3182934111870486e-06, "loss": 0.8283, "step": 11746 }, { "epoch": 0.701480950674788, "grad_norm": 2.2462079524993896, "learning_rate": 3.3176298852100063e-06, "loss": 0.8232, "step": 11747 }, { "epoch": 0.7015406664278037, "grad_norm": 1.8634651899337769, "learning_rate": 3.3169663592329644e-06, "loss": 0.8822, "step": 11748 }, { "epoch": 0.7016003821808193, "grad_norm": 2.1796023845672607, "learning_rate": 3.316302833255922e-06, "loss": 0.8165, "step": 11749 }, { "epoch": 0.701660097933835, "grad_norm": 1.9595409631729126, "learning_rate": 3.31563930727888e-06, "loss": 0.8361, "step": 11750 }, { "epoch": 0.7017198136868505, "grad_norm": 2.3948652744293213, "learning_rate": 3.3149757813018387e-06, "loss": 0.8272, "step": 11751 }, { "epoch": 0.7017795294398662, "grad_norm": 2.498382568359375, "learning_rate": 3.3143122553247963e-06, "loss": 0.8416, "step": 11752 }, { "epoch": 0.7018392451928819, "grad_norm": 2.8214128017425537, "learning_rate": 3.3136487293477544e-06, "loss": 0.852, "step": 11753 }, { "epoch": 0.7018989609458975, "grad_norm": 1.5541322231292725, "learning_rate": 3.312985203370712e-06, "loss": 0.8159, "step": 11754 }, { "epoch": 0.7019586766989132, "grad_norm": 1.5897046327590942, "learning_rate": 3.31232167739367e-06, "loss": 0.8292, "step": 11755 }, { "epoch": 0.7020183924519289, "grad_norm": 1.6343272924423218, "learning_rate": 3.3116581514166283e-06, "loss": 0.8527, "step": 11756 }, { "epoch": 0.7020781082049444, "grad_norm": 2.10693621635437, "learning_rate": 3.3109946254395864e-06, "loss": 0.8212, "step": 11757 }, { "epoch": 0.7021378239579601, "grad_norm": 2.6853702068328857, "learning_rate": 3.310331099462544e-06, "loss": 0.8028, "step": 11758 }, { "epoch": 0.7021975397109758, "grad_norm": 1.9356878995895386, "learning_rate": 3.309667573485502e-06, "loss": 0.82, "step": 11759 }, { "epoch": 0.7022572554639914, "grad_norm": 2.7803945541381836, "learning_rate": 3.30900404750846e-06, "loss": 0.8366, "step": 11760 }, { "epoch": 0.7023169712170071, "grad_norm": 2.0551366806030273, "learning_rate": 3.3083405215314184e-06, "loss": 0.8659, "step": 11761 }, { "epoch": 0.7023766869700226, "grad_norm": 2.6437666416168213, "learning_rate": 3.3076769955543765e-06, "loss": 0.8536, "step": 11762 }, { "epoch": 0.7024364027230383, "grad_norm": 2.1376192569732666, "learning_rate": 3.307013469577334e-06, "loss": 0.8422, "step": 11763 }, { "epoch": 0.702496118476054, "grad_norm": 1.8831431865692139, "learning_rate": 3.3063499436002923e-06, "loss": 0.8322, "step": 11764 }, { "epoch": 0.7025558342290696, "grad_norm": 3.3021187782287598, "learning_rate": 3.30568641762325e-06, "loss": 0.8025, "step": 11765 }, { "epoch": 0.7026155499820853, "grad_norm": 1.6815955638885498, "learning_rate": 3.3050228916462085e-06, "loss": 0.8431, "step": 11766 }, { "epoch": 0.702675265735101, "grad_norm": 2.019547700881958, "learning_rate": 3.3043593656691666e-06, "loss": 0.8502, "step": 11767 }, { "epoch": 0.7027349814881165, "grad_norm": 3.1339964866638184, "learning_rate": 3.3036958396921242e-06, "loss": 0.8325, "step": 11768 }, { "epoch": 0.7027946972411322, "grad_norm": 1.723122000694275, "learning_rate": 3.3030323137150823e-06, "loss": 0.8438, "step": 11769 }, { "epoch": 0.7028544129941479, "grad_norm": 3.5147972106933594, "learning_rate": 3.30236878773804e-06, "loss": 0.8187, "step": 11770 }, { "epoch": 0.7029141287471635, "grad_norm": 3.1403698921203613, "learning_rate": 3.3017052617609985e-06, "loss": 0.8062, "step": 11771 }, { "epoch": 0.7029738445001792, "grad_norm": 1.8403505086898804, "learning_rate": 3.3010417357839562e-06, "loss": 0.8351, "step": 11772 }, { "epoch": 0.7030335602531947, "grad_norm": 3.043851613998413, "learning_rate": 3.3003782098069143e-06, "loss": 0.8334, "step": 11773 }, { "epoch": 0.7030932760062104, "grad_norm": 1.7783418893814087, "learning_rate": 3.299714683829872e-06, "loss": 0.8035, "step": 11774 }, { "epoch": 0.7031529917592261, "grad_norm": 2.198885440826416, "learning_rate": 3.29905115785283e-06, "loss": 0.8233, "step": 11775 }, { "epoch": 0.7032127075122417, "grad_norm": 2.2994656562805176, "learning_rate": 3.2983876318757886e-06, "loss": 0.847, "step": 11776 }, { "epoch": 0.7032724232652574, "grad_norm": 3.4829723834991455, "learning_rate": 3.2977241058987463e-06, "loss": 0.8554, "step": 11777 }, { "epoch": 0.7033321390182731, "grad_norm": 2.3499255180358887, "learning_rate": 3.2970605799217044e-06, "loss": 0.818, "step": 11778 }, { "epoch": 0.7033918547712886, "grad_norm": 2.117607831954956, "learning_rate": 3.296397053944662e-06, "loss": 0.8274, "step": 11779 }, { "epoch": 0.7034515705243043, "grad_norm": 1.8874738216400146, "learning_rate": 3.29573352796762e-06, "loss": 0.7711, "step": 11780 }, { "epoch": 0.70351128627732, "grad_norm": 2.2953672409057617, "learning_rate": 3.2950700019905783e-06, "loss": 0.86, "step": 11781 }, { "epoch": 0.7035710020303356, "grad_norm": 2.71777081489563, "learning_rate": 3.2944064760135364e-06, "loss": 0.8293, "step": 11782 }, { "epoch": 0.7036307177833513, "grad_norm": 1.8303537368774414, "learning_rate": 3.293742950036494e-06, "loss": 0.8474, "step": 11783 }, { "epoch": 0.7036904335363668, "grad_norm": 2.4784274101257324, "learning_rate": 3.293079424059452e-06, "loss": 0.8064, "step": 11784 }, { "epoch": 0.7037501492893825, "grad_norm": 2.8940110206604004, "learning_rate": 3.29241589808241e-06, "loss": 0.8536, "step": 11785 }, { "epoch": 0.7038098650423982, "grad_norm": 2.571446657180786, "learning_rate": 3.2917523721053684e-06, "loss": 0.8131, "step": 11786 }, { "epoch": 0.7038695807954138, "grad_norm": 2.0406529903411865, "learning_rate": 3.2910888461283265e-06, "loss": 0.8564, "step": 11787 }, { "epoch": 0.7039292965484295, "grad_norm": 2.5087623596191406, "learning_rate": 3.290425320151284e-06, "loss": 0.8302, "step": 11788 }, { "epoch": 0.7039890123014452, "grad_norm": 2.13474178314209, "learning_rate": 3.2897617941742422e-06, "loss": 0.818, "step": 11789 }, { "epoch": 0.7040487280544607, "grad_norm": 2.028257131576538, "learning_rate": 3.2890982681972e-06, "loss": 0.8552, "step": 11790 }, { "epoch": 0.7041084438074764, "grad_norm": 1.6762715578079224, "learning_rate": 3.2884347422201584e-06, "loss": 0.8437, "step": 11791 }, { "epoch": 0.7041681595604921, "grad_norm": 4.037757396697998, "learning_rate": 3.2877712162431165e-06, "loss": 0.8048, "step": 11792 }, { "epoch": 0.7042278753135077, "grad_norm": 1.9864046573638916, "learning_rate": 3.2871076902660742e-06, "loss": 0.8323, "step": 11793 }, { "epoch": 0.7042875910665234, "grad_norm": 2.133066415786743, "learning_rate": 3.2864441642890323e-06, "loss": 0.8046, "step": 11794 }, { "epoch": 0.7043473068195389, "grad_norm": 2.1152188777923584, "learning_rate": 3.28578063831199e-06, "loss": 0.816, "step": 11795 }, { "epoch": 0.7044070225725546, "grad_norm": 2.1724960803985596, "learning_rate": 3.2851171123349485e-06, "loss": 0.8484, "step": 11796 }, { "epoch": 0.7044667383255703, "grad_norm": 2.281687021255493, "learning_rate": 3.284453586357906e-06, "loss": 0.8129, "step": 11797 }, { "epoch": 0.7045264540785859, "grad_norm": 2.0876858234405518, "learning_rate": 3.2837900603808643e-06, "loss": 0.8348, "step": 11798 }, { "epoch": 0.7045861698316016, "grad_norm": 2.1432225704193115, "learning_rate": 3.283126534403822e-06, "loss": 0.8699, "step": 11799 }, { "epoch": 0.7046458855846173, "grad_norm": 2.9711639881134033, "learning_rate": 3.28246300842678e-06, "loss": 0.8188, "step": 11800 }, { "epoch": 0.7046458855846173, "eval_text_loss": 0.8980076909065247, "eval_text_runtime": 15.1874, "eval_text_samples_per_second": 263.377, "eval_text_steps_per_second": 0.527, "step": 11800 }, { "epoch": 0.7046458855846173, "eval_image_loss": 0.6038068532943726, "eval_image_runtime": 5.0185, "eval_image_samples_per_second": 797.049, "eval_image_steps_per_second": 1.594, "step": 11800 }, { "epoch": 0.7046458855846173, "eval_video_loss": 1.0365641117095947, "eval_video_runtime": 76.5718, "eval_video_samples_per_second": 52.239, "eval_video_steps_per_second": 0.104, "step": 11800 }, { "epoch": 0.7047056013376328, "grad_norm": 2.0251262187957764, "learning_rate": 3.2817994824497386e-06, "loss": 0.8438, "step": 11801 }, { "epoch": 0.7047653170906485, "grad_norm": 3.827110767364502, "learning_rate": 3.2811359564726963e-06, "loss": 0.863, "step": 11802 }, { "epoch": 0.7048250328436642, "grad_norm": 1.8644875288009644, "learning_rate": 3.2804724304956544e-06, "loss": 0.8188, "step": 11803 }, { "epoch": 0.7048847485966798, "grad_norm": 2.8794407844543457, "learning_rate": 3.279808904518612e-06, "loss": 0.8521, "step": 11804 }, { "epoch": 0.7049444643496955, "grad_norm": 2.075077772140503, "learning_rate": 3.27914537854157e-06, "loss": 0.8592, "step": 11805 }, { "epoch": 0.705004180102711, "grad_norm": 1.7515687942504883, "learning_rate": 3.2784818525645282e-06, "loss": 0.8074, "step": 11806 }, { "epoch": 0.7050638958557267, "grad_norm": 4.556320667266846, "learning_rate": 3.2778183265874864e-06, "loss": 0.8418, "step": 11807 }, { "epoch": 0.7051236116087424, "grad_norm": 2.458021879196167, "learning_rate": 3.277154800610444e-06, "loss": 0.8035, "step": 11808 }, { "epoch": 0.705183327361758, "grad_norm": 1.98654043674469, "learning_rate": 3.276491274633402e-06, "loss": 0.8335, "step": 11809 }, { "epoch": 0.7052430431147737, "grad_norm": 1.6535849571228027, "learning_rate": 3.27582774865636e-06, "loss": 0.8204, "step": 11810 }, { "epoch": 0.7053027588677894, "grad_norm": 2.561274528503418, "learning_rate": 3.2751642226793183e-06, "loss": 0.8463, "step": 11811 }, { "epoch": 0.7053624746208049, "grad_norm": 2.0907130241394043, "learning_rate": 3.2745006967022764e-06, "loss": 0.8848, "step": 11812 }, { "epoch": 0.7054221903738206, "grad_norm": 2.6200456619262695, "learning_rate": 3.273837170725234e-06, "loss": 0.8222, "step": 11813 }, { "epoch": 0.7054819061268363, "grad_norm": 2.344852924346924, "learning_rate": 3.273173644748192e-06, "loss": 0.8378, "step": 11814 }, { "epoch": 0.7055416218798519, "grad_norm": 1.8240984678268433, "learning_rate": 3.27251011877115e-06, "loss": 0.7986, "step": 11815 }, { "epoch": 0.7056013376328676, "grad_norm": 2.5238518714904785, "learning_rate": 3.2718465927941084e-06, "loss": 0.8085, "step": 11816 }, { "epoch": 0.7056610533858831, "grad_norm": 2.770829439163208, "learning_rate": 3.2711830668170665e-06, "loss": 0.8439, "step": 11817 }, { "epoch": 0.7057207691388988, "grad_norm": 2.0911056995391846, "learning_rate": 3.270519540840024e-06, "loss": 0.8257, "step": 11818 }, { "epoch": 0.7057804848919145, "grad_norm": 5.103206634521484, "learning_rate": 3.2698560148629823e-06, "loss": 0.8413, "step": 11819 }, { "epoch": 0.7058402006449301, "grad_norm": 3.2031054496765137, "learning_rate": 3.26919248888594e-06, "loss": 0.8342, "step": 11820 }, { "epoch": 0.7058999163979458, "grad_norm": 3.2409188747406006, "learning_rate": 3.268528962908898e-06, "loss": 0.8809, "step": 11821 }, { "epoch": 0.7059596321509615, "grad_norm": 4.195801258087158, "learning_rate": 3.267865436931856e-06, "loss": 0.8199, "step": 11822 }, { "epoch": 0.706019347903977, "grad_norm": 2.8067104816436768, "learning_rate": 3.2672019109548143e-06, "loss": 0.7908, "step": 11823 }, { "epoch": 0.7060790636569927, "grad_norm": 2.10105299949646, "learning_rate": 3.266538384977772e-06, "loss": 0.8548, "step": 11824 }, { "epoch": 0.7061387794100084, "grad_norm": 1.8491179943084717, "learning_rate": 3.26587485900073e-06, "loss": 0.8175, "step": 11825 }, { "epoch": 0.706198495163024, "grad_norm": 3.2812857627868652, "learning_rate": 3.2652113330236877e-06, "loss": 0.8373, "step": 11826 }, { "epoch": 0.7062582109160397, "grad_norm": 4.566958427429199, "learning_rate": 3.2645478070466462e-06, "loss": 0.8578, "step": 11827 }, { "epoch": 0.7063179266690554, "grad_norm": 2.369563341140747, "learning_rate": 3.2638842810696043e-06, "loss": 0.7965, "step": 11828 }, { "epoch": 0.7063776424220709, "grad_norm": 1.9461572170257568, "learning_rate": 3.263220755092562e-06, "loss": 0.8464, "step": 11829 }, { "epoch": 0.7064373581750866, "grad_norm": 2.0594637393951416, "learning_rate": 3.26255722911552e-06, "loss": 0.8137, "step": 11830 }, { "epoch": 0.7064970739281022, "grad_norm": 1.9479749202728271, "learning_rate": 3.261893703138478e-06, "loss": 0.8083, "step": 11831 }, { "epoch": 0.7065567896811179, "grad_norm": 3.0783119201660156, "learning_rate": 3.2612301771614363e-06, "loss": 0.8342, "step": 11832 }, { "epoch": 0.7066165054341336, "grad_norm": 1.9015283584594727, "learning_rate": 3.260566651184394e-06, "loss": 0.8363, "step": 11833 }, { "epoch": 0.7066762211871491, "grad_norm": 2.167450428009033, "learning_rate": 3.259903125207352e-06, "loss": 0.8181, "step": 11834 }, { "epoch": 0.7067359369401648, "grad_norm": 1.946533203125, "learning_rate": 3.2592395992303098e-06, "loss": 0.8514, "step": 11835 }, { "epoch": 0.7067956526931805, "grad_norm": 2.6251771450042725, "learning_rate": 3.258576073253268e-06, "loss": 0.8337, "step": 11836 }, { "epoch": 0.7068553684461961, "grad_norm": 2.8012566566467285, "learning_rate": 3.2579125472762264e-06, "loss": 0.8503, "step": 11837 }, { "epoch": 0.7069150841992118, "grad_norm": 2.3394243717193604, "learning_rate": 3.257249021299184e-06, "loss": 0.8644, "step": 11838 }, { "epoch": 0.7069747999522275, "grad_norm": 2.0455338954925537, "learning_rate": 3.256585495322142e-06, "loss": 0.8335, "step": 11839 }, { "epoch": 0.707034515705243, "grad_norm": 2.2949178218841553, "learning_rate": 3.2559219693451e-06, "loss": 0.8415, "step": 11840 }, { "epoch": 0.7070942314582587, "grad_norm": 2.5194902420043945, "learning_rate": 3.255258443368058e-06, "loss": 0.8314, "step": 11841 }, { "epoch": 0.7071539472112743, "grad_norm": 1.8233757019042969, "learning_rate": 3.2545949173910165e-06, "loss": 0.8371, "step": 11842 }, { "epoch": 0.70721366296429, "grad_norm": 2.2142786979675293, "learning_rate": 3.253931391413974e-06, "loss": 0.839, "step": 11843 }, { "epoch": 0.7072733787173057, "grad_norm": 1.4749828577041626, "learning_rate": 3.2532678654369323e-06, "loss": 0.8077, "step": 11844 }, { "epoch": 0.7073330944703212, "grad_norm": 2.9497649669647217, "learning_rate": 3.25260433945989e-06, "loss": 0.8436, "step": 11845 }, { "epoch": 0.7073928102233369, "grad_norm": 2.7106122970581055, "learning_rate": 3.251940813482848e-06, "loss": 0.8449, "step": 11846 }, { "epoch": 0.7074525259763526, "grad_norm": 1.6226414442062378, "learning_rate": 3.251277287505806e-06, "loss": 0.8172, "step": 11847 }, { "epoch": 0.7075122417293682, "grad_norm": 2.850783586502075, "learning_rate": 3.2506137615287642e-06, "loss": 0.829, "step": 11848 }, { "epoch": 0.7075719574823839, "grad_norm": 2.0880789756774902, "learning_rate": 3.249950235551722e-06, "loss": 0.8358, "step": 11849 }, { "epoch": 0.7076316732353995, "grad_norm": 2.2216720581054688, "learning_rate": 3.24928670957468e-06, "loss": 0.8579, "step": 11850 }, { "epoch": 0.7076913889884151, "grad_norm": 4.30049467086792, "learning_rate": 3.2486231835976377e-06, "loss": 0.839, "step": 11851 }, { "epoch": 0.7077511047414308, "grad_norm": 1.6837612390518188, "learning_rate": 3.2479596576205962e-06, "loss": 0.8283, "step": 11852 }, { "epoch": 0.7078108204944464, "grad_norm": 2.822807550430298, "learning_rate": 3.2472961316435543e-06, "loss": 0.8503, "step": 11853 }, { "epoch": 0.7078705362474621, "grad_norm": 2.896986484527588, "learning_rate": 3.246632605666512e-06, "loss": 0.8303, "step": 11854 }, { "epoch": 0.7079302520004778, "grad_norm": 3.5591580867767334, "learning_rate": 3.24596907968947e-06, "loss": 0.8097, "step": 11855 }, { "epoch": 0.7079899677534933, "grad_norm": 2.2945423126220703, "learning_rate": 3.2453055537124278e-06, "loss": 0.8226, "step": 11856 }, { "epoch": 0.708049683506509, "grad_norm": 2.6672089099884033, "learning_rate": 3.2446420277353863e-06, "loss": 0.8419, "step": 11857 }, { "epoch": 0.7081093992595247, "grad_norm": 3.3669919967651367, "learning_rate": 3.243978501758344e-06, "loss": 0.8313, "step": 11858 }, { "epoch": 0.7081691150125403, "grad_norm": 2.150135040283203, "learning_rate": 3.243314975781302e-06, "loss": 0.8304, "step": 11859 }, { "epoch": 0.708228830765556, "grad_norm": 2.221019744873047, "learning_rate": 3.2426514498042597e-06, "loss": 0.8059, "step": 11860 }, { "epoch": 0.7082885465185716, "grad_norm": 2.0165586471557617, "learning_rate": 3.241987923827218e-06, "loss": 0.822, "step": 11861 }, { "epoch": 0.7083482622715872, "grad_norm": 2.6658785343170166, "learning_rate": 3.2413243978501764e-06, "loss": 0.81, "step": 11862 }, { "epoch": 0.7084079780246029, "grad_norm": 5.273646354675293, "learning_rate": 3.240660871873134e-06, "loss": 0.8399, "step": 11863 }, { "epoch": 0.7084676937776185, "grad_norm": 1.9072723388671875, "learning_rate": 3.239997345896092e-06, "loss": 0.8283, "step": 11864 }, { "epoch": 0.7085274095306342, "grad_norm": 2.276052474975586, "learning_rate": 3.23933381991905e-06, "loss": 0.8447, "step": 11865 }, { "epoch": 0.7085871252836499, "grad_norm": 3.8993914127349854, "learning_rate": 3.238670293942008e-06, "loss": 0.832, "step": 11866 }, { "epoch": 0.7086468410366654, "grad_norm": 2.3721821308135986, "learning_rate": 3.2380067679649664e-06, "loss": 0.8424, "step": 11867 }, { "epoch": 0.7087065567896811, "grad_norm": 2.33505916595459, "learning_rate": 3.237343241987924e-06, "loss": 0.8447, "step": 11868 }, { "epoch": 0.7087662725426968, "grad_norm": 2.6431398391723633, "learning_rate": 3.2366797160108822e-06, "loss": 0.8126, "step": 11869 }, { "epoch": 0.7088259882957124, "grad_norm": 2.5584917068481445, "learning_rate": 3.23601619003384e-06, "loss": 0.8359, "step": 11870 }, { "epoch": 0.7088857040487281, "grad_norm": 3.24125599861145, "learning_rate": 3.235352664056798e-06, "loss": 0.872, "step": 11871 }, { "epoch": 0.7089454198017437, "grad_norm": 2.71496319770813, "learning_rate": 3.234689138079756e-06, "loss": 0.8247, "step": 11872 }, { "epoch": 0.7090051355547593, "grad_norm": 2.5118935108184814, "learning_rate": 3.234025612102714e-06, "loss": 0.8327, "step": 11873 }, { "epoch": 0.709064851307775, "grad_norm": 2.632396697998047, "learning_rate": 3.233362086125672e-06, "loss": 0.8105, "step": 11874 }, { "epoch": 0.7091245670607906, "grad_norm": 1.7206627130508423, "learning_rate": 3.23269856014863e-06, "loss": 0.8199, "step": 11875 }, { "epoch": 0.7091842828138063, "grad_norm": 2.9262678623199463, "learning_rate": 3.2320350341715877e-06, "loss": 0.8425, "step": 11876 }, { "epoch": 0.709243998566822, "grad_norm": 11.523343086242676, "learning_rate": 3.231371508194546e-06, "loss": 0.8333, "step": 11877 }, { "epoch": 0.7093037143198375, "grad_norm": 1.8142796754837036, "learning_rate": 3.2307079822175043e-06, "loss": 0.8509, "step": 11878 }, { "epoch": 0.7093634300728532, "grad_norm": 1.8715680837631226, "learning_rate": 3.230044456240462e-06, "loss": 0.8161, "step": 11879 }, { "epoch": 0.7094231458258689, "grad_norm": 2.3243160247802734, "learning_rate": 3.22938093026342e-06, "loss": 0.8213, "step": 11880 }, { "epoch": 0.7094828615788845, "grad_norm": 2.723024606704712, "learning_rate": 3.2287174042863777e-06, "loss": 0.8431, "step": 11881 }, { "epoch": 0.7095425773319002, "grad_norm": 1.7484639883041382, "learning_rate": 3.2280538783093363e-06, "loss": 0.8069, "step": 11882 }, { "epoch": 0.7096022930849158, "grad_norm": 1.8369076251983643, "learning_rate": 3.227390352332294e-06, "loss": 0.8498, "step": 11883 }, { "epoch": 0.7096620088379314, "grad_norm": 2.1260998249053955, "learning_rate": 3.226726826355252e-06, "loss": 0.8331, "step": 11884 }, { "epoch": 0.7097217245909471, "grad_norm": 1.7864130735397339, "learning_rate": 3.2260633003782097e-06, "loss": 0.8035, "step": 11885 }, { "epoch": 0.7097814403439627, "grad_norm": 2.6731503009796143, "learning_rate": 3.225399774401168e-06, "loss": 0.7837, "step": 11886 }, { "epoch": 0.7098411560969784, "grad_norm": 1.8213883638381958, "learning_rate": 3.2247362484241263e-06, "loss": 0.8434, "step": 11887 }, { "epoch": 0.7099008718499941, "grad_norm": 1.9104965925216675, "learning_rate": 3.224072722447084e-06, "loss": 0.8404, "step": 11888 }, { "epoch": 0.7099605876030096, "grad_norm": 2.0094425678253174, "learning_rate": 3.223409196470042e-06, "loss": 0.8119, "step": 11889 }, { "epoch": 0.7100203033560253, "grad_norm": 1.7213294506072998, "learning_rate": 3.222745670493e-06, "loss": 0.8377, "step": 11890 }, { "epoch": 0.710080019109041, "grad_norm": 2.849961519241333, "learning_rate": 3.222082144515958e-06, "loss": 0.8864, "step": 11891 }, { "epoch": 0.7101397348620566, "grad_norm": 2.271207094192505, "learning_rate": 3.2214186185389164e-06, "loss": 0.8252, "step": 11892 }, { "epoch": 0.7101994506150723, "grad_norm": 2.251671075820923, "learning_rate": 3.220755092561874e-06, "loss": 0.8238, "step": 11893 }, { "epoch": 0.710259166368088, "grad_norm": 2.016981601715088, "learning_rate": 3.220091566584832e-06, "loss": 0.859, "step": 11894 }, { "epoch": 0.7103188821211035, "grad_norm": 2.6119704246520996, "learning_rate": 3.21942804060779e-06, "loss": 0.854, "step": 11895 }, { "epoch": 0.7103785978741192, "grad_norm": 3.067678928375244, "learning_rate": 3.218764514630748e-06, "loss": 0.8249, "step": 11896 }, { "epoch": 0.7104383136271348, "grad_norm": 2.2702581882476807, "learning_rate": 3.218100988653706e-06, "loss": 0.7933, "step": 11897 }, { "epoch": 0.7104980293801505, "grad_norm": 1.6853537559509277, "learning_rate": 3.217437462676664e-06, "loss": 0.8299, "step": 11898 }, { "epoch": 0.7105577451331662, "grad_norm": 4.2752556800842285, "learning_rate": 3.216773936699622e-06, "loss": 0.8451, "step": 11899 }, { "epoch": 0.7106174608861818, "grad_norm": 5.643845081329346, "learning_rate": 3.21611041072258e-06, "loss": 0.8413, "step": 11900 }, { "epoch": 0.7106771766391974, "grad_norm": 2.810330629348755, "learning_rate": 3.2154468847455376e-06, "loss": 0.8365, "step": 11901 }, { "epoch": 0.710736892392213, "grad_norm": 2.7866287231445312, "learning_rate": 3.214783358768496e-06, "loss": 0.8198, "step": 11902 }, { "epoch": 0.7107966081452287, "grad_norm": 4.895188331604004, "learning_rate": 3.2141198327914543e-06, "loss": 0.8275, "step": 11903 }, { "epoch": 0.7108563238982444, "grad_norm": 1.7848643064498901, "learning_rate": 3.213456306814412e-06, "loss": 0.8348, "step": 11904 }, { "epoch": 0.71091603965126, "grad_norm": 3.341937780380249, "learning_rate": 3.21279278083737e-06, "loss": 0.8527, "step": 11905 }, { "epoch": 0.7109757554042756, "grad_norm": 4.530251502990723, "learning_rate": 3.2121292548603277e-06, "loss": 0.8185, "step": 11906 }, { "epoch": 0.7110354711572913, "grad_norm": 2.408707618713379, "learning_rate": 3.2114657288832862e-06, "loss": 0.8497, "step": 11907 }, { "epoch": 0.7110951869103069, "grad_norm": 2.1825127601623535, "learning_rate": 3.210802202906244e-06, "loss": 0.8336, "step": 11908 }, { "epoch": 0.7111549026633226, "grad_norm": 2.059081554412842, "learning_rate": 3.210138676929202e-06, "loss": 0.8432, "step": 11909 }, { "epoch": 0.7112146184163383, "grad_norm": 1.7586331367492676, "learning_rate": 3.2094751509521597e-06, "loss": 0.8429, "step": 11910 }, { "epoch": 0.7112743341693539, "grad_norm": 1.6702407598495483, "learning_rate": 3.208811624975118e-06, "loss": 0.8255, "step": 11911 }, { "epoch": 0.7113340499223695, "grad_norm": 1.8616636991500854, "learning_rate": 3.2081480989980763e-06, "loss": 0.8207, "step": 11912 }, { "epoch": 0.7113937656753851, "grad_norm": 1.6548713445663452, "learning_rate": 3.207484573021034e-06, "loss": 0.7942, "step": 11913 }, { "epoch": 0.7114534814284008, "grad_norm": 4.439494609832764, "learning_rate": 3.206821047043992e-06, "loss": 0.8275, "step": 11914 }, { "epoch": 0.7115131971814165, "grad_norm": 3.4832911491394043, "learning_rate": 3.2061575210669498e-06, "loss": 0.8608, "step": 11915 }, { "epoch": 0.7115729129344321, "grad_norm": 3.0367581844329834, "learning_rate": 3.205493995089908e-06, "loss": 0.8646, "step": 11916 }, { "epoch": 0.7116326286874477, "grad_norm": 1.9235138893127441, "learning_rate": 3.2048304691128664e-06, "loss": 0.8479, "step": 11917 }, { "epoch": 0.7116923444404634, "grad_norm": 2.591311454772949, "learning_rate": 3.204166943135824e-06, "loss": 0.818, "step": 11918 }, { "epoch": 0.711752060193479, "grad_norm": 2.0660290718078613, "learning_rate": 3.203503417158782e-06, "loss": 0.806, "step": 11919 }, { "epoch": 0.7118117759464947, "grad_norm": 3.4946959018707275, "learning_rate": 3.20283989118174e-06, "loss": 0.8786, "step": 11920 }, { "epoch": 0.7118714916995104, "grad_norm": 2.2078421115875244, "learning_rate": 3.202176365204698e-06, "loss": 0.8288, "step": 11921 }, { "epoch": 0.711931207452526, "grad_norm": 2.0216336250305176, "learning_rate": 3.201512839227656e-06, "loss": 0.8477, "step": 11922 }, { "epoch": 0.7119909232055416, "grad_norm": 2.4926810264587402, "learning_rate": 3.200849313250614e-06, "loss": 0.7807, "step": 11923 }, { "epoch": 0.7120506389585572, "grad_norm": 2.6854631900787354, "learning_rate": 3.200185787273572e-06, "loss": 0.8541, "step": 11924 }, { "epoch": 0.7121103547115729, "grad_norm": 2.015768527984619, "learning_rate": 3.19952226129653e-06, "loss": 0.8666, "step": 11925 }, { "epoch": 0.7121700704645886, "grad_norm": 2.2865419387817383, "learning_rate": 3.1988587353194876e-06, "loss": 0.8592, "step": 11926 }, { "epoch": 0.7122297862176042, "grad_norm": 2.3178939819335938, "learning_rate": 3.198195209342446e-06, "loss": 0.8345, "step": 11927 }, { "epoch": 0.7122895019706198, "grad_norm": 2.273977518081665, "learning_rate": 3.1975316833654042e-06, "loss": 0.8277, "step": 11928 }, { "epoch": 0.7123492177236355, "grad_norm": 2.907698154449463, "learning_rate": 3.196868157388362e-06, "loss": 0.8533, "step": 11929 }, { "epoch": 0.7124089334766511, "grad_norm": 4.282543659210205, "learning_rate": 3.19620463141132e-06, "loss": 0.8509, "step": 11930 }, { "epoch": 0.7124686492296668, "grad_norm": 5.533401966094971, "learning_rate": 3.1955411054342777e-06, "loss": 0.8694, "step": 11931 }, { "epoch": 0.7125283649826825, "grad_norm": 1.597447395324707, "learning_rate": 3.194877579457236e-06, "loss": 0.8299, "step": 11932 }, { "epoch": 0.7125880807356981, "grad_norm": 5.524341106414795, "learning_rate": 3.194214053480194e-06, "loss": 0.8457, "step": 11933 }, { "epoch": 0.7126477964887137, "grad_norm": 2.0855202674865723, "learning_rate": 3.193550527503152e-06, "loss": 0.8092, "step": 11934 }, { "epoch": 0.7127075122417293, "grad_norm": 2.0637388229370117, "learning_rate": 3.1928870015261097e-06, "loss": 0.8346, "step": 11935 }, { "epoch": 0.712767227994745, "grad_norm": 1.8441721200942993, "learning_rate": 3.1922234755490678e-06, "loss": 0.8145, "step": 11936 }, { "epoch": 0.7128269437477607, "grad_norm": 3.440199375152588, "learning_rate": 3.1915599495720263e-06, "loss": 0.8516, "step": 11937 }, { "epoch": 0.7128866595007763, "grad_norm": 2.138909339904785, "learning_rate": 3.190896423594984e-06, "loss": 0.8069, "step": 11938 }, { "epoch": 0.7129463752537919, "grad_norm": 2.2841508388519287, "learning_rate": 3.190232897617942e-06, "loss": 0.7892, "step": 11939 }, { "epoch": 0.7130060910068076, "grad_norm": 2.1798453330993652, "learning_rate": 3.1895693716408997e-06, "loss": 0.801, "step": 11940 }, { "epoch": 0.7130658067598232, "grad_norm": 1.5826839208602905, "learning_rate": 3.188905845663858e-06, "loss": 0.7981, "step": 11941 }, { "epoch": 0.7131255225128389, "grad_norm": 2.2571020126342773, "learning_rate": 3.1882423196868164e-06, "loss": 0.7941, "step": 11942 }, { "epoch": 0.7131852382658546, "grad_norm": 2.561980724334717, "learning_rate": 3.187578793709774e-06, "loss": 0.8354, "step": 11943 }, { "epoch": 0.7132449540188702, "grad_norm": 2.7410950660705566, "learning_rate": 3.186915267732732e-06, "loss": 0.8359, "step": 11944 }, { "epoch": 0.7133046697718858, "grad_norm": 2.6077640056610107, "learning_rate": 3.18625174175569e-06, "loss": 0.8155, "step": 11945 }, { "epoch": 0.7133643855249014, "grad_norm": 2.0350427627563477, "learning_rate": 3.185588215778648e-06, "loss": 0.8295, "step": 11946 }, { "epoch": 0.7134241012779171, "grad_norm": 1.9909007549285889, "learning_rate": 3.184924689801606e-06, "loss": 0.8527, "step": 11947 }, { "epoch": 0.7134838170309328, "grad_norm": 2.4037911891937256, "learning_rate": 3.184261163824564e-06, "loss": 0.8697, "step": 11948 }, { "epoch": 0.7135435327839484, "grad_norm": 2.068222999572754, "learning_rate": 3.183597637847522e-06, "loss": 0.8465, "step": 11949 }, { "epoch": 0.713603248536964, "grad_norm": 2.2254927158355713, "learning_rate": 3.18293411187048e-06, "loss": 0.8029, "step": 11950 }, { "epoch": 0.7136629642899797, "grad_norm": 2.475433111190796, "learning_rate": 3.1822705858934376e-06, "loss": 0.8111, "step": 11951 }, { "epoch": 0.7137226800429953, "grad_norm": 2.1018779277801514, "learning_rate": 3.181607059916396e-06, "loss": 0.8003, "step": 11952 }, { "epoch": 0.713782395796011, "grad_norm": 1.7823352813720703, "learning_rate": 3.180943533939354e-06, "loss": 0.8288, "step": 11953 }, { "epoch": 0.7138421115490267, "grad_norm": 3.813058853149414, "learning_rate": 3.180280007962312e-06, "loss": 0.854, "step": 11954 }, { "epoch": 0.7139018273020423, "grad_norm": 1.894434928894043, "learning_rate": 3.17961648198527e-06, "loss": 0.8182, "step": 11955 }, { "epoch": 0.7139615430550579, "grad_norm": 2.13051700592041, "learning_rate": 3.1789529560082277e-06, "loss": 0.8283, "step": 11956 }, { "epoch": 0.7140212588080735, "grad_norm": 2.6311511993408203, "learning_rate": 3.178289430031186e-06, "loss": 0.8219, "step": 11957 }, { "epoch": 0.7140809745610892, "grad_norm": 2.0044071674346924, "learning_rate": 3.177625904054144e-06, "loss": 0.8794, "step": 11958 }, { "epoch": 0.7141406903141049, "grad_norm": 2.6820597648620605, "learning_rate": 3.176962378077102e-06, "loss": 0.8474, "step": 11959 }, { "epoch": 0.7142004060671205, "grad_norm": 1.912790060043335, "learning_rate": 3.1762988521000596e-06, "loss": 0.8091, "step": 11960 }, { "epoch": 0.7142601218201362, "grad_norm": 3.583989143371582, "learning_rate": 3.1756353261230177e-06, "loss": 0.8272, "step": 11961 }, { "epoch": 0.7143198375731518, "grad_norm": 2.497387647628784, "learning_rate": 3.1749718001459763e-06, "loss": 0.8221, "step": 11962 }, { "epoch": 0.7143795533261674, "grad_norm": 2.8831377029418945, "learning_rate": 3.174308274168934e-06, "loss": 0.8519, "step": 11963 }, { "epoch": 0.7144392690791831, "grad_norm": 2.229811906814575, "learning_rate": 3.173644748191892e-06, "loss": 0.832, "step": 11964 }, { "epoch": 0.7144989848321988, "grad_norm": 2.754443883895874, "learning_rate": 3.1729812222148497e-06, "loss": 0.8295, "step": 11965 }, { "epoch": 0.7145587005852144, "grad_norm": 1.8620681762695312, "learning_rate": 3.172317696237808e-06, "loss": 0.8085, "step": 11966 }, { "epoch": 0.71461841633823, "grad_norm": 2.760606527328491, "learning_rate": 3.1716541702607663e-06, "loss": 0.841, "step": 11967 }, { "epoch": 0.7146781320912456, "grad_norm": 4.893248558044434, "learning_rate": 3.170990644283724e-06, "loss": 0.8469, "step": 11968 }, { "epoch": 0.7147378478442613, "grad_norm": 2.8175816535949707, "learning_rate": 3.170327118306682e-06, "loss": 0.8225, "step": 11969 }, { "epoch": 0.714797563597277, "grad_norm": 2.2428858280181885, "learning_rate": 3.1696635923296398e-06, "loss": 0.8351, "step": 11970 }, { "epoch": 0.7148572793502926, "grad_norm": 1.9050205945968628, "learning_rate": 3.169000066352598e-06, "loss": 0.8725, "step": 11971 }, { "epoch": 0.7149169951033083, "grad_norm": 2.9201767444610596, "learning_rate": 3.168336540375556e-06, "loss": 0.8349, "step": 11972 }, { "epoch": 0.7149767108563239, "grad_norm": 1.8616656064987183, "learning_rate": 3.167673014398514e-06, "loss": 0.829, "step": 11973 }, { "epoch": 0.7150364266093395, "grad_norm": 3.6377532482147217, "learning_rate": 3.1670094884214718e-06, "loss": 0.8297, "step": 11974 }, { "epoch": 0.7150961423623552, "grad_norm": 2.187091827392578, "learning_rate": 3.16634596244443e-06, "loss": 0.8351, "step": 11975 }, { "epoch": 0.7151558581153709, "grad_norm": 1.8187910318374634, "learning_rate": 3.1656824364673875e-06, "loss": 0.8241, "step": 11976 }, { "epoch": 0.7152155738683865, "grad_norm": 2.360792398452759, "learning_rate": 3.165018910490346e-06, "loss": 0.8359, "step": 11977 }, { "epoch": 0.7152752896214021, "grad_norm": 1.8812315464019775, "learning_rate": 3.164355384513304e-06, "loss": 0.7758, "step": 11978 }, { "epoch": 0.7153350053744177, "grad_norm": 3.1223201751708984, "learning_rate": 3.163691858536262e-06, "loss": 0.8416, "step": 11979 }, { "epoch": 0.7153947211274334, "grad_norm": 1.827671766281128, "learning_rate": 3.16302833255922e-06, "loss": 0.8363, "step": 11980 }, { "epoch": 0.7154544368804491, "grad_norm": 2.429569959640503, "learning_rate": 3.1623648065821776e-06, "loss": 0.8576, "step": 11981 }, { "epoch": 0.7155141526334647, "grad_norm": 2.558295726776123, "learning_rate": 3.161701280605136e-06, "loss": 0.8263, "step": 11982 }, { "epoch": 0.7155738683864804, "grad_norm": 1.785526990890503, "learning_rate": 3.1610377546280942e-06, "loss": 0.8571, "step": 11983 }, { "epoch": 0.715633584139496, "grad_norm": 3.367682695388794, "learning_rate": 3.160374228651052e-06, "loss": 0.8393, "step": 11984 }, { "epoch": 0.7156932998925116, "grad_norm": 1.9737731218338013, "learning_rate": 3.1597107026740096e-06, "loss": 0.8099, "step": 11985 }, { "epoch": 0.7157530156455273, "grad_norm": 1.615198016166687, "learning_rate": 3.1590471766969677e-06, "loss": 0.8268, "step": 11986 }, { "epoch": 0.715812731398543, "grad_norm": 2.312328338623047, "learning_rate": 3.1583836507199262e-06, "loss": 0.86, "step": 11987 }, { "epoch": 0.7158724471515586, "grad_norm": 2.269176721572876, "learning_rate": 3.157720124742884e-06, "loss": 0.8212, "step": 11988 }, { "epoch": 0.7159321629045742, "grad_norm": 2.09615421295166, "learning_rate": 3.157056598765842e-06, "loss": 0.8576, "step": 11989 }, { "epoch": 0.7159918786575898, "grad_norm": 2.1762146949768066, "learning_rate": 3.1563930727887997e-06, "loss": 0.8238, "step": 11990 }, { "epoch": 0.7160515944106055, "grad_norm": 1.9300835132598877, "learning_rate": 3.1557295468117578e-06, "loss": 0.8532, "step": 11991 }, { "epoch": 0.7161113101636212, "grad_norm": 2.6022629737854004, "learning_rate": 3.1550660208347163e-06, "loss": 0.828, "step": 11992 }, { "epoch": 0.7161710259166368, "grad_norm": 2.386645793914795, "learning_rate": 3.154402494857674e-06, "loss": 0.8063, "step": 11993 }, { "epoch": 0.7162307416696525, "grad_norm": 2.0041697025299072, "learning_rate": 3.153738968880632e-06, "loss": 0.8284, "step": 11994 }, { "epoch": 0.7162904574226681, "grad_norm": 3.4726474285125732, "learning_rate": 3.1530754429035898e-06, "loss": 0.8348, "step": 11995 }, { "epoch": 0.7163501731756837, "grad_norm": 2.4503984451293945, "learning_rate": 3.152411916926548e-06, "loss": 0.8509, "step": 11996 }, { "epoch": 0.7164098889286994, "grad_norm": 2.341967821121216, "learning_rate": 3.151748390949506e-06, "loss": 0.8239, "step": 11997 }, { "epoch": 0.716469604681715, "grad_norm": 2.210405111312866, "learning_rate": 3.151084864972464e-06, "loss": 0.8144, "step": 11998 }, { "epoch": 0.7165293204347307, "grad_norm": 2.0354247093200684, "learning_rate": 3.1504213389954217e-06, "loss": 0.8606, "step": 11999 }, { "epoch": 0.7165890361877463, "grad_norm": 2.914146661758423, "learning_rate": 3.14975781301838e-06, "loss": 0.8497, "step": 12000 }, { "epoch": 0.7165890361877463, "eval_text_loss": 0.896327555179596, "eval_text_runtime": 15.2376, "eval_text_samples_per_second": 262.508, "eval_text_steps_per_second": 0.525, "step": 12000 }, { "epoch": 0.7165890361877463, "eval_image_loss": 0.6020808219909668, "eval_image_runtime": 4.9969, "eval_image_samples_per_second": 800.496, "eval_image_steps_per_second": 1.601, "step": 12000 }, { "epoch": 0.7165890361877463, "eval_video_loss": 1.035422682762146, "eval_video_runtime": 76.4826, "eval_video_samples_per_second": 52.299, "eval_video_steps_per_second": 0.105, "step": 12000 }, { "epoch": 0.7166487519407619, "grad_norm": 3.331970453262329, "learning_rate": 3.1490942870413375e-06, "loss": 0.7917, "step": 12001 }, { "epoch": 0.7167084676937776, "grad_norm": 2.1580095291137695, "learning_rate": 3.148430761064296e-06, "loss": 0.8096, "step": 12002 }, { "epoch": 0.7167681834467933, "grad_norm": 2.1039373874664307, "learning_rate": 3.147767235087254e-06, "loss": 0.8319, "step": 12003 }, { "epoch": 0.7168278991998089, "grad_norm": 2.4816458225250244, "learning_rate": 3.147103709110212e-06, "loss": 0.7916, "step": 12004 }, { "epoch": 0.7168876149528246, "grad_norm": 1.9460638761520386, "learning_rate": 3.14644018313317e-06, "loss": 0.8456, "step": 12005 }, { "epoch": 0.7169473307058402, "grad_norm": 2.1033453941345215, "learning_rate": 3.1457766571561276e-06, "loss": 0.8385, "step": 12006 }, { "epoch": 0.7170070464588558, "grad_norm": 1.7876254320144653, "learning_rate": 3.145113131179086e-06, "loss": 0.8385, "step": 12007 }, { "epoch": 0.7170667622118715, "grad_norm": 1.981685996055603, "learning_rate": 3.1444496052020442e-06, "loss": 0.8314, "step": 12008 }, { "epoch": 0.7171264779648872, "grad_norm": 2.5766396522521973, "learning_rate": 3.143786079225002e-06, "loss": 0.862, "step": 12009 }, { "epoch": 0.7171861937179028, "grad_norm": 2.726742744445801, "learning_rate": 3.14312255324796e-06, "loss": 0.8253, "step": 12010 }, { "epoch": 0.7172459094709184, "grad_norm": 3.0408706665039062, "learning_rate": 3.1424590272709177e-06, "loss": 0.85, "step": 12011 }, { "epoch": 0.717305625223934, "grad_norm": 3.0129776000976562, "learning_rate": 3.141795501293876e-06, "loss": 0.8397, "step": 12012 }, { "epoch": 0.7173653409769497, "grad_norm": 2.2606775760650635, "learning_rate": 3.141131975316834e-06, "loss": 0.8302, "step": 12013 }, { "epoch": 0.7174250567299654, "grad_norm": 2.5869901180267334, "learning_rate": 3.140468449339792e-06, "loss": 0.8538, "step": 12014 }, { "epoch": 0.717484772482981, "grad_norm": 2.102461814880371, "learning_rate": 3.1398049233627497e-06, "loss": 0.8406, "step": 12015 }, { "epoch": 0.7175444882359967, "grad_norm": 3.48260235786438, "learning_rate": 3.1391413973857078e-06, "loss": 0.827, "step": 12016 }, { "epoch": 0.7176042039890123, "grad_norm": 3.422268867492676, "learning_rate": 3.1384778714086663e-06, "loss": 0.8123, "step": 12017 }, { "epoch": 0.7176639197420279, "grad_norm": 1.8873870372772217, "learning_rate": 3.137814345431624e-06, "loss": 0.7945, "step": 12018 }, { "epoch": 0.7177236354950436, "grad_norm": 1.8487404584884644, "learning_rate": 3.137150819454582e-06, "loss": 0.8064, "step": 12019 }, { "epoch": 0.7177833512480593, "grad_norm": 3.196824073791504, "learning_rate": 3.1364872934775397e-06, "loss": 0.8236, "step": 12020 }, { "epoch": 0.7178430670010749, "grad_norm": 3.536128044128418, "learning_rate": 3.135823767500498e-06, "loss": 0.8476, "step": 12021 }, { "epoch": 0.7179027827540905, "grad_norm": 2.1196413040161133, "learning_rate": 3.135160241523456e-06, "loss": 0.8611, "step": 12022 }, { "epoch": 0.7179624985071061, "grad_norm": 1.7022184133529663, "learning_rate": 3.134496715546414e-06, "loss": 0.8615, "step": 12023 }, { "epoch": 0.7180222142601218, "grad_norm": 1.9068232774734497, "learning_rate": 3.1338331895693717e-06, "loss": 0.8109, "step": 12024 }, { "epoch": 0.7180819300131375, "grad_norm": 3.1375796794891357, "learning_rate": 3.13316966359233e-06, "loss": 0.7963, "step": 12025 }, { "epoch": 0.7181416457661531, "grad_norm": 2.3418807983398438, "learning_rate": 3.1325061376152875e-06, "loss": 0.8241, "step": 12026 }, { "epoch": 0.7182013615191688, "grad_norm": 1.9948382377624512, "learning_rate": 3.131842611638246e-06, "loss": 0.8194, "step": 12027 }, { "epoch": 0.7182610772721844, "grad_norm": 1.9982460737228394, "learning_rate": 3.131179085661204e-06, "loss": 0.8313, "step": 12028 }, { "epoch": 0.7183207930252, "grad_norm": 2.752375602722168, "learning_rate": 3.1305155596841618e-06, "loss": 0.8275, "step": 12029 }, { "epoch": 0.7183805087782157, "grad_norm": 2.761370897293091, "learning_rate": 3.12985203370712e-06, "loss": 0.836, "step": 12030 }, { "epoch": 0.7184402245312314, "grad_norm": 4.710614204406738, "learning_rate": 3.1291885077300776e-06, "loss": 0.8787, "step": 12031 }, { "epoch": 0.718499940284247, "grad_norm": 3.1432712078094482, "learning_rate": 3.128524981753036e-06, "loss": 0.8582, "step": 12032 }, { "epoch": 0.7185596560372627, "grad_norm": 2.064351797103882, "learning_rate": 3.127861455775994e-06, "loss": 0.8416, "step": 12033 }, { "epoch": 0.7186193717902782, "grad_norm": 2.064082384109497, "learning_rate": 3.127197929798952e-06, "loss": 0.8318, "step": 12034 }, { "epoch": 0.7186790875432939, "grad_norm": 1.6579177379608154, "learning_rate": 3.12653440382191e-06, "loss": 0.8132, "step": 12035 }, { "epoch": 0.7187388032963096, "grad_norm": 2.543783664703369, "learning_rate": 3.1258708778448676e-06, "loss": 0.8163, "step": 12036 }, { "epoch": 0.7187985190493252, "grad_norm": 2.058941125869751, "learning_rate": 3.125207351867826e-06, "loss": 0.9036, "step": 12037 }, { "epoch": 0.7188582348023409, "grad_norm": 2.0805294513702393, "learning_rate": 3.124543825890784e-06, "loss": 0.8038, "step": 12038 }, { "epoch": 0.7189179505553565, "grad_norm": 2.1767873764038086, "learning_rate": 3.123880299913742e-06, "loss": 0.8041, "step": 12039 }, { "epoch": 0.7189776663083721, "grad_norm": 2.6004796028137207, "learning_rate": 3.1232167739366996e-06, "loss": 0.8307, "step": 12040 }, { "epoch": 0.7190373820613878, "grad_norm": 1.684277892112732, "learning_rate": 3.1225532479596577e-06, "loss": 0.7778, "step": 12041 }, { "epoch": 0.7190970978144035, "grad_norm": 2.244520425796509, "learning_rate": 3.1218897219826162e-06, "loss": 0.8352, "step": 12042 }, { "epoch": 0.7191568135674191, "grad_norm": 1.9103096723556519, "learning_rate": 3.121226196005574e-06, "loss": 0.8202, "step": 12043 }, { "epoch": 0.7192165293204348, "grad_norm": 2.3721823692321777, "learning_rate": 3.120562670028532e-06, "loss": 0.8009, "step": 12044 }, { "epoch": 0.7192762450734503, "grad_norm": 1.7468593120574951, "learning_rate": 3.1198991440514897e-06, "loss": 0.8206, "step": 12045 }, { "epoch": 0.719335960826466, "grad_norm": 2.406754493713379, "learning_rate": 3.119235618074448e-06, "loss": 0.8433, "step": 12046 }, { "epoch": 0.7193956765794817, "grad_norm": 2.968346118927002, "learning_rate": 3.118572092097406e-06, "loss": 0.8459, "step": 12047 }, { "epoch": 0.7194553923324973, "grad_norm": 3.702404499053955, "learning_rate": 3.117908566120364e-06, "loss": 0.8083, "step": 12048 }, { "epoch": 0.719515108085513, "grad_norm": 1.965713620185852, "learning_rate": 3.1172450401433217e-06, "loss": 0.8226, "step": 12049 }, { "epoch": 0.7195748238385286, "grad_norm": 3.9505746364593506, "learning_rate": 3.1165815141662798e-06, "loss": 0.8434, "step": 12050 }, { "epoch": 0.7196345395915442, "grad_norm": 1.8459023237228394, "learning_rate": 3.1159179881892375e-06, "loss": 0.8473, "step": 12051 }, { "epoch": 0.7196942553445599, "grad_norm": 1.678920030593872, "learning_rate": 3.115254462212196e-06, "loss": 0.8293, "step": 12052 }, { "epoch": 0.7197539710975756, "grad_norm": 2.189222574234009, "learning_rate": 3.114590936235154e-06, "loss": 0.8296, "step": 12053 }, { "epoch": 0.7198136868505912, "grad_norm": 1.612674593925476, "learning_rate": 3.1139274102581118e-06, "loss": 0.8444, "step": 12054 }, { "epoch": 0.7198734026036069, "grad_norm": 2.4278345108032227, "learning_rate": 3.11326388428107e-06, "loss": 0.8378, "step": 12055 }, { "epoch": 0.7199331183566224, "grad_norm": 1.9421513080596924, "learning_rate": 3.1126003583040275e-06, "loss": 0.7831, "step": 12056 }, { "epoch": 0.7199928341096381, "grad_norm": 1.9126214981079102, "learning_rate": 3.111936832326986e-06, "loss": 0.8192, "step": 12057 }, { "epoch": 0.7200525498626538, "grad_norm": 2.390521287918091, "learning_rate": 3.111273306349944e-06, "loss": 0.8192, "step": 12058 }, { "epoch": 0.7201122656156694, "grad_norm": 3.175400733947754, "learning_rate": 3.110609780372902e-06, "loss": 0.8149, "step": 12059 }, { "epoch": 0.7201719813686851, "grad_norm": 1.572357416152954, "learning_rate": 3.10994625439586e-06, "loss": 0.8414, "step": 12060 }, { "epoch": 0.7202316971217007, "grad_norm": 2.3612141609191895, "learning_rate": 3.1092827284188176e-06, "loss": 0.8077, "step": 12061 }, { "epoch": 0.7202914128747163, "grad_norm": 7.080209255218506, "learning_rate": 3.108619202441776e-06, "loss": 0.8749, "step": 12062 }, { "epoch": 0.720351128627732, "grad_norm": 1.699331521987915, "learning_rate": 3.107955676464734e-06, "loss": 0.8625, "step": 12063 }, { "epoch": 0.7204108443807477, "grad_norm": 2.636357307434082, "learning_rate": 3.107292150487692e-06, "loss": 0.8426, "step": 12064 }, { "epoch": 0.7204705601337633, "grad_norm": 2.20404314994812, "learning_rate": 3.1066286245106496e-06, "loss": 0.8588, "step": 12065 }, { "epoch": 0.720530275886779, "grad_norm": 2.341681957244873, "learning_rate": 3.1059650985336077e-06, "loss": 0.8027, "step": 12066 }, { "epoch": 0.7205899916397945, "grad_norm": 2.810035467147827, "learning_rate": 3.1053015725565662e-06, "loss": 0.8151, "step": 12067 }, { "epoch": 0.7206497073928102, "grad_norm": 2.1568806171417236, "learning_rate": 3.104638046579524e-06, "loss": 0.8034, "step": 12068 }, { "epoch": 0.7207094231458259, "grad_norm": 2.1745541095733643, "learning_rate": 3.103974520602482e-06, "loss": 0.7968, "step": 12069 }, { "epoch": 0.7207691388988415, "grad_norm": 2.2376489639282227, "learning_rate": 3.1033109946254397e-06, "loss": 0.8101, "step": 12070 }, { "epoch": 0.7208288546518572, "grad_norm": 3.0919692516326904, "learning_rate": 3.1026474686483978e-06, "loss": 0.8559, "step": 12071 }, { "epoch": 0.7208885704048728, "grad_norm": 2.726576089859009, "learning_rate": 3.101983942671356e-06, "loss": 0.7858, "step": 12072 }, { "epoch": 0.7209482861578884, "grad_norm": 2.439284086227417, "learning_rate": 3.101320416694314e-06, "loss": 0.82, "step": 12073 }, { "epoch": 0.7210080019109041, "grad_norm": 4.006473064422607, "learning_rate": 3.1006568907172716e-06, "loss": 0.8326, "step": 12074 }, { "epoch": 0.7210677176639197, "grad_norm": 3.639206886291504, "learning_rate": 3.0999933647402297e-06, "loss": 0.847, "step": 12075 }, { "epoch": 0.7211274334169354, "grad_norm": 2.7089645862579346, "learning_rate": 3.0993298387631874e-06, "loss": 0.8551, "step": 12076 }, { "epoch": 0.7211871491699511, "grad_norm": 2.9311022758483887, "learning_rate": 3.098666312786146e-06, "loss": 0.8635, "step": 12077 }, { "epoch": 0.7212468649229666, "grad_norm": 3.151073694229126, "learning_rate": 3.098002786809104e-06, "loss": 0.8297, "step": 12078 }, { "epoch": 0.7213065806759823, "grad_norm": 3.0872855186462402, "learning_rate": 3.0973392608320617e-06, "loss": 0.848, "step": 12079 }, { "epoch": 0.721366296428998, "grad_norm": 2.1528165340423584, "learning_rate": 3.09667573485502e-06, "loss": 0.8088, "step": 12080 }, { "epoch": 0.7214260121820136, "grad_norm": 2.079430341720581, "learning_rate": 3.0960122088779775e-06, "loss": 0.827, "step": 12081 }, { "epoch": 0.7214857279350293, "grad_norm": 1.767303228378296, "learning_rate": 3.095348682900936e-06, "loss": 0.7643, "step": 12082 }, { "epoch": 0.7215454436880449, "grad_norm": 2.540081024169922, "learning_rate": 3.094685156923894e-06, "loss": 0.8406, "step": 12083 }, { "epoch": 0.7216051594410605, "grad_norm": 3.095773458480835, "learning_rate": 3.094021630946852e-06, "loss": 0.8647, "step": 12084 }, { "epoch": 0.7216648751940762, "grad_norm": 2.110727310180664, "learning_rate": 3.09335810496981e-06, "loss": 0.8177, "step": 12085 }, { "epoch": 0.7217245909470918, "grad_norm": 1.893728256225586, "learning_rate": 3.0926945789927676e-06, "loss": 0.8528, "step": 12086 }, { "epoch": 0.7217843067001075, "grad_norm": 2.4588866233825684, "learning_rate": 3.092031053015726e-06, "loss": 0.8383, "step": 12087 }, { "epoch": 0.7218440224531232, "grad_norm": 2.2755770683288574, "learning_rate": 3.0913675270386838e-06, "loss": 0.8218, "step": 12088 }, { "epoch": 0.7219037382061387, "grad_norm": 2.2716686725616455, "learning_rate": 3.090704001061642e-06, "loss": 0.8229, "step": 12089 }, { "epoch": 0.7219634539591544, "grad_norm": 2.8982484340667725, "learning_rate": 3.0900404750845996e-06, "loss": 0.8185, "step": 12090 }, { "epoch": 0.7220231697121701, "grad_norm": 3.014002799987793, "learning_rate": 3.0893769491075577e-06, "loss": 0.8357, "step": 12091 }, { "epoch": 0.7220828854651857, "grad_norm": 3.1776788234710693, "learning_rate": 3.088713423130516e-06, "loss": 0.7929, "step": 12092 }, { "epoch": 0.7221426012182014, "grad_norm": 1.679064393043518, "learning_rate": 3.088049897153474e-06, "loss": 0.8329, "step": 12093 }, { "epoch": 0.7222023169712171, "grad_norm": 2.6042776107788086, "learning_rate": 3.087386371176432e-06, "loss": 0.835, "step": 12094 }, { "epoch": 0.7222620327242326, "grad_norm": 3.141028642654419, "learning_rate": 3.0867228451993896e-06, "loss": 0.8259, "step": 12095 }, { "epoch": 0.7223217484772483, "grad_norm": 1.7699755430221558, "learning_rate": 3.0860593192223477e-06, "loss": 0.8338, "step": 12096 }, { "epoch": 0.722381464230264, "grad_norm": 1.8909900188446045, "learning_rate": 3.085395793245306e-06, "loss": 0.8355, "step": 12097 }, { "epoch": 0.7224411799832796, "grad_norm": 1.6776540279388428, "learning_rate": 3.084732267268264e-06, "loss": 0.8044, "step": 12098 }, { "epoch": 0.7225008957362953, "grad_norm": 2.169955015182495, "learning_rate": 3.0840687412912216e-06, "loss": 0.8128, "step": 12099 }, { "epoch": 0.7225606114893108, "grad_norm": 2.3347277641296387, "learning_rate": 3.0834052153141797e-06, "loss": 0.8576, "step": 12100 }, { "epoch": 0.7226203272423265, "grad_norm": 2.2443490028381348, "learning_rate": 3.0827416893371374e-06, "loss": 0.7842, "step": 12101 }, { "epoch": 0.7226800429953422, "grad_norm": 2.2062363624572754, "learning_rate": 3.082078163360096e-06, "loss": 0.8254, "step": 12102 }, { "epoch": 0.7227397587483578, "grad_norm": 3.140479564666748, "learning_rate": 3.081414637383054e-06, "loss": 0.8257, "step": 12103 }, { "epoch": 0.7227994745013735, "grad_norm": 2.215237855911255, "learning_rate": 3.0807511114060117e-06, "loss": 0.8363, "step": 12104 }, { "epoch": 0.7228591902543892, "grad_norm": 1.7360352277755737, "learning_rate": 3.08008758542897e-06, "loss": 0.802, "step": 12105 }, { "epoch": 0.7229189060074047, "grad_norm": 2.078488826751709, "learning_rate": 3.0794240594519275e-06, "loss": 0.8491, "step": 12106 }, { "epoch": 0.7229786217604204, "grad_norm": 3.2794363498687744, "learning_rate": 3.078760533474886e-06, "loss": 0.8503, "step": 12107 }, { "epoch": 0.723038337513436, "grad_norm": 3.6224966049194336, "learning_rate": 3.078097007497844e-06, "loss": 0.8246, "step": 12108 }, { "epoch": 0.7230980532664517, "grad_norm": 2.190298318862915, "learning_rate": 3.0774334815208018e-06, "loss": 0.8499, "step": 12109 }, { "epoch": 0.7231577690194674, "grad_norm": 2.822145938873291, "learning_rate": 3.07676995554376e-06, "loss": 0.8387, "step": 12110 }, { "epoch": 0.7232174847724829, "grad_norm": 2.898257255554199, "learning_rate": 3.0761064295667176e-06, "loss": 0.8048, "step": 12111 }, { "epoch": 0.7232772005254986, "grad_norm": 2.514472723007202, "learning_rate": 3.075442903589676e-06, "loss": 0.8143, "step": 12112 }, { "epoch": 0.7233369162785143, "grad_norm": 1.994144320487976, "learning_rate": 3.0747793776126338e-06, "loss": 0.7975, "step": 12113 }, { "epoch": 0.7233966320315299, "grad_norm": 2.4486682415008545, "learning_rate": 3.074115851635592e-06, "loss": 0.8292, "step": 12114 }, { "epoch": 0.7234563477845456, "grad_norm": 2.7888681888580322, "learning_rate": 3.0734523256585495e-06, "loss": 0.8571, "step": 12115 }, { "epoch": 0.7235160635375613, "grad_norm": 3.748220443725586, "learning_rate": 3.0727887996815076e-06, "loss": 0.8334, "step": 12116 }, { "epoch": 0.7235757792905768, "grad_norm": 2.2898943424224854, "learning_rate": 3.072125273704466e-06, "loss": 0.8294, "step": 12117 }, { "epoch": 0.7236354950435925, "grad_norm": 1.726446509361267, "learning_rate": 3.071461747727424e-06, "loss": 0.7998, "step": 12118 }, { "epoch": 0.7236952107966081, "grad_norm": 3.0527565479278564, "learning_rate": 3.070798221750382e-06, "loss": 0.8448, "step": 12119 }, { "epoch": 0.7237549265496238, "grad_norm": 2.0622575283050537, "learning_rate": 3.0701346957733396e-06, "loss": 0.8439, "step": 12120 }, { "epoch": 0.7238146423026395, "grad_norm": 1.9451100826263428, "learning_rate": 3.0694711697962977e-06, "loss": 0.849, "step": 12121 }, { "epoch": 0.723874358055655, "grad_norm": 1.6215741634368896, "learning_rate": 3.068807643819256e-06, "loss": 0.8296, "step": 12122 }, { "epoch": 0.7239340738086707, "grad_norm": 2.022364616394043, "learning_rate": 3.068144117842214e-06, "loss": 0.7874, "step": 12123 }, { "epoch": 0.7239937895616864, "grad_norm": 2.6327199935913086, "learning_rate": 3.0674805918651716e-06, "loss": 0.8417, "step": 12124 }, { "epoch": 0.724053505314702, "grad_norm": 1.8400204181671143, "learning_rate": 3.0668170658881297e-06, "loss": 0.875, "step": 12125 }, { "epoch": 0.7241132210677177, "grad_norm": 3.3468005657196045, "learning_rate": 3.0661535399110874e-06, "loss": 0.8434, "step": 12126 }, { "epoch": 0.7241729368207334, "grad_norm": 2.0461864471435547, "learning_rate": 3.065490013934046e-06, "loss": 0.8539, "step": 12127 }, { "epoch": 0.7242326525737489, "grad_norm": 1.8886477947235107, "learning_rate": 3.064826487957004e-06, "loss": 0.8154, "step": 12128 }, { "epoch": 0.7242923683267646, "grad_norm": 2.6101014614105225, "learning_rate": 3.0641629619799617e-06, "loss": 0.8067, "step": 12129 }, { "epoch": 0.7243520840797802, "grad_norm": 2.039358377456665, "learning_rate": 3.0634994360029198e-06, "loss": 0.8601, "step": 12130 }, { "epoch": 0.7244117998327959, "grad_norm": 2.2905795574188232, "learning_rate": 3.0628359100258774e-06, "loss": 0.833, "step": 12131 }, { "epoch": 0.7244715155858116, "grad_norm": 2.604405164718628, "learning_rate": 3.062172384048836e-06, "loss": 0.8182, "step": 12132 }, { "epoch": 0.7245312313388271, "grad_norm": 1.9963985681533813, "learning_rate": 3.061508858071794e-06, "loss": 0.8285, "step": 12133 }, { "epoch": 0.7245909470918428, "grad_norm": 2.307859420776367, "learning_rate": 3.0608453320947517e-06, "loss": 0.8287, "step": 12134 }, { "epoch": 0.7246506628448585, "grad_norm": 1.9928929805755615, "learning_rate": 3.06018180611771e-06, "loss": 0.8176, "step": 12135 }, { "epoch": 0.7247103785978741, "grad_norm": 2.1954433917999268, "learning_rate": 3.0595182801406675e-06, "loss": 0.8127, "step": 12136 }, { "epoch": 0.7247700943508898, "grad_norm": 2.4057085514068604, "learning_rate": 3.058854754163626e-06, "loss": 0.8313, "step": 12137 }, { "epoch": 0.7248298101039055, "grad_norm": 3.12583065032959, "learning_rate": 3.0581912281865837e-06, "loss": 0.8305, "step": 12138 }, { "epoch": 0.724889525856921, "grad_norm": 1.9181360006332397, "learning_rate": 3.057527702209542e-06, "loss": 0.8193, "step": 12139 }, { "epoch": 0.7249492416099367, "grad_norm": 2.400679349899292, "learning_rate": 3.0568641762324995e-06, "loss": 0.8565, "step": 12140 }, { "epoch": 0.7250089573629523, "grad_norm": 2.737128973007202, "learning_rate": 3.0562006502554576e-06, "loss": 0.7906, "step": 12141 }, { "epoch": 0.725068673115968, "grad_norm": 2.322145938873291, "learning_rate": 3.055537124278416e-06, "loss": 0.8252, "step": 12142 }, { "epoch": 0.7251283888689837, "grad_norm": 2.0810272693634033, "learning_rate": 3.054873598301374e-06, "loss": 0.796, "step": 12143 }, { "epoch": 0.7251881046219992, "grad_norm": 4.735297203063965, "learning_rate": 3.054210072324332e-06, "loss": 0.8387, "step": 12144 }, { "epoch": 0.7252478203750149, "grad_norm": 2.363292694091797, "learning_rate": 3.0535465463472896e-06, "loss": 0.8408, "step": 12145 }, { "epoch": 0.7253075361280306, "grad_norm": 2.3658230304718018, "learning_rate": 3.0528830203702477e-06, "loss": 0.7897, "step": 12146 }, { "epoch": 0.7253672518810462, "grad_norm": 2.1837973594665527, "learning_rate": 3.0522194943932058e-06, "loss": 0.8044, "step": 12147 }, { "epoch": 0.7254269676340619, "grad_norm": 2.641746997833252, "learning_rate": 3.051555968416164e-06, "loss": 0.8106, "step": 12148 }, { "epoch": 0.7254866833870776, "grad_norm": 2.1359267234802246, "learning_rate": 3.0508924424391216e-06, "loss": 0.8182, "step": 12149 }, { "epoch": 0.7255463991400931, "grad_norm": 2.0381553173065186, "learning_rate": 3.0502289164620797e-06, "loss": 0.8337, "step": 12150 }, { "epoch": 0.7256061148931088, "grad_norm": 2.3879282474517822, "learning_rate": 3.0495653904850373e-06, "loss": 0.8098, "step": 12151 }, { "epoch": 0.7256658306461244, "grad_norm": 1.789692759513855, "learning_rate": 3.048901864507996e-06, "loss": 0.8256, "step": 12152 }, { "epoch": 0.7257255463991401, "grad_norm": 2.27325701713562, "learning_rate": 3.048238338530954e-06, "loss": 0.8318, "step": 12153 }, { "epoch": 0.7257852621521558, "grad_norm": 1.798221230506897, "learning_rate": 3.0475748125539116e-06, "loss": 0.8161, "step": 12154 }, { "epoch": 0.7258449779051713, "grad_norm": 4.030104637145996, "learning_rate": 3.0469112865768697e-06, "loss": 0.8302, "step": 12155 }, { "epoch": 0.725904693658187, "grad_norm": 2.0508625507354736, "learning_rate": 3.0462477605998274e-06, "loss": 0.8566, "step": 12156 }, { "epoch": 0.7259644094112027, "grad_norm": 2.2821052074432373, "learning_rate": 3.045584234622786e-06, "loss": 0.8602, "step": 12157 }, { "epoch": 0.7260241251642183, "grad_norm": 1.6938579082489014, "learning_rate": 3.044920708645744e-06, "loss": 0.7938, "step": 12158 }, { "epoch": 0.726083840917234, "grad_norm": 5.813528537750244, "learning_rate": 3.0442571826687017e-06, "loss": 0.829, "step": 12159 }, { "epoch": 0.7261435566702497, "grad_norm": 3.370288610458374, "learning_rate": 3.04359365669166e-06, "loss": 0.8231, "step": 12160 }, { "epoch": 0.7262032724232652, "grad_norm": 1.9183323383331299, "learning_rate": 3.0429301307146175e-06, "loss": 0.8035, "step": 12161 }, { "epoch": 0.7262629881762809, "grad_norm": 2.511481285095215, "learning_rate": 3.042266604737576e-06, "loss": 0.8711, "step": 12162 }, { "epoch": 0.7263227039292965, "grad_norm": 2.55655574798584, "learning_rate": 3.0416030787605337e-06, "loss": 0.8163, "step": 12163 }, { "epoch": 0.7263824196823122, "grad_norm": 2.198242664337158, "learning_rate": 3.040939552783492e-06, "loss": 0.8169, "step": 12164 }, { "epoch": 0.7264421354353279, "grad_norm": 1.8964283466339111, "learning_rate": 3.0402760268064495e-06, "loss": 0.8161, "step": 12165 }, { "epoch": 0.7265018511883435, "grad_norm": 2.3876070976257324, "learning_rate": 3.0396125008294076e-06, "loss": 0.8441, "step": 12166 }, { "epoch": 0.7265615669413591, "grad_norm": 8.519670486450195, "learning_rate": 3.038948974852366e-06, "loss": 0.852, "step": 12167 }, { "epoch": 0.7266212826943748, "grad_norm": 2.1408660411834717, "learning_rate": 3.0382854488753238e-06, "loss": 0.8455, "step": 12168 }, { "epoch": 0.7266809984473904, "grad_norm": 1.8459161520004272, "learning_rate": 3.037621922898282e-06, "loss": 0.8302, "step": 12169 }, { "epoch": 0.7267407142004061, "grad_norm": 6.8983917236328125, "learning_rate": 3.0369583969212396e-06, "loss": 0.7911, "step": 12170 }, { "epoch": 0.7268004299534218, "grad_norm": 1.8971989154815674, "learning_rate": 3.0362948709441977e-06, "loss": 0.8765, "step": 12171 }, { "epoch": 0.7268601457064373, "grad_norm": 2.5398061275482178, "learning_rate": 3.0356313449671558e-06, "loss": 0.8093, "step": 12172 }, { "epoch": 0.726919861459453, "grad_norm": 3.9869654178619385, "learning_rate": 3.034967818990114e-06, "loss": 0.8332, "step": 12173 }, { "epoch": 0.7269795772124686, "grad_norm": 3.4463679790496826, "learning_rate": 3.0343042930130715e-06, "loss": 0.8848, "step": 12174 }, { "epoch": 0.7270392929654843, "grad_norm": 1.5606698989868164, "learning_rate": 3.0336407670360296e-06, "loss": 0.8205, "step": 12175 }, { "epoch": 0.7270990087185, "grad_norm": 1.900034785270691, "learning_rate": 3.0329772410589873e-06, "loss": 0.8178, "step": 12176 }, { "epoch": 0.7271587244715156, "grad_norm": 2.7060184478759766, "learning_rate": 3.032313715081946e-06, "loss": 0.8376, "step": 12177 }, { "epoch": 0.7272184402245312, "grad_norm": 2.097761869430542, "learning_rate": 3.031650189104904e-06, "loss": 0.834, "step": 12178 }, { "epoch": 0.7272781559775469, "grad_norm": 2.093794107437134, "learning_rate": 3.0309866631278616e-06, "loss": 0.8505, "step": 12179 }, { "epoch": 0.7273378717305625, "grad_norm": 3.266413450241089, "learning_rate": 3.0303231371508197e-06, "loss": 0.8298, "step": 12180 }, { "epoch": 0.7273975874835782, "grad_norm": 2.090925931930542, "learning_rate": 3.0296596111737774e-06, "loss": 0.8109, "step": 12181 }, { "epoch": 0.7274573032365939, "grad_norm": 1.7779728174209595, "learning_rate": 3.028996085196736e-06, "loss": 0.8196, "step": 12182 }, { "epoch": 0.7275170189896094, "grad_norm": 2.4834439754486084, "learning_rate": 3.028332559219694e-06, "loss": 0.8607, "step": 12183 }, { "epoch": 0.7275767347426251, "grad_norm": 2.260038137435913, "learning_rate": 3.0276690332426517e-06, "loss": 0.8003, "step": 12184 }, { "epoch": 0.7276364504956407, "grad_norm": 1.6943655014038086, "learning_rate": 3.02700550726561e-06, "loss": 0.8224, "step": 12185 }, { "epoch": 0.7276961662486564, "grad_norm": 2.280780076980591, "learning_rate": 3.0263419812885675e-06, "loss": 0.8212, "step": 12186 }, { "epoch": 0.7277558820016721, "grad_norm": 2.669222116470337, "learning_rate": 3.025678455311526e-06, "loss": 0.8239, "step": 12187 }, { "epoch": 0.7278155977546877, "grad_norm": 2.1858625411987305, "learning_rate": 3.0250149293344837e-06, "loss": 0.8099, "step": 12188 }, { "epoch": 0.7278753135077033, "grad_norm": 3.4690725803375244, "learning_rate": 3.0243514033574418e-06, "loss": 0.8438, "step": 12189 }, { "epoch": 0.727935029260719, "grad_norm": 1.8802108764648438, "learning_rate": 3.0236878773803994e-06, "loss": 0.8314, "step": 12190 }, { "epoch": 0.7279947450137346, "grad_norm": 2.5249693393707275, "learning_rate": 3.0230243514033575e-06, "loss": 0.8577, "step": 12191 }, { "epoch": 0.7280544607667503, "grad_norm": 2.5807156562805176, "learning_rate": 3.022360825426316e-06, "loss": 0.8212, "step": 12192 }, { "epoch": 0.728114176519766, "grad_norm": 1.8322184085845947, "learning_rate": 3.0216972994492737e-06, "loss": 0.796, "step": 12193 }, { "epoch": 0.7281738922727815, "grad_norm": 2.096376657485962, "learning_rate": 3.021033773472232e-06, "loss": 0.836, "step": 12194 }, { "epoch": 0.7282336080257972, "grad_norm": 2.0158843994140625, "learning_rate": 3.0203702474951895e-06, "loss": 0.8557, "step": 12195 }, { "epoch": 0.7282933237788128, "grad_norm": 2.704815626144409, "learning_rate": 3.0197067215181476e-06, "loss": 0.8236, "step": 12196 }, { "epoch": 0.7283530395318285, "grad_norm": 2.142662286758423, "learning_rate": 3.0190431955411057e-06, "loss": 0.8276, "step": 12197 }, { "epoch": 0.7284127552848442, "grad_norm": 2.225503444671631, "learning_rate": 3.018379669564064e-06, "loss": 0.8141, "step": 12198 }, { "epoch": 0.7284724710378598, "grad_norm": 1.5463497638702393, "learning_rate": 3.0177161435870215e-06, "loss": 0.816, "step": 12199 }, { "epoch": 0.7285321867908754, "grad_norm": 2.5951669216156006, "learning_rate": 3.0170526176099796e-06, "loss": 0.8055, "step": 12200 }, { "epoch": 0.7285321867908754, "eval_text_loss": 0.895301342010498, "eval_text_runtime": 15.1762, "eval_text_samples_per_second": 263.57, "eval_text_steps_per_second": 0.527, "step": 12200 }, { "epoch": 0.7285321867908754, "eval_image_loss": 0.601141095161438, "eval_image_runtime": 5.0144, "eval_image_samples_per_second": 797.704, "eval_image_steps_per_second": 1.595, "step": 12200 }, { "epoch": 0.7285321867908754, "eval_video_loss": 1.0325884819030762, "eval_video_runtime": 77.1883, "eval_video_samples_per_second": 51.821, "eval_video_steps_per_second": 0.104, "step": 12200 }, { "epoch": 0.7285919025438911, "grad_norm": 2.3799939155578613, "learning_rate": 3.0163890916329373e-06, "loss": 0.8571, "step": 12201 }, { "epoch": 0.7286516182969067, "grad_norm": 2.34394907951355, "learning_rate": 3.015725565655896e-06, "loss": 0.8098, "step": 12202 }, { "epoch": 0.7287113340499224, "grad_norm": 2.0129737854003906, "learning_rate": 3.015062039678854e-06, "loss": 0.8385, "step": 12203 }, { "epoch": 0.728771049802938, "grad_norm": 2.477283000946045, "learning_rate": 3.0143985137018116e-06, "loss": 0.8541, "step": 12204 }, { "epoch": 0.7288307655559536, "grad_norm": 1.7944633960723877, "learning_rate": 3.0137349877247697e-06, "loss": 0.8017, "step": 12205 }, { "epoch": 0.7288904813089693, "grad_norm": 1.6516919136047363, "learning_rate": 3.0130714617477274e-06, "loss": 0.8518, "step": 12206 }, { "epoch": 0.7289501970619849, "grad_norm": 1.464909553527832, "learning_rate": 3.012407935770686e-06, "loss": 0.8096, "step": 12207 }, { "epoch": 0.7290099128150006, "grad_norm": 2.3384032249450684, "learning_rate": 3.011744409793644e-06, "loss": 0.8303, "step": 12208 }, { "epoch": 0.7290696285680163, "grad_norm": 2.0813565254211426, "learning_rate": 3.0110808838166017e-06, "loss": 0.8304, "step": 12209 }, { "epoch": 0.7291293443210319, "grad_norm": 10.499258995056152, "learning_rate": 3.0104173578395598e-06, "loss": 0.8179, "step": 12210 }, { "epoch": 0.7291890600740475, "grad_norm": 2.000566244125366, "learning_rate": 3.0097538318625174e-06, "loss": 0.8287, "step": 12211 }, { "epoch": 0.7292487758270632, "grad_norm": 2.132124662399292, "learning_rate": 3.009090305885476e-06, "loss": 0.82, "step": 12212 }, { "epoch": 0.7293084915800788, "grad_norm": 2.295764684677124, "learning_rate": 3.0084267799084336e-06, "loss": 0.8267, "step": 12213 }, { "epoch": 0.7293682073330945, "grad_norm": 1.7429800033569336, "learning_rate": 3.0077632539313917e-06, "loss": 0.8039, "step": 12214 }, { "epoch": 0.7294279230861102, "grad_norm": 1.662331461906433, "learning_rate": 3.0070997279543494e-06, "loss": 0.7929, "step": 12215 }, { "epoch": 0.7294876388391257, "grad_norm": 5.559218883514404, "learning_rate": 3.0064362019773075e-06, "loss": 0.8887, "step": 12216 }, { "epoch": 0.7295473545921414, "grad_norm": 1.9220064878463745, "learning_rate": 3.005772676000266e-06, "loss": 0.8007, "step": 12217 }, { "epoch": 0.729607070345157, "grad_norm": 1.8928003311157227, "learning_rate": 3.0051091500232237e-06, "loss": 0.8288, "step": 12218 }, { "epoch": 0.7296667860981727, "grad_norm": 3.478175163269043, "learning_rate": 3.004445624046182e-06, "loss": 0.8293, "step": 12219 }, { "epoch": 0.7297265018511884, "grad_norm": 2.8236043453216553, "learning_rate": 3.0037820980691395e-06, "loss": 0.8299, "step": 12220 }, { "epoch": 0.729786217604204, "grad_norm": 2.2365686893463135, "learning_rate": 3.0031185720920976e-06, "loss": 0.7814, "step": 12221 }, { "epoch": 0.7298459333572196, "grad_norm": 4.135008335113525, "learning_rate": 3.0024550461150557e-06, "loss": 0.8149, "step": 12222 }, { "epoch": 0.7299056491102353, "grad_norm": 2.0404770374298096, "learning_rate": 3.001791520138014e-06, "loss": 0.8475, "step": 12223 }, { "epoch": 0.7299653648632509, "grad_norm": 2.0115270614624023, "learning_rate": 3.0011279941609715e-06, "loss": 0.8641, "step": 12224 }, { "epoch": 0.7300250806162666, "grad_norm": 2.10463285446167, "learning_rate": 3.0004644681839296e-06, "loss": 0.8031, "step": 12225 }, { "epoch": 0.7300847963692823, "grad_norm": 2.1645092964172363, "learning_rate": 2.9998009422068873e-06, "loss": 0.8339, "step": 12226 }, { "epoch": 0.7301445121222979, "grad_norm": 1.9048542976379395, "learning_rate": 2.9991374162298458e-06, "loss": 0.855, "step": 12227 }, { "epoch": 0.7302042278753135, "grad_norm": 1.590258240699768, "learning_rate": 2.998473890252804e-06, "loss": 0.8347, "step": 12228 }, { "epoch": 0.7302639436283291, "grad_norm": 2.517768383026123, "learning_rate": 2.9978103642757616e-06, "loss": 0.7891, "step": 12229 }, { "epoch": 0.7303236593813448, "grad_norm": 2.228642702102661, "learning_rate": 2.9971468382987197e-06, "loss": 0.8579, "step": 12230 }, { "epoch": 0.7303833751343605, "grad_norm": 3.187596082687378, "learning_rate": 2.9964833123216773e-06, "loss": 0.8397, "step": 12231 }, { "epoch": 0.7304430908873761, "grad_norm": 2.0852553844451904, "learning_rate": 2.995819786344636e-06, "loss": 0.8544, "step": 12232 }, { "epoch": 0.7305028066403917, "grad_norm": 4.021069049835205, "learning_rate": 2.995156260367594e-06, "loss": 0.8337, "step": 12233 }, { "epoch": 0.7305625223934074, "grad_norm": 1.9672787189483643, "learning_rate": 2.9944927343905516e-06, "loss": 0.8042, "step": 12234 }, { "epoch": 0.730622238146423, "grad_norm": 2.254671573638916, "learning_rate": 2.9938292084135097e-06, "loss": 0.8025, "step": 12235 }, { "epoch": 0.7306819538994387, "grad_norm": 3.571662187576294, "learning_rate": 2.9931656824364674e-06, "loss": 0.8561, "step": 12236 }, { "epoch": 0.7307416696524544, "grad_norm": 2.299771308898926, "learning_rate": 2.992502156459426e-06, "loss": 0.855, "step": 12237 }, { "epoch": 0.73080138540547, "grad_norm": 1.680127501487732, "learning_rate": 2.9918386304823836e-06, "loss": 0.8258, "step": 12238 }, { "epoch": 0.7308611011584856, "grad_norm": 2.316230297088623, "learning_rate": 2.9911751045053417e-06, "loss": 0.8335, "step": 12239 }, { "epoch": 0.7309208169115012, "grad_norm": 3.343879461288452, "learning_rate": 2.9905115785282994e-06, "loss": 0.8177, "step": 12240 }, { "epoch": 0.7309805326645169, "grad_norm": 1.739864706993103, "learning_rate": 2.9898480525512575e-06, "loss": 0.8306, "step": 12241 }, { "epoch": 0.7310402484175326, "grad_norm": 1.671106219291687, "learning_rate": 2.989184526574216e-06, "loss": 0.8131, "step": 12242 }, { "epoch": 0.7310999641705482, "grad_norm": 1.820339322090149, "learning_rate": 2.9885210005971737e-06, "loss": 0.8291, "step": 12243 }, { "epoch": 0.7311596799235638, "grad_norm": 2.007542371749878, "learning_rate": 2.987857474620132e-06, "loss": 0.8171, "step": 12244 }, { "epoch": 0.7312193956765795, "grad_norm": 3.0832247734069824, "learning_rate": 2.9871939486430895e-06, "loss": 0.8415, "step": 12245 }, { "epoch": 0.7312791114295951, "grad_norm": 5.156625270843506, "learning_rate": 2.9865304226660476e-06, "loss": 0.8326, "step": 12246 }, { "epoch": 0.7313388271826108, "grad_norm": 1.990472435951233, "learning_rate": 2.9858668966890057e-06, "loss": 0.8219, "step": 12247 }, { "epoch": 0.7313985429356265, "grad_norm": 2.7882487773895264, "learning_rate": 2.9852033707119638e-06, "loss": 0.8016, "step": 12248 }, { "epoch": 0.7314582586886421, "grad_norm": 2.7387027740478516, "learning_rate": 2.9845398447349214e-06, "loss": 0.8221, "step": 12249 }, { "epoch": 0.7315179744416577, "grad_norm": 2.459703207015991, "learning_rate": 2.9838763187578795e-06, "loss": 0.7927, "step": 12250 }, { "epoch": 0.7315776901946733, "grad_norm": 1.898019790649414, "learning_rate": 2.9832127927808372e-06, "loss": 0.8232, "step": 12251 }, { "epoch": 0.731637405947689, "grad_norm": 2.0285956859588623, "learning_rate": 2.9825492668037957e-06, "loss": 0.8569, "step": 12252 }, { "epoch": 0.7316971217007047, "grad_norm": 2.3855419158935547, "learning_rate": 2.981885740826754e-06, "loss": 0.8304, "step": 12253 }, { "epoch": 0.7317568374537203, "grad_norm": 2.5678632259368896, "learning_rate": 2.9812222148497115e-06, "loss": 0.8201, "step": 12254 }, { "epoch": 0.7318165532067359, "grad_norm": 2.5097157955169678, "learning_rate": 2.9805586888726696e-06, "loss": 0.8475, "step": 12255 }, { "epoch": 0.7318762689597516, "grad_norm": 3.060351848602295, "learning_rate": 2.9798951628956273e-06, "loss": 0.8604, "step": 12256 }, { "epoch": 0.7319359847127672, "grad_norm": 4.800553321838379, "learning_rate": 2.979231636918586e-06, "loss": 0.8203, "step": 12257 }, { "epoch": 0.7319957004657829, "grad_norm": 2.707794666290283, "learning_rate": 2.978568110941544e-06, "loss": 0.8727, "step": 12258 }, { "epoch": 0.7320554162187985, "grad_norm": 1.9263066053390503, "learning_rate": 2.9779045849645016e-06, "loss": 0.8252, "step": 12259 }, { "epoch": 0.7321151319718142, "grad_norm": 2.448868989944458, "learning_rate": 2.9772410589874597e-06, "loss": 0.8671, "step": 12260 }, { "epoch": 0.7321748477248298, "grad_norm": 2.2172110080718994, "learning_rate": 2.9765775330104174e-06, "loss": 0.8436, "step": 12261 }, { "epoch": 0.7322345634778454, "grad_norm": 1.4686824083328247, "learning_rate": 2.975914007033376e-06, "loss": 0.8186, "step": 12262 }, { "epoch": 0.7322942792308611, "grad_norm": 2.0975890159606934, "learning_rate": 2.9752504810563336e-06, "loss": 0.8685, "step": 12263 }, { "epoch": 0.7323539949838768, "grad_norm": 3.0240519046783447, "learning_rate": 2.9745869550792917e-06, "loss": 0.8051, "step": 12264 }, { "epoch": 0.7324137107368924, "grad_norm": 2.858036994934082, "learning_rate": 2.9739234291022494e-06, "loss": 0.8128, "step": 12265 }, { "epoch": 0.732473426489908, "grad_norm": 1.7656803131103516, "learning_rate": 2.9732599031252075e-06, "loss": 0.8318, "step": 12266 }, { "epoch": 0.7325331422429237, "grad_norm": 1.7991759777069092, "learning_rate": 2.972596377148166e-06, "loss": 0.8416, "step": 12267 }, { "epoch": 0.7325928579959393, "grad_norm": 2.8182308673858643, "learning_rate": 2.9719328511711237e-06, "loss": 0.8592, "step": 12268 }, { "epoch": 0.732652573748955, "grad_norm": 2.3353774547576904, "learning_rate": 2.9712693251940818e-06, "loss": 0.8315, "step": 12269 }, { "epoch": 0.7327122895019706, "grad_norm": 1.7361887693405151, "learning_rate": 2.9706057992170394e-06, "loss": 0.8505, "step": 12270 }, { "epoch": 0.7327720052549863, "grad_norm": 5.312726020812988, "learning_rate": 2.9699422732399975e-06, "loss": 0.8774, "step": 12271 }, { "epoch": 0.7328317210080019, "grad_norm": 2.5545382499694824, "learning_rate": 2.9692787472629556e-06, "loss": 0.7947, "step": 12272 }, { "epoch": 0.7328914367610175, "grad_norm": 2.3287312984466553, "learning_rate": 2.9686152212859137e-06, "loss": 0.8017, "step": 12273 }, { "epoch": 0.7329511525140332, "grad_norm": 1.9765576124191284, "learning_rate": 2.9679516953088714e-06, "loss": 0.8363, "step": 12274 }, { "epoch": 0.7330108682670489, "grad_norm": 1.7547723054885864, "learning_rate": 2.9672881693318295e-06, "loss": 0.8346, "step": 12275 }, { "epoch": 0.7330705840200645, "grad_norm": 1.6239794492721558, "learning_rate": 2.966624643354787e-06, "loss": 0.8103, "step": 12276 }, { "epoch": 0.7331302997730801, "grad_norm": 3.786721706390381, "learning_rate": 2.9659611173777457e-06, "loss": 0.8392, "step": 12277 }, { "epoch": 0.7331900155260958, "grad_norm": 1.5835552215576172, "learning_rate": 2.965297591400704e-06, "loss": 0.7894, "step": 12278 }, { "epoch": 0.7332497312791114, "grad_norm": 1.9752813577651978, "learning_rate": 2.9646340654236615e-06, "loss": 0.8518, "step": 12279 }, { "epoch": 0.7333094470321271, "grad_norm": 1.6357954740524292, "learning_rate": 2.9639705394466196e-06, "loss": 0.8199, "step": 12280 }, { "epoch": 0.7333691627851427, "grad_norm": 2.0269641876220703, "learning_rate": 2.9633070134695773e-06, "loss": 0.8519, "step": 12281 }, { "epoch": 0.7334288785381584, "grad_norm": 2.3430960178375244, "learning_rate": 2.962643487492536e-06, "loss": 0.8176, "step": 12282 }, { "epoch": 0.733488594291174, "grad_norm": 3.250000238418579, "learning_rate": 2.961979961515494e-06, "loss": 0.8643, "step": 12283 }, { "epoch": 0.7335483100441896, "grad_norm": 1.8431719541549683, "learning_rate": 2.9613164355384516e-06, "loss": 0.833, "step": 12284 }, { "epoch": 0.7336080257972053, "grad_norm": 3.321443557739258, "learning_rate": 2.9606529095614097e-06, "loss": 0.8223, "step": 12285 }, { "epoch": 0.733667741550221, "grad_norm": 2.215115547180176, "learning_rate": 2.9599893835843674e-06, "loss": 0.8417, "step": 12286 }, { "epoch": 0.7337274573032366, "grad_norm": 2.289329767227173, "learning_rate": 2.959325857607326e-06, "loss": 0.8473, "step": 12287 }, { "epoch": 0.7337871730562522, "grad_norm": 2.1819169521331787, "learning_rate": 2.9586623316302836e-06, "loss": 0.8182, "step": 12288 }, { "epoch": 0.7338468888092678, "grad_norm": 3.9671130180358887, "learning_rate": 2.9579988056532417e-06, "loss": 0.837, "step": 12289 }, { "epoch": 0.7339066045622835, "grad_norm": 1.9222538471221924, "learning_rate": 2.9573352796761993e-06, "loss": 0.8426, "step": 12290 }, { "epoch": 0.7339663203152992, "grad_norm": 2.0872669219970703, "learning_rate": 2.9566717536991574e-06, "loss": 0.8552, "step": 12291 }, { "epoch": 0.7340260360683148, "grad_norm": 3.317397356033325, "learning_rate": 2.956008227722116e-06, "loss": 0.8529, "step": 12292 }, { "epoch": 0.7340857518213305, "grad_norm": 1.9492374658584595, "learning_rate": 2.9553447017450736e-06, "loss": 0.8342, "step": 12293 }, { "epoch": 0.7341454675743461, "grad_norm": 2.793962240219116, "learning_rate": 2.9546811757680317e-06, "loss": 0.8522, "step": 12294 }, { "epoch": 0.7342051833273617, "grad_norm": 1.8696064949035645, "learning_rate": 2.9540176497909894e-06, "loss": 0.8363, "step": 12295 }, { "epoch": 0.7342648990803774, "grad_norm": 2.4551196098327637, "learning_rate": 2.9533541238139475e-06, "loss": 0.8325, "step": 12296 }, { "epoch": 0.7343246148333931, "grad_norm": 4.275002479553223, "learning_rate": 2.9526905978369056e-06, "loss": 0.8327, "step": 12297 }, { "epoch": 0.7343843305864087, "grad_norm": 2.3839375972747803, "learning_rate": 2.9520270718598637e-06, "loss": 0.8486, "step": 12298 }, { "epoch": 0.7344440463394244, "grad_norm": 2.3955514430999756, "learning_rate": 2.9513635458828214e-06, "loss": 0.8585, "step": 12299 }, { "epoch": 0.73450376209244, "grad_norm": 1.6598966121673584, "learning_rate": 2.9507000199057795e-06, "loss": 0.837, "step": 12300 }, { "epoch": 0.7345634778454556, "grad_norm": 2.3709895610809326, "learning_rate": 2.950036493928737e-06, "loss": 0.8091, "step": 12301 }, { "epoch": 0.7346231935984713, "grad_norm": 1.87245774269104, "learning_rate": 2.9493729679516957e-06, "loss": 0.8102, "step": 12302 }, { "epoch": 0.734682909351487, "grad_norm": 2.256768226623535, "learning_rate": 2.948709441974654e-06, "loss": 0.8123, "step": 12303 }, { "epoch": 0.7347426251045026, "grad_norm": 2.1530327796936035, "learning_rate": 2.9480459159976115e-06, "loss": 0.8339, "step": 12304 }, { "epoch": 0.7348023408575182, "grad_norm": 2.887488603591919, "learning_rate": 2.9473823900205696e-06, "loss": 0.8446, "step": 12305 }, { "epoch": 0.7348620566105338, "grad_norm": 1.6942592859268188, "learning_rate": 2.9467188640435272e-06, "loss": 0.8636, "step": 12306 }, { "epoch": 0.7349217723635495, "grad_norm": 1.917739748954773, "learning_rate": 2.9460553380664858e-06, "loss": 0.8287, "step": 12307 }, { "epoch": 0.7349814881165652, "grad_norm": 1.6951323747634888, "learning_rate": 2.945391812089444e-06, "loss": 0.8392, "step": 12308 }, { "epoch": 0.7350412038695808, "grad_norm": 1.6272119283676147, "learning_rate": 2.9447282861124015e-06, "loss": 0.7835, "step": 12309 }, { "epoch": 0.7351009196225965, "grad_norm": 2.4064226150512695, "learning_rate": 2.9440647601353596e-06, "loss": 0.7912, "step": 12310 }, { "epoch": 0.735160635375612, "grad_norm": 4.150492191314697, "learning_rate": 2.9434012341583173e-06, "loss": 0.8309, "step": 12311 }, { "epoch": 0.7352203511286277, "grad_norm": 1.9374381303787231, "learning_rate": 2.942737708181276e-06, "loss": 0.8648, "step": 12312 }, { "epoch": 0.7352800668816434, "grad_norm": 1.6871482133865356, "learning_rate": 2.9420741822042335e-06, "loss": 0.814, "step": 12313 }, { "epoch": 0.735339782634659, "grad_norm": 1.9520944356918335, "learning_rate": 2.9414106562271916e-06, "loss": 0.8463, "step": 12314 }, { "epoch": 0.7353994983876747, "grad_norm": 2.0313961505889893, "learning_rate": 2.9407471302501493e-06, "loss": 0.8532, "step": 12315 }, { "epoch": 0.7354592141406903, "grad_norm": 2.936508893966675, "learning_rate": 2.9400836042731074e-06, "loss": 0.8327, "step": 12316 }, { "epoch": 0.7355189298937059, "grad_norm": 2.2316548824310303, "learning_rate": 2.939420078296066e-06, "loss": 0.8461, "step": 12317 }, { "epoch": 0.7355786456467216, "grad_norm": 1.9176239967346191, "learning_rate": 2.9387565523190236e-06, "loss": 0.802, "step": 12318 }, { "epoch": 0.7356383613997373, "grad_norm": 2.1074471473693848, "learning_rate": 2.9380930263419817e-06, "loss": 0.8033, "step": 12319 }, { "epoch": 0.7356980771527529, "grad_norm": 1.812200665473938, "learning_rate": 2.9374295003649394e-06, "loss": 0.8269, "step": 12320 }, { "epoch": 0.7357577929057686, "grad_norm": 2.0338079929351807, "learning_rate": 2.9367659743878975e-06, "loss": 0.8242, "step": 12321 }, { "epoch": 0.7358175086587841, "grad_norm": 2.285621404647827, "learning_rate": 2.9361024484108556e-06, "loss": 0.8631, "step": 12322 }, { "epoch": 0.7358772244117998, "grad_norm": 2.1281898021698, "learning_rate": 2.9354389224338137e-06, "loss": 0.8391, "step": 12323 }, { "epoch": 0.7359369401648155, "grad_norm": 2.340533494949341, "learning_rate": 2.9347753964567714e-06, "loss": 0.815, "step": 12324 }, { "epoch": 0.7359966559178311, "grad_norm": 2.8506133556365967, "learning_rate": 2.9341118704797295e-06, "loss": 0.8298, "step": 12325 }, { "epoch": 0.7360563716708468, "grad_norm": 3.390146493911743, "learning_rate": 2.933448344502687e-06, "loss": 0.832, "step": 12326 }, { "epoch": 0.7361160874238624, "grad_norm": 3.3480281829833984, "learning_rate": 2.9327848185256457e-06, "loss": 0.8093, "step": 12327 }, { "epoch": 0.736175803176878, "grad_norm": 1.895980954170227, "learning_rate": 2.9321212925486038e-06, "loss": 0.8428, "step": 12328 }, { "epoch": 0.7362355189298937, "grad_norm": 3.3868062496185303, "learning_rate": 2.9314577665715614e-06, "loss": 0.8589, "step": 12329 }, { "epoch": 0.7362952346829094, "grad_norm": 1.8677622079849243, "learning_rate": 2.9307942405945195e-06, "loss": 0.8231, "step": 12330 }, { "epoch": 0.736354950435925, "grad_norm": 2.4160196781158447, "learning_rate": 2.9301307146174772e-06, "loss": 0.8364, "step": 12331 }, { "epoch": 0.7364146661889407, "grad_norm": 1.8210656642913818, "learning_rate": 2.9294671886404357e-06, "loss": 0.8335, "step": 12332 }, { "epoch": 0.7364743819419562, "grad_norm": 2.0430383682250977, "learning_rate": 2.928803662663394e-06, "loss": 0.8355, "step": 12333 }, { "epoch": 0.7365340976949719, "grad_norm": 3.445523977279663, "learning_rate": 2.9281401366863515e-06, "loss": 0.8286, "step": 12334 }, { "epoch": 0.7365938134479876, "grad_norm": 2.7324674129486084, "learning_rate": 2.9274766107093096e-06, "loss": 0.8501, "step": 12335 }, { "epoch": 0.7366535292010032, "grad_norm": 1.886215329170227, "learning_rate": 2.9268130847322673e-06, "loss": 0.8116, "step": 12336 }, { "epoch": 0.7367132449540189, "grad_norm": 2.1019303798675537, "learning_rate": 2.926149558755226e-06, "loss": 0.8193, "step": 12337 }, { "epoch": 0.7367729607070345, "grad_norm": 2.2396767139434814, "learning_rate": 2.9254860327781835e-06, "loss": 0.8552, "step": 12338 }, { "epoch": 0.7368326764600501, "grad_norm": 2.441676139831543, "learning_rate": 2.9248225068011416e-06, "loss": 0.8747, "step": 12339 }, { "epoch": 0.7368923922130658, "grad_norm": 1.8848533630371094, "learning_rate": 2.9241589808240993e-06, "loss": 0.8494, "step": 12340 }, { "epoch": 0.7369521079660815, "grad_norm": 2.340731143951416, "learning_rate": 2.9234954548470574e-06, "loss": 0.827, "step": 12341 }, { "epoch": 0.7370118237190971, "grad_norm": 2.8457136154174805, "learning_rate": 2.922831928870016e-06, "loss": 0.8657, "step": 12342 }, { "epoch": 0.7370715394721128, "grad_norm": 1.8704042434692383, "learning_rate": 2.9221684028929736e-06, "loss": 0.8298, "step": 12343 }, { "epoch": 0.7371312552251283, "grad_norm": 1.8359421491622925, "learning_rate": 2.9215048769159317e-06, "loss": 0.8402, "step": 12344 }, { "epoch": 0.737190970978144, "grad_norm": 3.9772210121154785, "learning_rate": 2.9208413509388894e-06, "loss": 0.774, "step": 12345 }, { "epoch": 0.7372506867311597, "grad_norm": 2.0560104846954346, "learning_rate": 2.9201778249618475e-06, "loss": 0.8193, "step": 12346 }, { "epoch": 0.7373104024841753, "grad_norm": 1.81473708152771, "learning_rate": 2.9195142989848056e-06, "loss": 0.8227, "step": 12347 }, { "epoch": 0.737370118237191, "grad_norm": 2.484398603439331, "learning_rate": 2.9188507730077637e-06, "loss": 0.8268, "step": 12348 }, { "epoch": 0.7374298339902066, "grad_norm": 2.9643592834472656, "learning_rate": 2.9181872470307213e-06, "loss": 0.8287, "step": 12349 }, { "epoch": 0.7374895497432222, "grad_norm": 1.7888940572738647, "learning_rate": 2.9175237210536794e-06, "loss": 0.8153, "step": 12350 }, { "epoch": 0.7375492654962379, "grad_norm": 2.5764834880828857, "learning_rate": 2.916860195076637e-06, "loss": 0.8076, "step": 12351 }, { "epoch": 0.7376089812492536, "grad_norm": 1.716697096824646, "learning_rate": 2.9161966690995956e-06, "loss": 0.8247, "step": 12352 }, { "epoch": 0.7376686970022692, "grad_norm": 2.697916030883789, "learning_rate": 2.9155331431225537e-06, "loss": 0.7984, "step": 12353 }, { "epoch": 0.7377284127552849, "grad_norm": 2.230837106704712, "learning_rate": 2.9148696171455114e-06, "loss": 0.8221, "step": 12354 }, { "epoch": 0.7377881285083004, "grad_norm": 2.231994390487671, "learning_rate": 2.9142060911684695e-06, "loss": 0.8256, "step": 12355 }, { "epoch": 0.7378478442613161, "grad_norm": 1.7770131826400757, "learning_rate": 2.913542565191427e-06, "loss": 0.8348, "step": 12356 }, { "epoch": 0.7379075600143318, "grad_norm": 1.8520082235336304, "learning_rate": 2.9128790392143857e-06, "loss": 0.8278, "step": 12357 }, { "epoch": 0.7379672757673474, "grad_norm": 1.988395094871521, "learning_rate": 2.912215513237344e-06, "loss": 0.8457, "step": 12358 }, { "epoch": 0.7380269915203631, "grad_norm": 2.0508368015289307, "learning_rate": 2.9115519872603015e-06, "loss": 0.8218, "step": 12359 }, { "epoch": 0.7380867072733788, "grad_norm": 2.7213895320892334, "learning_rate": 2.9108884612832596e-06, "loss": 0.8376, "step": 12360 }, { "epoch": 0.7381464230263943, "grad_norm": 3.685394763946533, "learning_rate": 2.9102249353062173e-06, "loss": 0.8173, "step": 12361 }, { "epoch": 0.73820613877941, "grad_norm": 1.911920189857483, "learning_rate": 2.9095614093291758e-06, "loss": 0.8243, "step": 12362 }, { "epoch": 0.7382658545324257, "grad_norm": 4.4679460525512695, "learning_rate": 2.9088978833521335e-06, "loss": 0.8085, "step": 12363 }, { "epoch": 0.7383255702854413, "grad_norm": 1.6470863819122314, "learning_rate": 2.9082343573750916e-06, "loss": 0.8253, "step": 12364 }, { "epoch": 0.738385286038457, "grad_norm": 1.9297420978546143, "learning_rate": 2.9075708313980492e-06, "loss": 0.8505, "step": 12365 }, { "epoch": 0.7384450017914725, "grad_norm": 2.5652854442596436, "learning_rate": 2.9069073054210073e-06, "loss": 0.8082, "step": 12366 }, { "epoch": 0.7385047175444882, "grad_norm": 3.3323280811309814, "learning_rate": 2.906243779443966e-06, "loss": 0.8758, "step": 12367 }, { "epoch": 0.7385644332975039, "grad_norm": 2.3473987579345703, "learning_rate": 2.9055802534669235e-06, "loss": 0.7986, "step": 12368 }, { "epoch": 0.7386241490505195, "grad_norm": 1.9885129928588867, "learning_rate": 2.9049167274898816e-06, "loss": 0.8391, "step": 12369 }, { "epoch": 0.7386838648035352, "grad_norm": 2.1612675189971924, "learning_rate": 2.9042532015128393e-06, "loss": 0.8266, "step": 12370 }, { "epoch": 0.7387435805565509, "grad_norm": 2.1241614818573, "learning_rate": 2.9035896755357974e-06, "loss": 0.8033, "step": 12371 }, { "epoch": 0.7388032963095664, "grad_norm": 3.331848621368408, "learning_rate": 2.902926149558756e-06, "loss": 0.8207, "step": 12372 }, { "epoch": 0.7388630120625821, "grad_norm": 2.57922625541687, "learning_rate": 2.9022626235817136e-06, "loss": 0.8231, "step": 12373 }, { "epoch": 0.7389227278155978, "grad_norm": 2.6742656230926514, "learning_rate": 2.9015990976046713e-06, "loss": 0.814, "step": 12374 }, { "epoch": 0.7389824435686134, "grad_norm": 2.5912246704101562, "learning_rate": 2.9009355716276294e-06, "loss": 0.8159, "step": 12375 }, { "epoch": 0.7390421593216291, "grad_norm": 1.682410478591919, "learning_rate": 2.900272045650587e-06, "loss": 0.8389, "step": 12376 }, { "epoch": 0.7391018750746446, "grad_norm": 1.918846607208252, "learning_rate": 2.8996085196735456e-06, "loss": 0.8356, "step": 12377 }, { "epoch": 0.7391615908276603, "grad_norm": 1.7615729570388794, "learning_rate": 2.8989449936965037e-06, "loss": 0.81, "step": 12378 }, { "epoch": 0.739221306580676, "grad_norm": 2.0674967765808105, "learning_rate": 2.8982814677194614e-06, "loss": 0.833, "step": 12379 }, { "epoch": 0.7392810223336916, "grad_norm": 2.4380435943603516, "learning_rate": 2.8976179417424195e-06, "loss": 0.8075, "step": 12380 }, { "epoch": 0.7393407380867073, "grad_norm": 1.9640837907791138, "learning_rate": 2.896954415765377e-06, "loss": 0.824, "step": 12381 }, { "epoch": 0.739400453839723, "grad_norm": 2.0735292434692383, "learning_rate": 2.8962908897883357e-06, "loss": 0.8024, "step": 12382 }, { "epoch": 0.7394601695927385, "grad_norm": 4.939332485198975, "learning_rate": 2.8956273638112938e-06, "loss": 0.8114, "step": 12383 }, { "epoch": 0.7395198853457542, "grad_norm": 2.7389891147613525, "learning_rate": 2.8949638378342515e-06, "loss": 0.8305, "step": 12384 }, { "epoch": 0.7395796010987699, "grad_norm": 2.017155408859253, "learning_rate": 2.8943003118572096e-06, "loss": 0.8377, "step": 12385 }, { "epoch": 0.7396393168517855, "grad_norm": 1.954986333847046, "learning_rate": 2.8936367858801672e-06, "loss": 0.8352, "step": 12386 }, { "epoch": 0.7396990326048012, "grad_norm": 2.3809313774108887, "learning_rate": 2.8929732599031258e-06, "loss": 0.8257, "step": 12387 }, { "epoch": 0.7397587483578167, "grad_norm": 1.896921157836914, "learning_rate": 2.8923097339260834e-06, "loss": 0.8343, "step": 12388 }, { "epoch": 0.7398184641108324, "grad_norm": 1.6391533613204956, "learning_rate": 2.8916462079490415e-06, "loss": 0.8193, "step": 12389 }, { "epoch": 0.7398781798638481, "grad_norm": 1.7299671173095703, "learning_rate": 2.8909826819719992e-06, "loss": 0.8127, "step": 12390 }, { "epoch": 0.7399378956168637, "grad_norm": 5.425974369049072, "learning_rate": 2.8903191559949573e-06, "loss": 0.8858, "step": 12391 }, { "epoch": 0.7399976113698794, "grad_norm": 2.0076956748962402, "learning_rate": 2.889655630017916e-06, "loss": 0.8448, "step": 12392 }, { "epoch": 0.7400573271228951, "grad_norm": 2.033207654953003, "learning_rate": 2.8889921040408735e-06, "loss": 0.8168, "step": 12393 }, { "epoch": 0.7401170428759106, "grad_norm": 1.7369482517242432, "learning_rate": 2.8883285780638316e-06, "loss": 0.7843, "step": 12394 }, { "epoch": 0.7401767586289263, "grad_norm": 1.634332299232483, "learning_rate": 2.8876650520867893e-06, "loss": 0.8344, "step": 12395 }, { "epoch": 0.740236474381942, "grad_norm": 2.103907585144043, "learning_rate": 2.8870015261097474e-06, "loss": 0.814, "step": 12396 }, { "epoch": 0.7402961901349576, "grad_norm": 1.9118876457214355, "learning_rate": 2.886338000132706e-06, "loss": 0.8085, "step": 12397 }, { "epoch": 0.7403559058879733, "grad_norm": 3.4098622798919678, "learning_rate": 2.8856744741556636e-06, "loss": 0.8555, "step": 12398 }, { "epoch": 0.7404156216409888, "grad_norm": 2.8436455726623535, "learning_rate": 2.8850109481786217e-06, "loss": 0.8655, "step": 12399 }, { "epoch": 0.7404753373940045, "grad_norm": 4.079277038574219, "learning_rate": 2.8843474222015794e-06, "loss": 0.8164, "step": 12400 }, { "epoch": 0.7404753373940045, "eval_text_loss": 0.8946400880813599, "eval_text_runtime": 15.2136, "eval_text_samples_per_second": 262.922, "eval_text_steps_per_second": 0.526, "step": 12400 }, { "epoch": 0.7404753373940045, "eval_image_loss": 0.5996935367584229, "eval_image_runtime": 5.006, "eval_image_samples_per_second": 799.047, "eval_image_steps_per_second": 1.598, "step": 12400 }, { "epoch": 0.7404753373940045, "eval_video_loss": 1.0304052829742432, "eval_video_runtime": 76.3947, "eval_video_samples_per_second": 52.36, "eval_video_steps_per_second": 0.105, "step": 12400 }, { "epoch": 0.7405350531470202, "grad_norm": 1.835244059562683, "learning_rate": 2.8836838962245375e-06, "loss": 0.822, "step": 12401 }, { "epoch": 0.7405947689000358, "grad_norm": 2.014892101287842, "learning_rate": 2.883020370247495e-06, "loss": 0.8385, "step": 12402 }, { "epoch": 0.7406544846530515, "grad_norm": 2.5905938148498535, "learning_rate": 2.8823568442704537e-06, "loss": 0.854, "step": 12403 }, { "epoch": 0.7407142004060672, "grad_norm": 1.7213246822357178, "learning_rate": 2.8816933182934113e-06, "loss": 0.8426, "step": 12404 }, { "epoch": 0.7407739161590827, "grad_norm": 4.701025009155273, "learning_rate": 2.8810297923163694e-06, "loss": 0.8046, "step": 12405 }, { "epoch": 0.7408336319120984, "grad_norm": 2.1292879581451416, "learning_rate": 2.880366266339327e-06, "loss": 0.8216, "step": 12406 }, { "epoch": 0.740893347665114, "grad_norm": 1.6067519187927246, "learning_rate": 2.8797027403622852e-06, "loss": 0.8324, "step": 12407 }, { "epoch": 0.7409530634181297, "grad_norm": 2.068457841873169, "learning_rate": 2.8790392143852438e-06, "loss": 0.8188, "step": 12408 }, { "epoch": 0.7410127791711454, "grad_norm": 2.5947859287261963, "learning_rate": 2.8783756884082014e-06, "loss": 0.8079, "step": 12409 }, { "epoch": 0.7410724949241609, "grad_norm": 2.2533469200134277, "learning_rate": 2.8777121624311595e-06, "loss": 0.7952, "step": 12410 }, { "epoch": 0.7411322106771766, "grad_norm": 3.095092296600342, "learning_rate": 2.877048636454117e-06, "loss": 0.8151, "step": 12411 }, { "epoch": 0.7411919264301923, "grad_norm": 1.920985221862793, "learning_rate": 2.8763851104770753e-06, "loss": 0.7846, "step": 12412 }, { "epoch": 0.7412516421832079, "grad_norm": 1.9095685482025146, "learning_rate": 2.8757215845000334e-06, "loss": 0.8524, "step": 12413 }, { "epoch": 0.7413113579362236, "grad_norm": 2.925694704055786, "learning_rate": 2.8750580585229915e-06, "loss": 0.838, "step": 12414 }, { "epoch": 0.7413710736892393, "grad_norm": 1.7293705940246582, "learning_rate": 2.874394532545949e-06, "loss": 0.8644, "step": 12415 }, { "epoch": 0.7414307894422548, "grad_norm": 2.2080233097076416, "learning_rate": 2.8737310065689073e-06, "loss": 0.814, "step": 12416 }, { "epoch": 0.7414905051952705, "grad_norm": 1.924465537071228, "learning_rate": 2.873067480591865e-06, "loss": 0.8608, "step": 12417 }, { "epoch": 0.7415502209482862, "grad_norm": 2.7191390991210938, "learning_rate": 2.8724039546148235e-06, "loss": 0.8263, "step": 12418 }, { "epoch": 0.7416099367013018, "grad_norm": 1.9650547504425049, "learning_rate": 2.8717404286377816e-06, "loss": 0.8336, "step": 12419 }, { "epoch": 0.7416696524543175, "grad_norm": 5.511484146118164, "learning_rate": 2.8710769026607393e-06, "loss": 0.8167, "step": 12420 }, { "epoch": 0.741729368207333, "grad_norm": 1.6903878450393677, "learning_rate": 2.8704133766836974e-06, "loss": 0.848, "step": 12421 }, { "epoch": 0.7417890839603487, "grad_norm": 1.8562352657318115, "learning_rate": 2.869749850706655e-06, "loss": 0.8337, "step": 12422 }, { "epoch": 0.7418487997133644, "grad_norm": 2.8733866214752197, "learning_rate": 2.8690863247296136e-06, "loss": 0.8429, "step": 12423 }, { "epoch": 0.74190851546638, "grad_norm": 1.5678023099899292, "learning_rate": 2.8684227987525717e-06, "loss": 0.8104, "step": 12424 }, { "epoch": 0.7419682312193957, "grad_norm": 1.6467900276184082, "learning_rate": 2.8677592727755293e-06, "loss": 0.7878, "step": 12425 }, { "epoch": 0.7420279469724114, "grad_norm": 2.430981159210205, "learning_rate": 2.8670957467984874e-06, "loss": 0.818, "step": 12426 }, { "epoch": 0.7420876627254269, "grad_norm": 2.0493690967559814, "learning_rate": 2.866432220821445e-06, "loss": 0.876, "step": 12427 }, { "epoch": 0.7421473784784426, "grad_norm": 1.6931911706924438, "learning_rate": 2.8657686948444036e-06, "loss": 0.8476, "step": 12428 }, { "epoch": 0.7422070942314583, "grad_norm": 2.849032402038574, "learning_rate": 2.8651051688673613e-06, "loss": 0.8016, "step": 12429 }, { "epoch": 0.7422668099844739, "grad_norm": 1.7962958812713623, "learning_rate": 2.8644416428903194e-06, "loss": 0.8476, "step": 12430 }, { "epoch": 0.7423265257374896, "grad_norm": 1.7544642686843872, "learning_rate": 2.863778116913277e-06, "loss": 0.8181, "step": 12431 }, { "epoch": 0.7423862414905052, "grad_norm": 1.6022039651870728, "learning_rate": 2.863114590936235e-06, "loss": 0.8411, "step": 12432 }, { "epoch": 0.7424459572435208, "grad_norm": 2.042433500289917, "learning_rate": 2.8624510649591937e-06, "loss": 0.8621, "step": 12433 }, { "epoch": 0.7425056729965365, "grad_norm": 1.8809140920639038, "learning_rate": 2.8617875389821514e-06, "loss": 0.8265, "step": 12434 }, { "epoch": 0.7425653887495521, "grad_norm": 2.082888126373291, "learning_rate": 2.8611240130051095e-06, "loss": 0.8266, "step": 12435 }, { "epoch": 0.7426251045025678, "grad_norm": 2.0594370365142822, "learning_rate": 2.860460487028067e-06, "loss": 0.8304, "step": 12436 }, { "epoch": 0.7426848202555835, "grad_norm": 2.9737606048583984, "learning_rate": 2.8597969610510253e-06, "loss": 0.8852, "step": 12437 }, { "epoch": 0.742744536008599, "grad_norm": 2.0795137882232666, "learning_rate": 2.8591334350739834e-06, "loss": 0.8351, "step": 12438 }, { "epoch": 0.7428042517616147, "grad_norm": 2.1247758865356445, "learning_rate": 2.8584699090969415e-06, "loss": 0.7953, "step": 12439 }, { "epoch": 0.7428639675146304, "grad_norm": 2.0257744789123535, "learning_rate": 2.857806383119899e-06, "loss": 0.8326, "step": 12440 }, { "epoch": 0.742923683267646, "grad_norm": 2.1375324726104736, "learning_rate": 2.8571428571428573e-06, "loss": 0.8437, "step": 12441 }, { "epoch": 0.7429833990206617, "grad_norm": 2.274341344833374, "learning_rate": 2.856479331165815e-06, "loss": 0.8139, "step": 12442 }, { "epoch": 0.7430431147736773, "grad_norm": 1.9017943143844604, "learning_rate": 2.8558158051887735e-06, "loss": 0.7679, "step": 12443 }, { "epoch": 0.7431028305266929, "grad_norm": 1.9656652212142944, "learning_rate": 2.8551522792117316e-06, "loss": 0.7759, "step": 12444 }, { "epoch": 0.7431625462797086, "grad_norm": 4.8071370124816895, "learning_rate": 2.8544887532346892e-06, "loss": 0.8128, "step": 12445 }, { "epoch": 0.7432222620327242, "grad_norm": 1.9139469861984253, "learning_rate": 2.8538252272576473e-06, "loss": 0.797, "step": 12446 }, { "epoch": 0.7432819777857399, "grad_norm": 1.9712663888931274, "learning_rate": 2.853161701280605e-06, "loss": 0.837, "step": 12447 }, { "epoch": 0.7433416935387556, "grad_norm": 3.3919014930725098, "learning_rate": 2.8524981753035635e-06, "loss": 0.8333, "step": 12448 }, { "epoch": 0.7434014092917711, "grad_norm": 2.564872980117798, "learning_rate": 2.8518346493265216e-06, "loss": 0.8573, "step": 12449 }, { "epoch": 0.7434611250447868, "grad_norm": 1.999228596687317, "learning_rate": 2.8511711233494793e-06, "loss": 0.8309, "step": 12450 }, { "epoch": 0.7435208407978025, "grad_norm": 2.839959144592285, "learning_rate": 2.8505075973724374e-06, "loss": 0.8066, "step": 12451 }, { "epoch": 0.7435805565508181, "grad_norm": 2.575031042098999, "learning_rate": 2.849844071395395e-06, "loss": 0.8155, "step": 12452 }, { "epoch": 0.7436402723038338, "grad_norm": 1.782849907875061, "learning_rate": 2.8491805454183536e-06, "loss": 0.8407, "step": 12453 }, { "epoch": 0.7436999880568494, "grad_norm": 2.810952663421631, "learning_rate": 2.8485170194413113e-06, "loss": 0.8097, "step": 12454 }, { "epoch": 0.743759703809865, "grad_norm": 1.9223333597183228, "learning_rate": 2.8478534934642694e-06, "loss": 0.8044, "step": 12455 }, { "epoch": 0.7438194195628807, "grad_norm": 1.9607927799224854, "learning_rate": 2.847189967487227e-06, "loss": 0.8282, "step": 12456 }, { "epoch": 0.7438791353158963, "grad_norm": 1.835530161857605, "learning_rate": 2.846526441510185e-06, "loss": 0.8023, "step": 12457 }, { "epoch": 0.743938851068912, "grad_norm": 1.7346285581588745, "learning_rate": 2.8458629155331437e-06, "loss": 0.7741, "step": 12458 }, { "epoch": 0.7439985668219277, "grad_norm": 1.9288923740386963, "learning_rate": 2.8451993895561014e-06, "loss": 0.8438, "step": 12459 }, { "epoch": 0.7440582825749432, "grad_norm": 1.978979468345642, "learning_rate": 2.8445358635790595e-06, "loss": 0.8559, "step": 12460 }, { "epoch": 0.7441179983279589, "grad_norm": 1.650039792060852, "learning_rate": 2.843872337602017e-06, "loss": 0.839, "step": 12461 }, { "epoch": 0.7441777140809746, "grad_norm": 2.6575138568878174, "learning_rate": 2.8432088116249752e-06, "loss": 0.8452, "step": 12462 }, { "epoch": 0.7442374298339902, "grad_norm": 2.126267910003662, "learning_rate": 2.8425452856479333e-06, "loss": 0.8593, "step": 12463 }, { "epoch": 0.7442971455870059, "grad_norm": 1.7764891386032104, "learning_rate": 2.8418817596708914e-06, "loss": 0.8109, "step": 12464 }, { "epoch": 0.7443568613400215, "grad_norm": 1.8071205615997314, "learning_rate": 2.841218233693849e-06, "loss": 0.7973, "step": 12465 }, { "epoch": 0.7444165770930371, "grad_norm": 2.8262288570404053, "learning_rate": 2.8405547077168072e-06, "loss": 0.8274, "step": 12466 }, { "epoch": 0.7444762928460528, "grad_norm": 2.810857057571411, "learning_rate": 2.839891181739765e-06, "loss": 0.8363, "step": 12467 }, { "epoch": 0.7445360085990684, "grad_norm": 2.370936393737793, "learning_rate": 2.8392276557627234e-06, "loss": 0.8307, "step": 12468 }, { "epoch": 0.7445957243520841, "grad_norm": 2.001697301864624, "learning_rate": 2.8385641297856815e-06, "loss": 0.8456, "step": 12469 }, { "epoch": 0.7446554401050998, "grad_norm": 1.8601869344711304, "learning_rate": 2.837900603808639e-06, "loss": 0.8199, "step": 12470 }, { "epoch": 0.7447151558581153, "grad_norm": 3.6952407360076904, "learning_rate": 2.8372370778315973e-06, "loss": 0.8642, "step": 12471 }, { "epoch": 0.744774871611131, "grad_norm": 2.4231479167938232, "learning_rate": 2.836573551854555e-06, "loss": 0.8452, "step": 12472 }, { "epoch": 0.7448345873641466, "grad_norm": 1.7367510795593262, "learning_rate": 2.8359100258775135e-06, "loss": 0.8361, "step": 12473 }, { "epoch": 0.7448943031171623, "grad_norm": 2.0138258934020996, "learning_rate": 2.8352464999004716e-06, "loss": 0.8269, "step": 12474 }, { "epoch": 0.744954018870178, "grad_norm": 1.924385905265808, "learning_rate": 2.8345829739234293e-06, "loss": 0.8438, "step": 12475 }, { "epoch": 0.7450137346231936, "grad_norm": 2.259253978729248, "learning_rate": 2.8339194479463874e-06, "loss": 0.8041, "step": 12476 }, { "epoch": 0.7450734503762092, "grad_norm": 1.9318456649780273, "learning_rate": 2.833255921969345e-06, "loss": 0.8587, "step": 12477 }, { "epoch": 0.7451331661292249, "grad_norm": 11.110444068908691, "learning_rate": 2.8325923959923036e-06, "loss": 0.8237, "step": 12478 }, { "epoch": 0.7451928818822405, "grad_norm": 3.692397356033325, "learning_rate": 2.8319288700152613e-06, "loss": 0.8725, "step": 12479 }, { "epoch": 0.7452525976352562, "grad_norm": 2.7501721382141113, "learning_rate": 2.8312653440382194e-06, "loss": 0.8312, "step": 12480 }, { "epoch": 0.7453123133882719, "grad_norm": 1.5924428701400757, "learning_rate": 2.830601818061177e-06, "loss": 0.8375, "step": 12481 }, { "epoch": 0.7453720291412874, "grad_norm": 2.9783170223236084, "learning_rate": 2.829938292084135e-06, "loss": 0.8297, "step": 12482 }, { "epoch": 0.7454317448943031, "grad_norm": 1.7150943279266357, "learning_rate": 2.8292747661070937e-06, "loss": 0.8173, "step": 12483 }, { "epoch": 0.7454914606473187, "grad_norm": 2.3973608016967773, "learning_rate": 2.8286112401300513e-06, "loss": 0.8564, "step": 12484 }, { "epoch": 0.7455511764003344, "grad_norm": 2.5484278202056885, "learning_rate": 2.8279477141530094e-06, "loss": 0.8094, "step": 12485 }, { "epoch": 0.7456108921533501, "grad_norm": 1.775206446647644, "learning_rate": 2.827284188175967e-06, "loss": 0.8367, "step": 12486 }, { "epoch": 0.7456706079063657, "grad_norm": 3.0111498832702637, "learning_rate": 2.8266206621989252e-06, "loss": 0.7554, "step": 12487 }, { "epoch": 0.7457303236593813, "grad_norm": 2.183603048324585, "learning_rate": 2.8259571362218833e-06, "loss": 0.8449, "step": 12488 }, { "epoch": 0.745790039412397, "grad_norm": 2.237177610397339, "learning_rate": 2.8252936102448414e-06, "loss": 0.8529, "step": 12489 }, { "epoch": 0.7458497551654126, "grad_norm": 1.951371192932129, "learning_rate": 2.824630084267799e-06, "loss": 0.8348, "step": 12490 }, { "epoch": 0.7459094709184283, "grad_norm": 2.6357688903808594, "learning_rate": 2.823966558290757e-06, "loss": 0.8267, "step": 12491 }, { "epoch": 0.745969186671444, "grad_norm": 2.353048086166382, "learning_rate": 2.823303032313715e-06, "loss": 0.8078, "step": 12492 }, { "epoch": 0.7460289024244596, "grad_norm": 2.3175837993621826, "learning_rate": 2.8226395063366734e-06, "loss": 0.8052, "step": 12493 }, { "epoch": 0.7460886181774752, "grad_norm": 2.564127206802368, "learning_rate": 2.8219759803596315e-06, "loss": 0.8244, "step": 12494 }, { "epoch": 0.7461483339304908, "grad_norm": 1.7421234846115112, "learning_rate": 2.821312454382589e-06, "loss": 0.793, "step": 12495 }, { "epoch": 0.7462080496835065, "grad_norm": 1.7294756174087524, "learning_rate": 2.8206489284055473e-06, "loss": 0.7682, "step": 12496 }, { "epoch": 0.7462677654365222, "grad_norm": 2.520751953125, "learning_rate": 2.819985402428505e-06, "loss": 0.817, "step": 12497 }, { "epoch": 0.7463274811895378, "grad_norm": 1.8929123878479004, "learning_rate": 2.8193218764514635e-06, "loss": 0.8298, "step": 12498 }, { "epoch": 0.7463871969425534, "grad_norm": 2.1254115104675293, "learning_rate": 2.8186583504744216e-06, "loss": 0.7984, "step": 12499 }, { "epoch": 0.7464469126955691, "grad_norm": 2.3920133113861084, "learning_rate": 2.8179948244973793e-06, "loss": 0.7953, "step": 12500 }, { "epoch": 0.7465066284485847, "grad_norm": 1.7801562547683716, "learning_rate": 2.8173312985203374e-06, "loss": 0.8495, "step": 12501 }, { "epoch": 0.7465663442016004, "grad_norm": 2.2845823764801025, "learning_rate": 2.816667772543295e-06, "loss": 0.803, "step": 12502 }, { "epoch": 0.7466260599546161, "grad_norm": 2.4790873527526855, "learning_rate": 2.8160042465662536e-06, "loss": 0.836, "step": 12503 }, { "epoch": 0.7466857757076317, "grad_norm": 3.3631272315979004, "learning_rate": 2.8153407205892112e-06, "loss": 0.7957, "step": 12504 }, { "epoch": 0.7467454914606473, "grad_norm": 2.6289498805999756, "learning_rate": 2.8146771946121693e-06, "loss": 0.8233, "step": 12505 }, { "epoch": 0.746805207213663, "grad_norm": 3.4061999320983887, "learning_rate": 2.814013668635127e-06, "loss": 0.8431, "step": 12506 }, { "epoch": 0.7468649229666786, "grad_norm": 2.237454891204834, "learning_rate": 2.813350142658085e-06, "loss": 0.8183, "step": 12507 }, { "epoch": 0.7469246387196943, "grad_norm": 2.2953736782073975, "learning_rate": 2.8126866166810436e-06, "loss": 0.8305, "step": 12508 }, { "epoch": 0.7469843544727099, "grad_norm": 1.9553935527801514, "learning_rate": 2.8120230907040013e-06, "loss": 0.8421, "step": 12509 }, { "epoch": 0.7470440702257255, "grad_norm": 2.452942132949829, "learning_rate": 2.8113595647269594e-06, "loss": 0.8041, "step": 12510 }, { "epoch": 0.7471037859787412, "grad_norm": 1.9760710000991821, "learning_rate": 2.810696038749917e-06, "loss": 0.8583, "step": 12511 }, { "epoch": 0.7471635017317568, "grad_norm": 3.0454840660095215, "learning_rate": 2.810032512772875e-06, "loss": 0.8054, "step": 12512 }, { "epoch": 0.7472232174847725, "grad_norm": 2.0383262634277344, "learning_rate": 2.8093689867958333e-06, "loss": 0.8518, "step": 12513 }, { "epoch": 0.7472829332377882, "grad_norm": 2.5235397815704346, "learning_rate": 2.8087054608187914e-06, "loss": 0.8218, "step": 12514 }, { "epoch": 0.7473426489908038, "grad_norm": 2.9937021732330322, "learning_rate": 2.808041934841749e-06, "loss": 0.8773, "step": 12515 }, { "epoch": 0.7474023647438194, "grad_norm": 2.4808382987976074, "learning_rate": 2.807378408864707e-06, "loss": 0.8156, "step": 12516 }, { "epoch": 0.747462080496835, "grad_norm": 4.882264137268066, "learning_rate": 2.806714882887665e-06, "loss": 0.7837, "step": 12517 }, { "epoch": 0.7475217962498507, "grad_norm": 2.3999176025390625, "learning_rate": 2.8060513569106234e-06, "loss": 0.8256, "step": 12518 }, { "epoch": 0.7475815120028664, "grad_norm": 2.3434505462646484, "learning_rate": 2.8053878309335815e-06, "loss": 0.8321, "step": 12519 }, { "epoch": 0.747641227755882, "grad_norm": 3.0161736011505127, "learning_rate": 2.804724304956539e-06, "loss": 0.7743, "step": 12520 }, { "epoch": 0.7477009435088976, "grad_norm": 2.1814627647399902, "learning_rate": 2.8040607789794972e-06, "loss": 0.7813, "step": 12521 }, { "epoch": 0.7477606592619133, "grad_norm": 3.996551752090454, "learning_rate": 2.803397253002455e-06, "loss": 0.8411, "step": 12522 }, { "epoch": 0.7478203750149289, "grad_norm": 1.582633376121521, "learning_rate": 2.8027337270254134e-06, "loss": 0.8276, "step": 12523 }, { "epoch": 0.7478800907679446, "grad_norm": 2.1112444400787354, "learning_rate": 2.8020702010483715e-06, "loss": 0.8171, "step": 12524 }, { "epoch": 0.7479398065209603, "grad_norm": 1.971364140510559, "learning_rate": 2.8014066750713292e-06, "loss": 0.8247, "step": 12525 }, { "epoch": 0.7479995222739759, "grad_norm": 3.8755805492401123, "learning_rate": 2.8007431490942873e-06, "loss": 0.8182, "step": 12526 }, { "epoch": 0.7480592380269915, "grad_norm": 2.2562477588653564, "learning_rate": 2.800079623117245e-06, "loss": 0.844, "step": 12527 }, { "epoch": 0.7481189537800071, "grad_norm": 2.6735095977783203, "learning_rate": 2.7994160971402035e-06, "loss": 0.8376, "step": 12528 }, { "epoch": 0.7481786695330228, "grad_norm": 1.9572991132736206, "learning_rate": 2.798752571163161e-06, "loss": 0.8025, "step": 12529 }, { "epoch": 0.7482383852860385, "grad_norm": 2.672022581100464, "learning_rate": 2.7980890451861193e-06, "loss": 0.8231, "step": 12530 }, { "epoch": 0.7482981010390541, "grad_norm": 2.9300739765167236, "learning_rate": 2.797425519209077e-06, "loss": 0.8261, "step": 12531 }, { "epoch": 0.7483578167920697, "grad_norm": 1.9015171527862549, "learning_rate": 2.796761993232035e-06, "loss": 0.7864, "step": 12532 }, { "epoch": 0.7484175325450854, "grad_norm": 3.5853192806243896, "learning_rate": 2.7960984672549936e-06, "loss": 0.8846, "step": 12533 }, { "epoch": 0.748477248298101, "grad_norm": 2.102123975753784, "learning_rate": 2.7954349412779513e-06, "loss": 0.8149, "step": 12534 }, { "epoch": 0.7485369640511167, "grad_norm": 5.317927837371826, "learning_rate": 2.7947714153009094e-06, "loss": 0.8537, "step": 12535 }, { "epoch": 0.7485966798041324, "grad_norm": 2.082655429840088, "learning_rate": 2.794107889323867e-06, "loss": 0.8495, "step": 12536 }, { "epoch": 0.748656395557148, "grad_norm": 2.018796920776367, "learning_rate": 2.793444363346825e-06, "loss": 0.8368, "step": 12537 }, { "epoch": 0.7487161113101636, "grad_norm": 1.8871355056762695, "learning_rate": 2.7927808373697833e-06, "loss": 0.8586, "step": 12538 }, { "epoch": 0.7487758270631792, "grad_norm": 1.8148149251937866, "learning_rate": 2.7921173113927414e-06, "loss": 0.8049, "step": 12539 }, { "epoch": 0.7488355428161949, "grad_norm": 2.915327548980713, "learning_rate": 2.791453785415699e-06, "loss": 0.8464, "step": 12540 }, { "epoch": 0.7488952585692106, "grad_norm": 2.2402396202087402, "learning_rate": 2.790790259438657e-06, "loss": 0.8135, "step": 12541 }, { "epoch": 0.7489549743222262, "grad_norm": 2.0817837715148926, "learning_rate": 2.790126733461615e-06, "loss": 0.8433, "step": 12542 }, { "epoch": 0.7490146900752418, "grad_norm": 7.269124507904053, "learning_rate": 2.7894632074845733e-06, "loss": 0.8053, "step": 12543 }, { "epoch": 0.7490744058282575, "grad_norm": 2.2105600833892822, "learning_rate": 2.7887996815075314e-06, "loss": 0.8256, "step": 12544 }, { "epoch": 0.7491341215812731, "grad_norm": 2.067049026489258, "learning_rate": 2.788136155530489e-06, "loss": 0.8475, "step": 12545 }, { "epoch": 0.7491938373342888, "grad_norm": 2.273322582244873, "learning_rate": 2.7874726295534472e-06, "loss": 0.8356, "step": 12546 }, { "epoch": 0.7492535530873045, "grad_norm": 1.7698906660079956, "learning_rate": 2.786809103576405e-06, "loss": 0.832, "step": 12547 }, { "epoch": 0.7493132688403201, "grad_norm": 2.164353847503662, "learning_rate": 2.7861455775993634e-06, "loss": 0.7947, "step": 12548 }, { "epoch": 0.7493729845933357, "grad_norm": 2.318150043487549, "learning_rate": 2.7854820516223215e-06, "loss": 0.831, "step": 12549 }, { "epoch": 0.7494327003463513, "grad_norm": 3.899191379547119, "learning_rate": 2.784818525645279e-06, "loss": 0.8384, "step": 12550 }, { "epoch": 0.749492416099367, "grad_norm": 1.7885172367095947, "learning_rate": 2.7841549996682373e-06, "loss": 0.8411, "step": 12551 }, { "epoch": 0.7495521318523827, "grad_norm": 2.5315804481506348, "learning_rate": 2.783491473691195e-06, "loss": 0.8002, "step": 12552 }, { "epoch": 0.7496118476053983, "grad_norm": 1.9266754388809204, "learning_rate": 2.7828279477141535e-06, "loss": 0.8434, "step": 12553 }, { "epoch": 0.7496715633584139, "grad_norm": 2.379757881164551, "learning_rate": 2.782164421737111e-06, "loss": 0.8275, "step": 12554 }, { "epoch": 0.7497312791114296, "grad_norm": 2.3305583000183105, "learning_rate": 2.7815008957600693e-06, "loss": 0.836, "step": 12555 }, { "epoch": 0.7497909948644452, "grad_norm": 1.8192346096038818, "learning_rate": 2.780837369783027e-06, "loss": 0.8283, "step": 12556 }, { "epoch": 0.7498507106174609, "grad_norm": 1.9015791416168213, "learning_rate": 2.780173843805985e-06, "loss": 0.8095, "step": 12557 }, { "epoch": 0.7499104263704766, "grad_norm": 1.9025812149047852, "learning_rate": 2.7795103178289436e-06, "loss": 0.8114, "step": 12558 }, { "epoch": 0.7499701421234922, "grad_norm": 3.090636968612671, "learning_rate": 2.7788467918519013e-06, "loss": 0.8141, "step": 12559 }, { "epoch": 0.7500298578765078, "grad_norm": 2.575493097305298, "learning_rate": 2.7781832658748594e-06, "loss": 0.8703, "step": 12560 }, { "epoch": 0.7500895736295234, "grad_norm": 1.8024362325668335, "learning_rate": 2.777519739897817e-06, "loss": 0.8386, "step": 12561 }, { "epoch": 0.7501492893825391, "grad_norm": 1.7905548810958862, "learning_rate": 2.776856213920775e-06, "loss": 0.8354, "step": 12562 }, { "epoch": 0.7502090051355548, "grad_norm": 2.0616631507873535, "learning_rate": 2.7761926879437332e-06, "loss": 0.7809, "step": 12563 }, { "epoch": 0.7502687208885704, "grad_norm": 2.157543182373047, "learning_rate": 2.7755291619666913e-06, "loss": 0.855, "step": 12564 }, { "epoch": 0.7503284366415861, "grad_norm": 2.6119701862335205, "learning_rate": 2.774865635989649e-06, "loss": 0.7996, "step": 12565 }, { "epoch": 0.7503881523946017, "grad_norm": 2.32218599319458, "learning_rate": 2.774202110012607e-06, "loss": 0.8249, "step": 12566 }, { "epoch": 0.7504478681476173, "grad_norm": 1.566616177558899, "learning_rate": 2.7735385840355648e-06, "loss": 0.797, "step": 12567 }, { "epoch": 0.750507583900633, "grad_norm": 2.03849458694458, "learning_rate": 2.7728750580585233e-06, "loss": 0.8164, "step": 12568 }, { "epoch": 0.7505672996536487, "grad_norm": 2.380910634994507, "learning_rate": 2.7722115320814814e-06, "loss": 0.8051, "step": 12569 }, { "epoch": 0.7506270154066643, "grad_norm": 2.1212007999420166, "learning_rate": 2.771548006104439e-06, "loss": 0.818, "step": 12570 }, { "epoch": 0.7506867311596799, "grad_norm": 2.753734588623047, "learning_rate": 2.770884480127397e-06, "loss": 0.8049, "step": 12571 }, { "epoch": 0.7507464469126955, "grad_norm": 2.217268705368042, "learning_rate": 2.770220954150355e-06, "loss": 0.8348, "step": 12572 }, { "epoch": 0.7508061626657112, "grad_norm": 2.233187675476074, "learning_rate": 2.7695574281733134e-06, "loss": 0.8442, "step": 12573 }, { "epoch": 0.7508658784187269, "grad_norm": 1.7368484735488892, "learning_rate": 2.7688939021962715e-06, "loss": 0.8198, "step": 12574 }, { "epoch": 0.7509255941717425, "grad_norm": 4.771996974945068, "learning_rate": 2.768230376219229e-06, "loss": 0.8391, "step": 12575 }, { "epoch": 0.7509853099247582, "grad_norm": 2.5123651027679443, "learning_rate": 2.7675668502421873e-06, "loss": 0.8364, "step": 12576 }, { "epoch": 0.7510450256777738, "grad_norm": 3.0090503692626953, "learning_rate": 2.766903324265145e-06, "loss": 0.8115, "step": 12577 }, { "epoch": 0.7511047414307894, "grad_norm": 1.8275337219238281, "learning_rate": 2.7662397982881035e-06, "loss": 0.796, "step": 12578 }, { "epoch": 0.7511644571838051, "grad_norm": 2.3818464279174805, "learning_rate": 2.765576272311061e-06, "loss": 0.8233, "step": 12579 }, { "epoch": 0.7512241729368208, "grad_norm": 1.6524947881698608, "learning_rate": 2.7649127463340192e-06, "loss": 0.8358, "step": 12580 }, { "epoch": 0.7512838886898364, "grad_norm": 2.221134901046753, "learning_rate": 2.764249220356977e-06, "loss": 0.8344, "step": 12581 }, { "epoch": 0.751343604442852, "grad_norm": 3.209496021270752, "learning_rate": 2.763585694379935e-06, "loss": 0.806, "step": 12582 }, { "epoch": 0.7514033201958676, "grad_norm": 2.1573588848114014, "learning_rate": 2.7629221684028935e-06, "loss": 0.824, "step": 12583 }, { "epoch": 0.7514630359488833, "grad_norm": 2.7516753673553467, "learning_rate": 2.7622586424258512e-06, "loss": 0.8227, "step": 12584 }, { "epoch": 0.751522751701899, "grad_norm": 4.672298908233643, "learning_rate": 2.7615951164488093e-06, "loss": 0.8072, "step": 12585 }, { "epoch": 0.7515824674549146, "grad_norm": 1.990301251411438, "learning_rate": 2.760931590471767e-06, "loss": 0.8447, "step": 12586 }, { "epoch": 0.7516421832079303, "grad_norm": 1.942039132118225, "learning_rate": 2.760268064494725e-06, "loss": 0.85, "step": 12587 }, { "epoch": 0.7517018989609459, "grad_norm": 1.8318705558776855, "learning_rate": 2.759604538517683e-06, "loss": 0.8563, "step": 12588 }, { "epoch": 0.7517616147139615, "grad_norm": 2.5987184047698975, "learning_rate": 2.7589410125406413e-06, "loss": 0.8174, "step": 12589 }, { "epoch": 0.7518213304669772, "grad_norm": 2.44158935546875, "learning_rate": 2.758277486563599e-06, "loss": 0.8098, "step": 12590 }, { "epoch": 0.7518810462199929, "grad_norm": 2.671379804611206, "learning_rate": 2.757613960586557e-06, "loss": 0.8398, "step": 12591 }, { "epoch": 0.7519407619730085, "grad_norm": 1.6788948774337769, "learning_rate": 2.7569504346095148e-06, "loss": 0.8016, "step": 12592 }, { "epoch": 0.7520004777260241, "grad_norm": 1.9706695079803467, "learning_rate": 2.7562869086324733e-06, "loss": 0.8262, "step": 12593 }, { "epoch": 0.7520601934790397, "grad_norm": 1.6654720306396484, "learning_rate": 2.7556233826554314e-06, "loss": 0.7992, "step": 12594 }, { "epoch": 0.7521199092320554, "grad_norm": 2.2648890018463135, "learning_rate": 2.754959856678389e-06, "loss": 0.7881, "step": 12595 }, { "epoch": 0.7521796249850711, "grad_norm": 1.5871422290802002, "learning_rate": 2.754296330701347e-06, "loss": 0.8418, "step": 12596 }, { "epoch": 0.7522393407380867, "grad_norm": 2.3153274059295654, "learning_rate": 2.753632804724305e-06, "loss": 0.8143, "step": 12597 }, { "epoch": 0.7522990564911024, "grad_norm": 2.874941110610962, "learning_rate": 2.7529692787472634e-06, "loss": 0.8251, "step": 12598 }, { "epoch": 0.752358772244118, "grad_norm": 2.231367349624634, "learning_rate": 2.7523057527702215e-06, "loss": 0.8176, "step": 12599 }, { "epoch": 0.7524184879971336, "grad_norm": 2.749082088470459, "learning_rate": 2.751642226793179e-06, "loss": 0.8227, "step": 12600 }, { "epoch": 0.7524184879971336, "eval_text_loss": 0.8941822052001953, "eval_text_runtime": 15.1748, "eval_text_samples_per_second": 263.595, "eval_text_steps_per_second": 0.527, "step": 12600 }, { "epoch": 0.7524184879971336, "eval_image_loss": 0.5984588265419006, "eval_image_runtime": 5.0054, "eval_image_samples_per_second": 799.144, "eval_image_steps_per_second": 1.598, "step": 12600 }, { "epoch": 0.7524184879971336, "eval_video_loss": 1.0296838283538818, "eval_video_runtime": 77.0547, "eval_video_samples_per_second": 51.911, "eval_video_steps_per_second": 0.104, "step": 12600 }, { "epoch": 0.7524782037501493, "grad_norm": 1.8554109334945679, "learning_rate": 2.7509787008161372e-06, "loss": 0.812, "step": 12601 }, { "epoch": 0.752537919503165, "grad_norm": 2.421967029571533, "learning_rate": 2.750315174839095e-06, "loss": 0.8247, "step": 12602 }, { "epoch": 0.7525976352561806, "grad_norm": 1.7359352111816406, "learning_rate": 2.7496516488620534e-06, "loss": 0.8652, "step": 12603 }, { "epoch": 0.7526573510091962, "grad_norm": 3.2645413875579834, "learning_rate": 2.748988122885011e-06, "loss": 0.7986, "step": 12604 }, { "epoch": 0.7527170667622118, "grad_norm": 3.2787508964538574, "learning_rate": 2.7483245969079692e-06, "loss": 0.816, "step": 12605 }, { "epoch": 0.7527767825152275, "grad_norm": 4.554436206817627, "learning_rate": 2.747661070930927e-06, "loss": 0.8322, "step": 12606 }, { "epoch": 0.7528364982682432, "grad_norm": 1.6737174987792969, "learning_rate": 2.746997544953885e-06, "loss": 0.8048, "step": 12607 }, { "epoch": 0.7528962140212588, "grad_norm": 2.347301959991455, "learning_rate": 2.7463340189768435e-06, "loss": 0.7944, "step": 12608 }, { "epoch": 0.7529559297742745, "grad_norm": 3.514418840408325, "learning_rate": 2.745670492999801e-06, "loss": 0.8186, "step": 12609 }, { "epoch": 0.7530156455272901, "grad_norm": 2.367945432662964, "learning_rate": 2.7450069670227593e-06, "loss": 0.8014, "step": 12610 }, { "epoch": 0.7530753612803057, "grad_norm": 2.311431884765625, "learning_rate": 2.744343441045717e-06, "loss": 0.8184, "step": 12611 }, { "epoch": 0.7531350770333214, "grad_norm": 2.8026466369628906, "learning_rate": 2.743679915068675e-06, "loss": 0.8285, "step": 12612 }, { "epoch": 0.753194792786337, "grad_norm": 2.600043773651123, "learning_rate": 2.743016389091633e-06, "loss": 0.8418, "step": 12613 }, { "epoch": 0.7532545085393527, "grad_norm": 1.6746368408203125, "learning_rate": 2.7423528631145913e-06, "loss": 0.7838, "step": 12614 }, { "epoch": 0.7533142242923683, "grad_norm": 3.162842273712158, "learning_rate": 2.741689337137549e-06, "loss": 0.815, "step": 12615 }, { "epoch": 0.7533739400453839, "grad_norm": 2.1988677978515625, "learning_rate": 2.741025811160507e-06, "loss": 0.8663, "step": 12616 }, { "epoch": 0.7534336557983996, "grad_norm": 2.2266693115234375, "learning_rate": 2.7403622851834647e-06, "loss": 0.8429, "step": 12617 }, { "epoch": 0.7534933715514153, "grad_norm": 2.0903170108795166, "learning_rate": 2.7396987592064233e-06, "loss": 0.8067, "step": 12618 }, { "epoch": 0.7535530873044309, "grad_norm": 2.0803143978118896, "learning_rate": 2.7390352332293814e-06, "loss": 0.845, "step": 12619 }, { "epoch": 0.7536128030574466, "grad_norm": 2.1467559337615967, "learning_rate": 2.738371707252339e-06, "loss": 0.8573, "step": 12620 }, { "epoch": 0.7536725188104622, "grad_norm": 4.137945175170898, "learning_rate": 2.737708181275297e-06, "loss": 0.8218, "step": 12621 }, { "epoch": 0.7537322345634778, "grad_norm": 1.6585739850997925, "learning_rate": 2.737044655298255e-06, "loss": 0.7986, "step": 12622 }, { "epoch": 0.7537919503164935, "grad_norm": 2.1753830909729004, "learning_rate": 2.7363811293212133e-06, "loss": 0.8331, "step": 12623 }, { "epoch": 0.7538516660695092, "grad_norm": 2.0195682048797607, "learning_rate": 2.7357176033441714e-06, "loss": 0.8181, "step": 12624 }, { "epoch": 0.7539113818225248, "grad_norm": 1.9528217315673828, "learning_rate": 2.735054077367129e-06, "loss": 0.8087, "step": 12625 }, { "epoch": 0.7539710975755404, "grad_norm": 1.7408231496810913, "learning_rate": 2.734390551390087e-06, "loss": 0.8081, "step": 12626 }, { "epoch": 0.754030813328556, "grad_norm": 1.7342642545700073, "learning_rate": 2.733727025413045e-06, "loss": 0.845, "step": 12627 }, { "epoch": 0.7540905290815717, "grad_norm": 2.3893256187438965, "learning_rate": 2.7330634994360034e-06, "loss": 0.8394, "step": 12628 }, { "epoch": 0.7541502448345874, "grad_norm": 2.0372531414031982, "learning_rate": 2.732399973458961e-06, "loss": 0.865, "step": 12629 }, { "epoch": 0.754209960587603, "grad_norm": 3.452362298965454, "learning_rate": 2.731736447481919e-06, "loss": 0.8066, "step": 12630 }, { "epoch": 0.7542696763406187, "grad_norm": 2.9299228191375732, "learning_rate": 2.731072921504877e-06, "loss": 0.8269, "step": 12631 }, { "epoch": 0.7543293920936343, "grad_norm": 2.8452374935150146, "learning_rate": 2.730409395527835e-06, "loss": 0.8837, "step": 12632 }, { "epoch": 0.7543891078466499, "grad_norm": 2.0271615982055664, "learning_rate": 2.7297458695507935e-06, "loss": 0.7949, "step": 12633 }, { "epoch": 0.7544488235996656, "grad_norm": 1.7756187915802002, "learning_rate": 2.729082343573751e-06, "loss": 0.8597, "step": 12634 }, { "epoch": 0.7545085393526813, "grad_norm": 2.9820027351379395, "learning_rate": 2.7284188175967093e-06, "loss": 0.8595, "step": 12635 }, { "epoch": 0.7545682551056969, "grad_norm": 2.35312557220459, "learning_rate": 2.727755291619667e-06, "loss": 0.8427, "step": 12636 }, { "epoch": 0.7546279708587126, "grad_norm": 3.567713737487793, "learning_rate": 2.727091765642625e-06, "loss": 0.8194, "step": 12637 }, { "epoch": 0.7546876866117281, "grad_norm": 1.6961092948913574, "learning_rate": 2.726428239665583e-06, "loss": 0.8025, "step": 12638 }, { "epoch": 0.7547474023647438, "grad_norm": 2.074038028717041, "learning_rate": 2.7257647136885412e-06, "loss": 0.8659, "step": 12639 }, { "epoch": 0.7548071181177595, "grad_norm": 2.737194299697876, "learning_rate": 2.725101187711499e-06, "loss": 0.7979, "step": 12640 }, { "epoch": 0.7548668338707751, "grad_norm": 2.8097245693206787, "learning_rate": 2.724437661734457e-06, "loss": 0.8473, "step": 12641 }, { "epoch": 0.7549265496237908, "grad_norm": 1.9753491878509521, "learning_rate": 2.7237741357574147e-06, "loss": 0.8134, "step": 12642 }, { "epoch": 0.7549862653768064, "grad_norm": 1.6973730325698853, "learning_rate": 2.7231106097803732e-06, "loss": 0.8592, "step": 12643 }, { "epoch": 0.755045981129822, "grad_norm": 2.201303005218506, "learning_rate": 2.7224470838033313e-06, "loss": 0.8423, "step": 12644 }, { "epoch": 0.7551056968828377, "grad_norm": 2.8545143604278564, "learning_rate": 2.721783557826289e-06, "loss": 0.8287, "step": 12645 }, { "epoch": 0.7551654126358534, "grad_norm": 2.0030126571655273, "learning_rate": 2.721120031849247e-06, "loss": 0.8227, "step": 12646 }, { "epoch": 0.755225128388869, "grad_norm": 2.3943281173706055, "learning_rate": 2.7204565058722048e-06, "loss": 0.8079, "step": 12647 }, { "epoch": 0.7552848441418847, "grad_norm": 2.1830055713653564, "learning_rate": 2.7197929798951633e-06, "loss": 0.8042, "step": 12648 }, { "epoch": 0.7553445598949002, "grad_norm": 1.76835036277771, "learning_rate": 2.7191294539181214e-06, "loss": 0.7954, "step": 12649 }, { "epoch": 0.7554042756479159, "grad_norm": 1.5468635559082031, "learning_rate": 2.718465927941079e-06, "loss": 0.8128, "step": 12650 }, { "epoch": 0.7554639914009316, "grad_norm": 2.262289524078369, "learning_rate": 2.717802401964037e-06, "loss": 0.8222, "step": 12651 }, { "epoch": 0.7555237071539472, "grad_norm": 1.9950369596481323, "learning_rate": 2.717138875986995e-06, "loss": 0.8252, "step": 12652 }, { "epoch": 0.7555834229069629, "grad_norm": 1.8284095525741577, "learning_rate": 2.7164753500099534e-06, "loss": 0.8289, "step": 12653 }, { "epoch": 0.7556431386599785, "grad_norm": 3.978799343109131, "learning_rate": 2.715811824032911e-06, "loss": 0.8121, "step": 12654 }, { "epoch": 0.7557028544129941, "grad_norm": 1.5466630458831787, "learning_rate": 2.715148298055869e-06, "loss": 0.8179, "step": 12655 }, { "epoch": 0.7557625701660098, "grad_norm": 1.633447289466858, "learning_rate": 2.714484772078827e-06, "loss": 0.8223, "step": 12656 }, { "epoch": 0.7558222859190254, "grad_norm": 3.189040422439575, "learning_rate": 2.713821246101785e-06, "loss": 0.8075, "step": 12657 }, { "epoch": 0.7558820016720411, "grad_norm": 3.31484317779541, "learning_rate": 2.7131577201247435e-06, "loss": 0.8639, "step": 12658 }, { "epoch": 0.7559417174250568, "grad_norm": 2.3271172046661377, "learning_rate": 2.712494194147701e-06, "loss": 0.8391, "step": 12659 }, { "epoch": 0.7560014331780723, "grad_norm": 2.582163095474243, "learning_rate": 2.7118306681706592e-06, "loss": 0.8483, "step": 12660 }, { "epoch": 0.756061148931088, "grad_norm": 2.25443172454834, "learning_rate": 2.711167142193617e-06, "loss": 0.8469, "step": 12661 }, { "epoch": 0.7561208646841037, "grad_norm": 1.9740632772445679, "learning_rate": 2.710503616216575e-06, "loss": 0.7998, "step": 12662 }, { "epoch": 0.7561805804371193, "grad_norm": 2.6361403465270996, "learning_rate": 2.709840090239533e-06, "loss": 0.8359, "step": 12663 }, { "epoch": 0.756240296190135, "grad_norm": 2.0994205474853516, "learning_rate": 2.7091765642624912e-06, "loss": 0.8215, "step": 12664 }, { "epoch": 0.7563000119431506, "grad_norm": 6.491690635681152, "learning_rate": 2.708513038285449e-06, "loss": 0.8017, "step": 12665 }, { "epoch": 0.7563597276961662, "grad_norm": 2.238917589187622, "learning_rate": 2.707849512308407e-06, "loss": 0.7899, "step": 12666 }, { "epoch": 0.7564194434491819, "grad_norm": 2.4534857273101807, "learning_rate": 2.7071859863313647e-06, "loss": 0.8644, "step": 12667 }, { "epoch": 0.7564791592021975, "grad_norm": 2.542654275894165, "learning_rate": 2.706522460354323e-06, "loss": 0.8806, "step": 12668 }, { "epoch": 0.7565388749552132, "grad_norm": 2.2450149059295654, "learning_rate": 2.7058589343772813e-06, "loss": 0.8566, "step": 12669 }, { "epoch": 0.7565985907082289, "grad_norm": 2.168100357055664, "learning_rate": 2.705195408400239e-06, "loss": 0.8242, "step": 12670 }, { "epoch": 0.7566583064612444, "grad_norm": 1.9913971424102783, "learning_rate": 2.704531882423197e-06, "loss": 0.8057, "step": 12671 }, { "epoch": 0.7567180222142601, "grad_norm": 1.9248521327972412, "learning_rate": 2.7038683564461547e-06, "loss": 0.8302, "step": 12672 }, { "epoch": 0.7567777379672758, "grad_norm": 2.049492835998535, "learning_rate": 2.7032048304691133e-06, "loss": 0.8444, "step": 12673 }, { "epoch": 0.7568374537202914, "grad_norm": 1.726646900177002, "learning_rate": 2.7025413044920714e-06, "loss": 0.8243, "step": 12674 }, { "epoch": 0.7568971694733071, "grad_norm": 1.6570991277694702, "learning_rate": 2.701877778515029e-06, "loss": 0.8066, "step": 12675 }, { "epoch": 0.7569568852263227, "grad_norm": 1.6801402568817139, "learning_rate": 2.701214252537987e-06, "loss": 0.8263, "step": 12676 }, { "epoch": 0.7570166009793383, "grad_norm": 4.647033214569092, "learning_rate": 2.700550726560945e-06, "loss": 0.8008, "step": 12677 }, { "epoch": 0.757076316732354, "grad_norm": 2.8068106174468994, "learning_rate": 2.6998872005839034e-06, "loss": 0.8393, "step": 12678 }, { "epoch": 0.7571360324853696, "grad_norm": 1.7490683794021606, "learning_rate": 2.699223674606861e-06, "loss": 0.8431, "step": 12679 }, { "epoch": 0.7571957482383853, "grad_norm": 4.412002086639404, "learning_rate": 2.698560148629819e-06, "loss": 0.8223, "step": 12680 }, { "epoch": 0.757255463991401, "grad_norm": 1.5770288705825806, "learning_rate": 2.697896622652777e-06, "loss": 0.8254, "step": 12681 }, { "epoch": 0.7573151797444165, "grad_norm": 1.8286538124084473, "learning_rate": 2.697233096675735e-06, "loss": 0.826, "step": 12682 }, { "epoch": 0.7573748954974322, "grad_norm": 2.02838134765625, "learning_rate": 2.6965695706986934e-06, "loss": 0.821, "step": 12683 }, { "epoch": 0.7574346112504479, "grad_norm": 3.3405778408050537, "learning_rate": 2.695906044721651e-06, "loss": 0.8607, "step": 12684 }, { "epoch": 0.7574943270034635, "grad_norm": 2.4380393028259277, "learning_rate": 2.695242518744609e-06, "loss": 0.8295, "step": 12685 }, { "epoch": 0.7575540427564792, "grad_norm": 2.2167716026306152, "learning_rate": 2.694578992767567e-06, "loss": 0.8241, "step": 12686 }, { "epoch": 0.7576137585094948, "grad_norm": 2.1140787601470947, "learning_rate": 2.693915466790525e-06, "loss": 0.8119, "step": 12687 }, { "epoch": 0.7576734742625104, "grad_norm": 3.1704885959625244, "learning_rate": 2.693251940813483e-06, "loss": 0.8469, "step": 12688 }, { "epoch": 0.7577331900155261, "grad_norm": 2.6834566593170166, "learning_rate": 2.692588414836441e-06, "loss": 0.8355, "step": 12689 }, { "epoch": 0.7577929057685417, "grad_norm": 2.5594279766082764, "learning_rate": 2.691924888859399e-06, "loss": 0.8456, "step": 12690 }, { "epoch": 0.7578526215215574, "grad_norm": 2.1455891132354736, "learning_rate": 2.691261362882357e-06, "loss": 0.8237, "step": 12691 }, { "epoch": 0.7579123372745731, "grad_norm": 2.096825361251831, "learning_rate": 2.6905978369053146e-06, "loss": 0.8482, "step": 12692 }, { "epoch": 0.7579720530275886, "grad_norm": 2.058699369430542, "learning_rate": 2.689934310928273e-06, "loss": 0.838, "step": 12693 }, { "epoch": 0.7580317687806043, "grad_norm": 1.5935848951339722, "learning_rate": 2.6892707849512313e-06, "loss": 0.8443, "step": 12694 }, { "epoch": 0.75809148453362, "grad_norm": 2.64803409576416, "learning_rate": 2.688607258974189e-06, "loss": 0.8253, "step": 12695 }, { "epoch": 0.7581512002866356, "grad_norm": 2.873927354812622, "learning_rate": 2.687943732997147e-06, "loss": 0.8478, "step": 12696 }, { "epoch": 0.7582109160396513, "grad_norm": 1.9991744756698608, "learning_rate": 2.6872802070201047e-06, "loss": 0.8548, "step": 12697 }, { "epoch": 0.758270631792667, "grad_norm": 3.52268385887146, "learning_rate": 2.6866166810430632e-06, "loss": 0.822, "step": 12698 }, { "epoch": 0.7583303475456825, "grad_norm": 1.958811640739441, "learning_rate": 2.6859531550660213e-06, "loss": 0.8562, "step": 12699 }, { "epoch": 0.7583900632986982, "grad_norm": 1.7008355855941772, "learning_rate": 2.685289629088979e-06, "loss": 0.8402, "step": 12700 }, { "epoch": 0.7584497790517138, "grad_norm": 2.0039987564086914, "learning_rate": 2.684626103111937e-06, "loss": 0.8139, "step": 12701 }, { "epoch": 0.7585094948047295, "grad_norm": 2.4424142837524414, "learning_rate": 2.683962577134895e-06, "loss": 0.8561, "step": 12702 }, { "epoch": 0.7585692105577452, "grad_norm": 2.255880832672119, "learning_rate": 2.6832990511578533e-06, "loss": 0.8312, "step": 12703 }, { "epoch": 0.7586289263107607, "grad_norm": 2.3318164348602295, "learning_rate": 2.682635525180811e-06, "loss": 0.8116, "step": 12704 }, { "epoch": 0.7586886420637764, "grad_norm": 1.871628999710083, "learning_rate": 2.681971999203769e-06, "loss": 0.8162, "step": 12705 }, { "epoch": 0.7587483578167921, "grad_norm": 2.941251039505005, "learning_rate": 2.6813084732267268e-06, "loss": 0.8212, "step": 12706 }, { "epoch": 0.7588080735698077, "grad_norm": 7.259409427642822, "learning_rate": 2.680644947249685e-06, "loss": 0.8422, "step": 12707 }, { "epoch": 0.7588677893228234, "grad_norm": 1.7971688508987427, "learning_rate": 2.6799814212726434e-06, "loss": 0.7933, "step": 12708 }, { "epoch": 0.7589275050758391, "grad_norm": 2.5313618183135986, "learning_rate": 2.679317895295601e-06, "loss": 0.8236, "step": 12709 }, { "epoch": 0.7589872208288546, "grad_norm": 1.6888346672058105, "learning_rate": 2.678654369318559e-06, "loss": 0.8046, "step": 12710 }, { "epoch": 0.7590469365818703, "grad_norm": 2.3313069343566895, "learning_rate": 2.677990843341517e-06, "loss": 0.8073, "step": 12711 }, { "epoch": 0.759106652334886, "grad_norm": 2.54134202003479, "learning_rate": 2.677327317364475e-06, "loss": 0.801, "step": 12712 }, { "epoch": 0.7591663680879016, "grad_norm": 6.140650749206543, "learning_rate": 2.676663791387433e-06, "loss": 0.8251, "step": 12713 }, { "epoch": 0.7592260838409173, "grad_norm": 2.1758639812469482, "learning_rate": 2.676000265410391e-06, "loss": 0.8064, "step": 12714 }, { "epoch": 0.7592857995939328, "grad_norm": 1.6401152610778809, "learning_rate": 2.675336739433349e-06, "loss": 0.8207, "step": 12715 }, { "epoch": 0.7593455153469485, "grad_norm": 1.595550298690796, "learning_rate": 2.674673213456307e-06, "loss": 0.8497, "step": 12716 }, { "epoch": 0.7594052310999642, "grad_norm": 1.7017379999160767, "learning_rate": 2.6740096874792646e-06, "loss": 0.8395, "step": 12717 }, { "epoch": 0.7594649468529798, "grad_norm": 2.621919631958008, "learning_rate": 2.673346161502223e-06, "loss": 0.8435, "step": 12718 }, { "epoch": 0.7595246626059955, "grad_norm": 1.7760580778121948, "learning_rate": 2.6726826355251812e-06, "loss": 0.8431, "step": 12719 }, { "epoch": 0.7595843783590112, "grad_norm": 2.2988433837890625, "learning_rate": 2.672019109548139e-06, "loss": 0.8051, "step": 12720 }, { "epoch": 0.7596440941120267, "grad_norm": 1.974216103553772, "learning_rate": 2.671355583571097e-06, "loss": 0.8192, "step": 12721 }, { "epoch": 0.7597038098650424, "grad_norm": 2.5372023582458496, "learning_rate": 2.6706920575940547e-06, "loss": 0.8395, "step": 12722 }, { "epoch": 0.759763525618058, "grad_norm": 1.9413472414016724, "learning_rate": 2.6700285316170132e-06, "loss": 0.799, "step": 12723 }, { "epoch": 0.7598232413710737, "grad_norm": 2.2737412452697754, "learning_rate": 2.6693650056399713e-06, "loss": 0.8198, "step": 12724 }, { "epoch": 0.7598829571240894, "grad_norm": 1.9399141073226929, "learning_rate": 2.668701479662929e-06, "loss": 0.8403, "step": 12725 }, { "epoch": 0.7599426728771049, "grad_norm": 2.6484107971191406, "learning_rate": 2.668037953685887e-06, "loss": 0.8074, "step": 12726 }, { "epoch": 0.7600023886301206, "grad_norm": 1.6864198446273804, "learning_rate": 2.6673744277088448e-06, "loss": 0.8221, "step": 12727 }, { "epoch": 0.7600621043831363, "grad_norm": 6.243162155151367, "learning_rate": 2.6667109017318033e-06, "loss": 0.8373, "step": 12728 }, { "epoch": 0.7601218201361519, "grad_norm": 3.54563570022583, "learning_rate": 2.666047375754761e-06, "loss": 0.8358, "step": 12729 }, { "epoch": 0.7601815358891676, "grad_norm": 2.967360734939575, "learning_rate": 2.665383849777719e-06, "loss": 0.8261, "step": 12730 }, { "epoch": 0.7602412516421833, "grad_norm": 2.00728702545166, "learning_rate": 2.6647203238006767e-06, "loss": 0.8503, "step": 12731 }, { "epoch": 0.7603009673951988, "grad_norm": 1.7368026971817017, "learning_rate": 2.664056797823635e-06, "loss": 0.8395, "step": 12732 }, { "epoch": 0.7603606831482145, "grad_norm": 2.481130838394165, "learning_rate": 2.6633932718465934e-06, "loss": 0.8211, "step": 12733 }, { "epoch": 0.7604203989012301, "grad_norm": 2.12583327293396, "learning_rate": 2.662729745869551e-06, "loss": 0.8543, "step": 12734 }, { "epoch": 0.7604801146542458, "grad_norm": 2.552173376083374, "learning_rate": 2.662066219892509e-06, "loss": 0.821, "step": 12735 }, { "epoch": 0.7605398304072615, "grad_norm": 1.774111270904541, "learning_rate": 2.661402693915467e-06, "loss": 0.8307, "step": 12736 }, { "epoch": 0.760599546160277, "grad_norm": 2.296297073364258, "learning_rate": 2.660739167938425e-06, "loss": 0.8477, "step": 12737 }, { "epoch": 0.7606592619132927, "grad_norm": 2.3625130653381348, "learning_rate": 2.660075641961383e-06, "loss": 0.8587, "step": 12738 }, { "epoch": 0.7607189776663084, "grad_norm": 2.8818094730377197, "learning_rate": 2.659412115984341e-06, "loss": 0.8117, "step": 12739 }, { "epoch": 0.760778693419324, "grad_norm": 2.110051155090332, "learning_rate": 2.658748590007299e-06, "loss": 0.8261, "step": 12740 }, { "epoch": 0.7608384091723397, "grad_norm": 3.89113450050354, "learning_rate": 2.658085064030257e-06, "loss": 0.8268, "step": 12741 }, { "epoch": 0.7608981249253554, "grad_norm": 3.7006659507751465, "learning_rate": 2.6574215380532146e-06, "loss": 0.7962, "step": 12742 }, { "epoch": 0.7609578406783709, "grad_norm": 2.6063458919525146, "learning_rate": 2.656758012076173e-06, "loss": 0.7979, "step": 12743 }, { "epoch": 0.7610175564313866, "grad_norm": 2.4618144035339355, "learning_rate": 2.656094486099131e-06, "loss": 0.849, "step": 12744 }, { "epoch": 0.7610772721844022, "grad_norm": 2.3854286670684814, "learning_rate": 2.655430960122089e-06, "loss": 0.8085, "step": 12745 }, { "epoch": 0.7611369879374179, "grad_norm": 2.4013047218322754, "learning_rate": 2.654767434145047e-06, "loss": 0.8534, "step": 12746 }, { "epoch": 0.7611967036904336, "grad_norm": 2.087891101837158, "learning_rate": 2.6541039081680047e-06, "loss": 0.8326, "step": 12747 }, { "epoch": 0.7612564194434491, "grad_norm": 2.754004716873169, "learning_rate": 2.653440382190963e-06, "loss": 0.887, "step": 12748 }, { "epoch": 0.7613161351964648, "grad_norm": 2.1803979873657227, "learning_rate": 2.6527768562139213e-06, "loss": 0.8025, "step": 12749 }, { "epoch": 0.7613758509494805, "grad_norm": 2.193901777267456, "learning_rate": 2.652113330236879e-06, "loss": 0.8259, "step": 12750 }, { "epoch": 0.7614355667024961, "grad_norm": 2.535186767578125, "learning_rate": 2.651449804259837e-06, "loss": 0.8159, "step": 12751 }, { "epoch": 0.7614952824555118, "grad_norm": 2.7927987575531006, "learning_rate": 2.6507862782827947e-06, "loss": 0.8342, "step": 12752 }, { "epoch": 0.7615549982085275, "grad_norm": 2.3118984699249268, "learning_rate": 2.6501227523057533e-06, "loss": 0.8811, "step": 12753 }, { "epoch": 0.761614713961543, "grad_norm": 7.880763053894043, "learning_rate": 2.649459226328711e-06, "loss": 0.8436, "step": 12754 }, { "epoch": 0.7616744297145587, "grad_norm": 3.0503861904144287, "learning_rate": 2.648795700351669e-06, "loss": 0.8259, "step": 12755 }, { "epoch": 0.7617341454675743, "grad_norm": 2.33794903755188, "learning_rate": 2.6481321743746267e-06, "loss": 0.8259, "step": 12756 }, { "epoch": 0.76179386122059, "grad_norm": 2.109099864959717, "learning_rate": 2.647468648397585e-06, "loss": 0.8209, "step": 12757 }, { "epoch": 0.7618535769736057, "grad_norm": 1.8274978399276733, "learning_rate": 2.6468051224205433e-06, "loss": 0.8165, "step": 12758 }, { "epoch": 0.7619132927266212, "grad_norm": 1.8841365575790405, "learning_rate": 2.646141596443501e-06, "loss": 0.8332, "step": 12759 }, { "epoch": 0.7619730084796369, "grad_norm": 1.6588329076766968, "learning_rate": 2.645478070466459e-06, "loss": 0.8333, "step": 12760 }, { "epoch": 0.7620327242326526, "grad_norm": 2.3549861907958984, "learning_rate": 2.644814544489417e-06, "loss": 0.8725, "step": 12761 }, { "epoch": 0.7620924399856682, "grad_norm": 2.3422460556030273, "learning_rate": 2.644151018512375e-06, "loss": 0.826, "step": 12762 }, { "epoch": 0.7621521557386839, "grad_norm": 4.6898345947265625, "learning_rate": 2.6434874925353334e-06, "loss": 0.7894, "step": 12763 }, { "epoch": 0.7622118714916996, "grad_norm": 1.6935888528823853, "learning_rate": 2.642823966558291e-06, "loss": 0.8195, "step": 12764 }, { "epoch": 0.7622715872447151, "grad_norm": 1.8838070631027222, "learning_rate": 2.6421604405812488e-06, "loss": 0.817, "step": 12765 }, { "epoch": 0.7623313029977308, "grad_norm": 1.9491124153137207, "learning_rate": 2.641496914604207e-06, "loss": 0.8316, "step": 12766 }, { "epoch": 0.7623910187507464, "grad_norm": 2.4033517837524414, "learning_rate": 2.6408333886271646e-06, "loss": 0.8465, "step": 12767 }, { "epoch": 0.7624507345037621, "grad_norm": 3.5005621910095215, "learning_rate": 2.640169862650123e-06, "loss": 0.8508, "step": 12768 }, { "epoch": 0.7625104502567778, "grad_norm": 1.7279822826385498, "learning_rate": 2.639506336673081e-06, "loss": 0.827, "step": 12769 }, { "epoch": 0.7625701660097934, "grad_norm": 2.532130718231201, "learning_rate": 2.638842810696039e-06, "loss": 0.8356, "step": 12770 }, { "epoch": 0.762629881762809, "grad_norm": 2.801790237426758, "learning_rate": 2.638179284718997e-06, "loss": 0.891, "step": 12771 }, { "epoch": 0.7626895975158247, "grad_norm": 1.861279010772705, "learning_rate": 2.6375157587419546e-06, "loss": 0.8138, "step": 12772 }, { "epoch": 0.7627493132688403, "grad_norm": 2.3204381465911865, "learning_rate": 2.636852232764913e-06, "loss": 0.8364, "step": 12773 }, { "epoch": 0.762809029021856, "grad_norm": 1.8499077558517456, "learning_rate": 2.6361887067878713e-06, "loss": 0.8507, "step": 12774 }, { "epoch": 0.7628687447748717, "grad_norm": 1.6256300210952759, "learning_rate": 2.635525180810829e-06, "loss": 0.8527, "step": 12775 }, { "epoch": 0.7629284605278872, "grad_norm": 2.3825764656066895, "learning_rate": 2.634861654833787e-06, "loss": 0.814, "step": 12776 }, { "epoch": 0.7629881762809029, "grad_norm": 5.524620056152344, "learning_rate": 2.6341981288567447e-06, "loss": 0.8528, "step": 12777 }, { "epoch": 0.7630478920339185, "grad_norm": 1.9310191869735718, "learning_rate": 2.6335346028797032e-06, "loss": 0.8023, "step": 12778 }, { "epoch": 0.7631076077869342, "grad_norm": 2.1420083045959473, "learning_rate": 2.632871076902661e-06, "loss": 0.8338, "step": 12779 }, { "epoch": 0.7631673235399499, "grad_norm": 3.5740807056427, "learning_rate": 2.632207550925619e-06, "loss": 0.8719, "step": 12780 }, { "epoch": 0.7632270392929655, "grad_norm": 8.198533058166504, "learning_rate": 2.6315440249485767e-06, "loss": 0.8009, "step": 12781 }, { "epoch": 0.7632867550459811, "grad_norm": 2.8227691650390625, "learning_rate": 2.630880498971535e-06, "loss": 0.8164, "step": 12782 }, { "epoch": 0.7633464707989968, "grad_norm": 2.9904818534851074, "learning_rate": 2.6302169729944933e-06, "loss": 0.8343, "step": 12783 }, { "epoch": 0.7634061865520124, "grad_norm": 3.4358155727386475, "learning_rate": 2.629553447017451e-06, "loss": 0.8468, "step": 12784 }, { "epoch": 0.7634659023050281, "grad_norm": 1.9211452007293701, "learning_rate": 2.628889921040409e-06, "loss": 0.8172, "step": 12785 }, { "epoch": 0.7635256180580438, "grad_norm": 2.821204900741577, "learning_rate": 2.6282263950633668e-06, "loss": 0.863, "step": 12786 }, { "epoch": 0.7635853338110593, "grad_norm": 1.8304258584976196, "learning_rate": 2.627562869086325e-06, "loss": 0.8495, "step": 12787 }, { "epoch": 0.763645049564075, "grad_norm": 2.7053375244140625, "learning_rate": 2.6268993431092834e-06, "loss": 0.8601, "step": 12788 }, { "epoch": 0.7637047653170906, "grad_norm": 1.8393170833587646, "learning_rate": 2.626235817132241e-06, "loss": 0.8356, "step": 12789 }, { "epoch": 0.7637644810701063, "grad_norm": 2.2723801136016846, "learning_rate": 2.625572291155199e-06, "loss": 0.8329, "step": 12790 }, { "epoch": 0.763824196823122, "grad_norm": 1.6976969242095947, "learning_rate": 2.624908765178157e-06, "loss": 0.8296, "step": 12791 }, { "epoch": 0.7638839125761376, "grad_norm": 1.8017263412475586, "learning_rate": 2.6242452392011145e-06, "loss": 0.8388, "step": 12792 }, { "epoch": 0.7639436283291532, "grad_norm": 1.7087504863739014, "learning_rate": 2.623581713224073e-06, "loss": 0.851, "step": 12793 }, { "epoch": 0.7640033440821689, "grad_norm": 11.871414184570312, "learning_rate": 2.622918187247031e-06, "loss": 0.7974, "step": 12794 }, { "epoch": 0.7640630598351845, "grad_norm": 3.0344393253326416, "learning_rate": 2.622254661269989e-06, "loss": 0.8197, "step": 12795 }, { "epoch": 0.7641227755882002, "grad_norm": 2.086599111557007, "learning_rate": 2.621591135292947e-06, "loss": 0.85, "step": 12796 }, { "epoch": 0.7641824913412159, "grad_norm": 2.1871535778045654, "learning_rate": 2.6209276093159046e-06, "loss": 0.8357, "step": 12797 }, { "epoch": 0.7642422070942314, "grad_norm": 1.8644118309020996, "learning_rate": 2.620264083338863e-06, "loss": 0.8429, "step": 12798 }, { "epoch": 0.7643019228472471, "grad_norm": 2.0682966709136963, "learning_rate": 2.6196005573618212e-06, "loss": 0.835, "step": 12799 }, { "epoch": 0.7643616386002627, "grad_norm": 2.7789852619171143, "learning_rate": 2.618937031384779e-06, "loss": 0.7893, "step": 12800 }, { "epoch": 0.7643616386002627, "eval_text_loss": 0.8933354020118713, "eval_text_runtime": 15.1679, "eval_text_samples_per_second": 263.715, "eval_text_steps_per_second": 0.527, "step": 12800 }, { "epoch": 0.7643616386002627, "eval_image_loss": 0.5967463254928589, "eval_image_runtime": 5.0125, "eval_image_samples_per_second": 798.001, "eval_image_steps_per_second": 1.596, "step": 12800 }, { "epoch": 0.7643616386002627, "eval_video_loss": 1.027452826499939, "eval_video_runtime": 77.2966, "eval_video_samples_per_second": 51.749, "eval_video_steps_per_second": 0.103, "step": 12800 }, { "epoch": 0.7644213543532784, "grad_norm": 1.6508736610412598, "learning_rate": 2.618273505407737e-06, "loss": 0.8461, "step": 12801 }, { "epoch": 0.7644810701062941, "grad_norm": 2.0233614444732666, "learning_rate": 2.6176099794306947e-06, "loss": 0.8475, "step": 12802 }, { "epoch": 0.7645407858593097, "grad_norm": 4.075673580169678, "learning_rate": 2.616946453453653e-06, "loss": 0.838, "step": 12803 }, { "epoch": 0.7646005016123253, "grad_norm": 2.780294418334961, "learning_rate": 2.616282927476611e-06, "loss": 0.8271, "step": 12804 }, { "epoch": 0.764660217365341, "grad_norm": 1.7680068016052246, "learning_rate": 2.615619401499569e-06, "loss": 0.8307, "step": 12805 }, { "epoch": 0.7647199331183566, "grad_norm": 1.9921998977661133, "learning_rate": 2.6149558755225267e-06, "loss": 0.8396, "step": 12806 }, { "epoch": 0.7647796488713723, "grad_norm": 2.4681878089904785, "learning_rate": 2.6142923495454848e-06, "loss": 0.7969, "step": 12807 }, { "epoch": 0.764839364624388, "grad_norm": 1.924486756324768, "learning_rate": 2.6136288235684433e-06, "loss": 0.812, "step": 12808 }, { "epoch": 0.7648990803774035, "grad_norm": 2.8310368061065674, "learning_rate": 2.612965297591401e-06, "loss": 0.8413, "step": 12809 }, { "epoch": 0.7649587961304192, "grad_norm": 3.458498001098633, "learning_rate": 2.612301771614359e-06, "loss": 0.8484, "step": 12810 }, { "epoch": 0.7650185118834348, "grad_norm": 1.7487199306488037, "learning_rate": 2.6116382456373167e-06, "loss": 0.8152, "step": 12811 }, { "epoch": 0.7650782276364505, "grad_norm": 8.890829086303711, "learning_rate": 2.610974719660275e-06, "loss": 0.8649, "step": 12812 }, { "epoch": 0.7651379433894662, "grad_norm": 2.008538246154785, "learning_rate": 2.6103111936832334e-06, "loss": 0.8388, "step": 12813 }, { "epoch": 0.7651976591424818, "grad_norm": 2.5394046306610107, "learning_rate": 2.609647667706191e-06, "loss": 0.8452, "step": 12814 }, { "epoch": 0.7652573748954974, "grad_norm": 2.1905360221862793, "learning_rate": 2.608984141729149e-06, "loss": 0.837, "step": 12815 }, { "epoch": 0.765317090648513, "grad_norm": 3.635842800140381, "learning_rate": 2.608320615752107e-06, "loss": 0.8029, "step": 12816 }, { "epoch": 0.7653768064015287, "grad_norm": 1.873616337776184, "learning_rate": 2.607657089775065e-06, "loss": 0.8319, "step": 12817 }, { "epoch": 0.7654365221545444, "grad_norm": 2.343559980392456, "learning_rate": 2.606993563798023e-06, "loss": 0.8156, "step": 12818 }, { "epoch": 0.76549623790756, "grad_norm": 2.045372247695923, "learning_rate": 2.606330037820981e-06, "loss": 0.8628, "step": 12819 }, { "epoch": 0.7655559536605756, "grad_norm": 2.021451950073242, "learning_rate": 2.605666511843939e-06, "loss": 0.8605, "step": 12820 }, { "epoch": 0.7656156694135913, "grad_norm": 2.706016778945923, "learning_rate": 2.605002985866897e-06, "loss": 0.8118, "step": 12821 }, { "epoch": 0.7656753851666069, "grad_norm": 2.6078741550445557, "learning_rate": 2.6043394598898546e-06, "loss": 0.8198, "step": 12822 }, { "epoch": 0.7657351009196226, "grad_norm": 1.7695673704147339, "learning_rate": 2.603675933912813e-06, "loss": 0.8075, "step": 12823 }, { "epoch": 0.7657948166726383, "grad_norm": 2.5705089569091797, "learning_rate": 2.603012407935771e-06, "loss": 0.8235, "step": 12824 }, { "epoch": 0.7658545324256539, "grad_norm": 1.502685785293579, "learning_rate": 2.602348881958729e-06, "loss": 0.8169, "step": 12825 }, { "epoch": 0.7659142481786695, "grad_norm": 1.7216105461120605, "learning_rate": 2.601685355981687e-06, "loss": 0.8308, "step": 12826 }, { "epoch": 0.7659739639316852, "grad_norm": 3.8492209911346436, "learning_rate": 2.6010218300046447e-06, "loss": 0.8258, "step": 12827 }, { "epoch": 0.7660336796847008, "grad_norm": 2.3694660663604736, "learning_rate": 2.600358304027603e-06, "loss": 0.8624, "step": 12828 }, { "epoch": 0.7660933954377165, "grad_norm": 2.1252923011779785, "learning_rate": 2.599694778050561e-06, "loss": 0.7906, "step": 12829 }, { "epoch": 0.7661531111907322, "grad_norm": 3.4417309761047363, "learning_rate": 2.599031252073519e-06, "loss": 0.8186, "step": 12830 }, { "epoch": 0.7662128269437478, "grad_norm": 2.7074930667877197, "learning_rate": 2.5983677260964766e-06, "loss": 0.83, "step": 12831 }, { "epoch": 0.7662725426967634, "grad_norm": 1.5903383493423462, "learning_rate": 2.5977042001194347e-06, "loss": 0.8165, "step": 12832 }, { "epoch": 0.766332258449779, "grad_norm": 2.5130794048309326, "learning_rate": 2.5970406741423933e-06, "loss": 0.8464, "step": 12833 }, { "epoch": 0.7663919742027947, "grad_norm": 1.9239695072174072, "learning_rate": 2.596377148165351e-06, "loss": 0.8294, "step": 12834 }, { "epoch": 0.7664516899558104, "grad_norm": 1.5638537406921387, "learning_rate": 2.595713622188309e-06, "loss": 0.7595, "step": 12835 }, { "epoch": 0.766511405708826, "grad_norm": 2.1689693927764893, "learning_rate": 2.5950500962112667e-06, "loss": 0.8208, "step": 12836 }, { "epoch": 0.7665711214618416, "grad_norm": 2.144474506378174, "learning_rate": 2.594386570234225e-06, "loss": 0.8178, "step": 12837 }, { "epoch": 0.7666308372148573, "grad_norm": 1.7340924739837646, "learning_rate": 2.5937230442571833e-06, "loss": 0.7967, "step": 12838 }, { "epoch": 0.7666905529678729, "grad_norm": 2.3603720664978027, "learning_rate": 2.593059518280141e-06, "loss": 0.8294, "step": 12839 }, { "epoch": 0.7667502687208886, "grad_norm": 2.018367290496826, "learning_rate": 2.592395992303099e-06, "loss": 0.8366, "step": 12840 }, { "epoch": 0.7668099844739042, "grad_norm": 1.9231557846069336, "learning_rate": 2.591732466326057e-06, "loss": 0.8095, "step": 12841 }, { "epoch": 0.7668697002269199, "grad_norm": 2.6574153900146484, "learning_rate": 2.591068940349015e-06, "loss": 0.8285, "step": 12842 }, { "epoch": 0.7669294159799355, "grad_norm": 2.206887722015381, "learning_rate": 2.590405414371973e-06, "loss": 0.8214, "step": 12843 }, { "epoch": 0.7669891317329511, "grad_norm": 1.8581275939941406, "learning_rate": 2.589741888394931e-06, "loss": 0.8092, "step": 12844 }, { "epoch": 0.7670488474859668, "grad_norm": 2.1882829666137695, "learning_rate": 2.5890783624178888e-06, "loss": 0.8416, "step": 12845 }, { "epoch": 0.7671085632389825, "grad_norm": 3.445185422897339, "learning_rate": 2.588414836440847e-06, "loss": 0.8269, "step": 12846 }, { "epoch": 0.7671682789919981, "grad_norm": 2.1531941890716553, "learning_rate": 2.5877513104638045e-06, "loss": 0.8264, "step": 12847 }, { "epoch": 0.7672279947450137, "grad_norm": 1.8962421417236328, "learning_rate": 2.587087784486763e-06, "loss": 0.8084, "step": 12848 }, { "epoch": 0.7672877104980294, "grad_norm": 2.3157026767730713, "learning_rate": 2.586424258509721e-06, "loss": 0.8328, "step": 12849 }, { "epoch": 0.767347426251045, "grad_norm": 1.6378906965255737, "learning_rate": 2.585760732532679e-06, "loss": 0.8445, "step": 12850 }, { "epoch": 0.7674071420040607, "grad_norm": 2.0185837745666504, "learning_rate": 2.585097206555637e-06, "loss": 0.8548, "step": 12851 }, { "epoch": 0.7674668577570763, "grad_norm": 2.1362826824188232, "learning_rate": 2.5844336805785946e-06, "loss": 0.8156, "step": 12852 }, { "epoch": 0.767526573510092, "grad_norm": 2.2923316955566406, "learning_rate": 2.583770154601553e-06, "loss": 0.8487, "step": 12853 }, { "epoch": 0.7675862892631076, "grad_norm": 2.0688276290893555, "learning_rate": 2.583106628624511e-06, "loss": 0.8351, "step": 12854 }, { "epoch": 0.7676460050161232, "grad_norm": 1.6591376066207886, "learning_rate": 2.582443102647469e-06, "loss": 0.7938, "step": 12855 }, { "epoch": 0.7677057207691389, "grad_norm": 3.1248223781585693, "learning_rate": 2.5817795766704266e-06, "loss": 0.8006, "step": 12856 }, { "epoch": 0.7677654365221546, "grad_norm": 2.040961742401123, "learning_rate": 2.5811160506933847e-06, "loss": 0.8159, "step": 12857 }, { "epoch": 0.7678251522751702, "grad_norm": 2.186116933822632, "learning_rate": 2.5804525247163432e-06, "loss": 0.8369, "step": 12858 }, { "epoch": 0.7678848680281858, "grad_norm": 2.443420171737671, "learning_rate": 2.579788998739301e-06, "loss": 0.8309, "step": 12859 }, { "epoch": 0.7679445837812015, "grad_norm": 2.9056973457336426, "learning_rate": 2.579125472762259e-06, "loss": 0.8278, "step": 12860 }, { "epoch": 0.7680042995342171, "grad_norm": 1.907507300376892, "learning_rate": 2.5784619467852167e-06, "loss": 0.7814, "step": 12861 }, { "epoch": 0.7680640152872328, "grad_norm": 1.640045404434204, "learning_rate": 2.5777984208081748e-06, "loss": 0.8506, "step": 12862 }, { "epoch": 0.7681237310402484, "grad_norm": 2.475412607192993, "learning_rate": 2.5771348948311333e-06, "loss": 0.807, "step": 12863 }, { "epoch": 0.7681834467932641, "grad_norm": 2.075408935546875, "learning_rate": 2.576471368854091e-06, "loss": 0.8153, "step": 12864 }, { "epoch": 0.7682431625462797, "grad_norm": 1.8125723600387573, "learning_rate": 2.575807842877049e-06, "loss": 0.8509, "step": 12865 }, { "epoch": 0.7683028782992953, "grad_norm": 1.5693931579589844, "learning_rate": 2.5751443169000068e-06, "loss": 0.8109, "step": 12866 }, { "epoch": 0.768362594052311, "grad_norm": 1.9695014953613281, "learning_rate": 2.574480790922965e-06, "loss": 0.8306, "step": 12867 }, { "epoch": 0.7684223098053267, "grad_norm": 2.762605905532837, "learning_rate": 2.573817264945923e-06, "loss": 0.8478, "step": 12868 }, { "epoch": 0.7684820255583423, "grad_norm": 1.7549434900283813, "learning_rate": 2.573153738968881e-06, "loss": 0.7713, "step": 12869 }, { "epoch": 0.7685417413113579, "grad_norm": 1.9277095794677734, "learning_rate": 2.5724902129918387e-06, "loss": 0.8132, "step": 12870 }, { "epoch": 0.7686014570643735, "grad_norm": 3.0952205657958984, "learning_rate": 2.571826687014797e-06, "loss": 0.823, "step": 12871 }, { "epoch": 0.7686611728173892, "grad_norm": 2.4568874835968018, "learning_rate": 2.5711631610377545e-06, "loss": 0.8298, "step": 12872 }, { "epoch": 0.7687208885704049, "grad_norm": 1.7268341779708862, "learning_rate": 2.570499635060713e-06, "loss": 0.7966, "step": 12873 }, { "epoch": 0.7687806043234205, "grad_norm": 2.007394313812256, "learning_rate": 2.569836109083671e-06, "loss": 0.8026, "step": 12874 }, { "epoch": 0.7688403200764362, "grad_norm": 1.726412057876587, "learning_rate": 2.569172583106629e-06, "loss": 0.8089, "step": 12875 }, { "epoch": 0.7689000358294518, "grad_norm": 2.1625702381134033, "learning_rate": 2.568509057129587e-06, "loss": 0.8852, "step": 12876 }, { "epoch": 0.7689597515824674, "grad_norm": 1.8395373821258545, "learning_rate": 2.5678455311525446e-06, "loss": 0.8186, "step": 12877 }, { "epoch": 0.7690194673354831, "grad_norm": 2.023404598236084, "learning_rate": 2.567182005175503e-06, "loss": 0.8161, "step": 12878 }, { "epoch": 0.7690791830884988, "grad_norm": 2.09531569480896, "learning_rate": 2.566518479198461e-06, "loss": 0.8558, "step": 12879 }, { "epoch": 0.7691388988415144, "grad_norm": 3.0608017444610596, "learning_rate": 2.565854953221419e-06, "loss": 0.8318, "step": 12880 }, { "epoch": 0.76919861459453, "grad_norm": 2.726381301879883, "learning_rate": 2.5651914272443766e-06, "loss": 0.8679, "step": 12881 }, { "epoch": 0.7692583303475456, "grad_norm": 2.4063167572021484, "learning_rate": 2.5645279012673347e-06, "loss": 0.8294, "step": 12882 }, { "epoch": 0.7693180461005613, "grad_norm": 2.0273163318634033, "learning_rate": 2.563864375290293e-06, "loss": 0.7928, "step": 12883 }, { "epoch": 0.769377761853577, "grad_norm": 2.6681888103485107, "learning_rate": 2.563200849313251e-06, "loss": 0.8087, "step": 12884 }, { "epoch": 0.7694374776065926, "grad_norm": 2.872404098510742, "learning_rate": 2.562537323336209e-06, "loss": 0.817, "step": 12885 }, { "epoch": 0.7694971933596083, "grad_norm": 4.401604175567627, "learning_rate": 2.5618737973591667e-06, "loss": 0.8565, "step": 12886 }, { "epoch": 0.7695569091126239, "grad_norm": 6.806119918823242, "learning_rate": 2.5612102713821248e-06, "loss": 0.8006, "step": 12887 }, { "epoch": 0.7696166248656395, "grad_norm": 1.912293553352356, "learning_rate": 2.5605467454050833e-06, "loss": 0.813, "step": 12888 }, { "epoch": 0.7696763406186552, "grad_norm": 2.1060571670532227, "learning_rate": 2.559883219428041e-06, "loss": 0.7859, "step": 12889 }, { "epoch": 0.7697360563716709, "grad_norm": 2.0648081302642822, "learning_rate": 2.559219693450999e-06, "loss": 0.8286, "step": 12890 }, { "epoch": 0.7697957721246865, "grad_norm": 1.668866753578186, "learning_rate": 2.5585561674739567e-06, "loss": 0.8482, "step": 12891 }, { "epoch": 0.7698554878777021, "grad_norm": 4.301475524902344, "learning_rate": 2.557892641496915e-06, "loss": 0.8566, "step": 12892 }, { "epoch": 0.7699152036307177, "grad_norm": 3.176790475845337, "learning_rate": 2.557229115519873e-06, "loss": 0.8036, "step": 12893 }, { "epoch": 0.7699749193837334, "grad_norm": 2.2545337677001953, "learning_rate": 2.556565589542831e-06, "loss": 0.8429, "step": 12894 }, { "epoch": 0.7700346351367491, "grad_norm": 2.035644769668579, "learning_rate": 2.5559020635657887e-06, "loss": 0.8598, "step": 12895 }, { "epoch": 0.7700943508897647, "grad_norm": 2.9741218090057373, "learning_rate": 2.555238537588747e-06, "loss": 0.8117, "step": 12896 }, { "epoch": 0.7701540666427804, "grad_norm": 2.2924699783325195, "learning_rate": 2.5545750116117045e-06, "loss": 0.8296, "step": 12897 }, { "epoch": 0.770213782395796, "grad_norm": 2.0241079330444336, "learning_rate": 2.553911485634663e-06, "loss": 0.8077, "step": 12898 }, { "epoch": 0.7702734981488116, "grad_norm": 2.816439390182495, "learning_rate": 2.553247959657621e-06, "loss": 0.8282, "step": 12899 }, { "epoch": 0.7703332139018273, "grad_norm": 2.3189470767974854, "learning_rate": 2.552584433680579e-06, "loss": 0.8207, "step": 12900 }, { "epoch": 0.770392929654843, "grad_norm": 4.443121910095215, "learning_rate": 2.551920907703537e-06, "loss": 0.8368, "step": 12901 }, { "epoch": 0.7704526454078586, "grad_norm": 3.296701192855835, "learning_rate": 2.5512573817264946e-06, "loss": 0.826, "step": 12902 }, { "epoch": 0.7705123611608743, "grad_norm": 1.8304975032806396, "learning_rate": 2.550593855749453e-06, "loss": 0.8276, "step": 12903 }, { "epoch": 0.7705720769138898, "grad_norm": 3.1455435752868652, "learning_rate": 2.5499303297724108e-06, "loss": 0.8052, "step": 12904 }, { "epoch": 0.7706317926669055, "grad_norm": 2.564455270767212, "learning_rate": 2.549266803795369e-06, "loss": 0.8263, "step": 12905 }, { "epoch": 0.7706915084199212, "grad_norm": 1.9279526472091675, "learning_rate": 2.5486032778183265e-06, "loss": 0.7989, "step": 12906 }, { "epoch": 0.7707512241729368, "grad_norm": 1.8211570978164673, "learning_rate": 2.5479397518412846e-06, "loss": 0.7798, "step": 12907 }, { "epoch": 0.7708109399259525, "grad_norm": 2.14583420753479, "learning_rate": 2.547276225864243e-06, "loss": 0.8177, "step": 12908 }, { "epoch": 0.7708706556789681, "grad_norm": 2.1610264778137207, "learning_rate": 2.546612699887201e-06, "loss": 0.8596, "step": 12909 }, { "epoch": 0.7709303714319837, "grad_norm": 4.882908821105957, "learning_rate": 2.545949173910159e-06, "loss": 0.8307, "step": 12910 }, { "epoch": 0.7709900871849994, "grad_norm": 1.9514732360839844, "learning_rate": 2.5452856479331166e-06, "loss": 0.8244, "step": 12911 }, { "epoch": 0.7710498029380151, "grad_norm": 2.1665198802948, "learning_rate": 2.5446221219560747e-06, "loss": 0.7986, "step": 12912 }, { "epoch": 0.7711095186910307, "grad_norm": 2.495504140853882, "learning_rate": 2.5439585959790332e-06, "loss": 0.8564, "step": 12913 }, { "epoch": 0.7711692344440464, "grad_norm": 2.2279465198516846, "learning_rate": 2.543295070001991e-06, "loss": 0.8528, "step": 12914 }, { "epoch": 0.771228950197062, "grad_norm": 2.291358709335327, "learning_rate": 2.542631544024949e-06, "loss": 0.7951, "step": 12915 }, { "epoch": 0.7712886659500776, "grad_norm": 1.8606364727020264, "learning_rate": 2.5419680180479067e-06, "loss": 0.8501, "step": 12916 }, { "epoch": 0.7713483817030933, "grad_norm": 2.5224146842956543, "learning_rate": 2.541304492070865e-06, "loss": 0.8172, "step": 12917 }, { "epoch": 0.7714080974561089, "grad_norm": 2.9153590202331543, "learning_rate": 2.540640966093823e-06, "loss": 0.8024, "step": 12918 }, { "epoch": 0.7714678132091246, "grad_norm": 2.586895704269409, "learning_rate": 2.539977440116781e-06, "loss": 0.838, "step": 12919 }, { "epoch": 0.7715275289621402, "grad_norm": 3.3161215782165527, "learning_rate": 2.5393139141397387e-06, "loss": 0.8444, "step": 12920 }, { "epoch": 0.7715872447151558, "grad_norm": 2.531621217727661, "learning_rate": 2.5386503881626968e-06, "loss": 0.852, "step": 12921 }, { "epoch": 0.7716469604681715, "grad_norm": 1.9720258712768555, "learning_rate": 2.5379868621856545e-06, "loss": 0.8427, "step": 12922 }, { "epoch": 0.7717066762211872, "grad_norm": 2.4416608810424805, "learning_rate": 2.537323336208613e-06, "loss": 0.8427, "step": 12923 }, { "epoch": 0.7717663919742028, "grad_norm": 3.285372495651245, "learning_rate": 2.536659810231571e-06, "loss": 0.7918, "step": 12924 }, { "epoch": 0.7718261077272185, "grad_norm": 2.1578404903411865, "learning_rate": 2.5359962842545288e-06, "loss": 0.8084, "step": 12925 }, { "epoch": 0.771885823480234, "grad_norm": 3.935145139694214, "learning_rate": 2.535332758277487e-06, "loss": 0.8343, "step": 12926 }, { "epoch": 0.7719455392332497, "grad_norm": 3.236417293548584, "learning_rate": 2.5346692323004445e-06, "loss": 0.8301, "step": 12927 }, { "epoch": 0.7720052549862654, "grad_norm": 10.69375228881836, "learning_rate": 2.534005706323403e-06, "loss": 0.8411, "step": 12928 }, { "epoch": 0.772064970739281, "grad_norm": 1.683512568473816, "learning_rate": 2.5333421803463607e-06, "loss": 0.8007, "step": 12929 }, { "epoch": 0.7721246864922967, "grad_norm": 2.2991373538970947, "learning_rate": 2.532678654369319e-06, "loss": 0.8275, "step": 12930 }, { "epoch": 0.7721844022453123, "grad_norm": 2.3925304412841797, "learning_rate": 2.5320151283922765e-06, "loss": 0.8129, "step": 12931 }, { "epoch": 0.7722441179983279, "grad_norm": 2.217646360397339, "learning_rate": 2.5313516024152346e-06, "loss": 0.8079, "step": 12932 }, { "epoch": 0.7723038337513436, "grad_norm": 2.5750465393066406, "learning_rate": 2.530688076438193e-06, "loss": 0.7935, "step": 12933 }, { "epoch": 0.7723635495043593, "grad_norm": 1.5633143186569214, "learning_rate": 2.530024550461151e-06, "loss": 0.8364, "step": 12934 }, { "epoch": 0.7724232652573749, "grad_norm": 3.096881151199341, "learning_rate": 2.529361024484109e-06, "loss": 0.8475, "step": 12935 }, { "epoch": 0.7724829810103906, "grad_norm": 1.821710467338562, "learning_rate": 2.5286974985070666e-06, "loss": 0.8157, "step": 12936 }, { "epoch": 0.7725426967634061, "grad_norm": 2.5210933685302734, "learning_rate": 2.5280339725300247e-06, "loss": 0.7982, "step": 12937 }, { "epoch": 0.7726024125164218, "grad_norm": 1.686223030090332, "learning_rate": 2.5273704465529832e-06, "loss": 0.8058, "step": 12938 }, { "epoch": 0.7726621282694375, "grad_norm": 2.36543607711792, "learning_rate": 2.526706920575941e-06, "loss": 0.8187, "step": 12939 }, { "epoch": 0.7727218440224531, "grad_norm": 2.998671531677246, "learning_rate": 2.526043394598899e-06, "loss": 0.8576, "step": 12940 }, { "epoch": 0.7727815597754688, "grad_norm": 1.6376194953918457, "learning_rate": 2.5253798686218567e-06, "loss": 0.8345, "step": 12941 }, { "epoch": 0.7728412755284844, "grad_norm": 2.633296251296997, "learning_rate": 2.5247163426448148e-06, "loss": 0.7859, "step": 12942 }, { "epoch": 0.7729009912815, "grad_norm": 3.0215253829956055, "learning_rate": 2.524052816667773e-06, "loss": 0.8041, "step": 12943 }, { "epoch": 0.7729607070345157, "grad_norm": 2.6216886043548584, "learning_rate": 2.523389290690731e-06, "loss": 0.8195, "step": 12944 }, { "epoch": 0.7730204227875314, "grad_norm": 2.355097532272339, "learning_rate": 2.5227257647136887e-06, "loss": 0.8181, "step": 12945 }, { "epoch": 0.773080138540547, "grad_norm": 2.115597724914551, "learning_rate": 2.5220622387366468e-06, "loss": 0.8154, "step": 12946 }, { "epoch": 0.7731398542935627, "grad_norm": 1.7613270282745361, "learning_rate": 2.5213987127596044e-06, "loss": 0.845, "step": 12947 }, { "epoch": 0.7731995700465782, "grad_norm": 3.0387566089630127, "learning_rate": 2.520735186782563e-06, "loss": 0.8354, "step": 12948 }, { "epoch": 0.7732592857995939, "grad_norm": 1.7634793519973755, "learning_rate": 2.520071660805521e-06, "loss": 0.859, "step": 12949 }, { "epoch": 0.7733190015526096, "grad_norm": 2.4251930713653564, "learning_rate": 2.5194081348284787e-06, "loss": 0.8369, "step": 12950 }, { "epoch": 0.7733787173056252, "grad_norm": 3.11004376411438, "learning_rate": 2.518744608851437e-06, "loss": 0.8169, "step": 12951 }, { "epoch": 0.7734384330586409, "grad_norm": 1.9710553884506226, "learning_rate": 2.5180810828743945e-06, "loss": 0.7857, "step": 12952 }, { "epoch": 0.7734981488116565, "grad_norm": 2.133777618408203, "learning_rate": 2.517417556897353e-06, "loss": 0.8588, "step": 12953 }, { "epoch": 0.7735578645646721, "grad_norm": 2.7170636653900146, "learning_rate": 2.5167540309203107e-06, "loss": 0.8339, "step": 12954 }, { "epoch": 0.7736175803176878, "grad_norm": 3.831996202468872, "learning_rate": 2.516090504943269e-06, "loss": 0.8678, "step": 12955 }, { "epoch": 0.7736772960707035, "grad_norm": 1.9017044305801392, "learning_rate": 2.5154269789662265e-06, "loss": 0.8234, "step": 12956 }, { "epoch": 0.7737370118237191, "grad_norm": 1.954023003578186, "learning_rate": 2.5147634529891846e-06, "loss": 0.8231, "step": 12957 }, { "epoch": 0.7737967275767348, "grad_norm": 2.204493522644043, "learning_rate": 2.514099927012143e-06, "loss": 0.7999, "step": 12958 }, { "epoch": 0.7738564433297503, "grad_norm": 2.262035608291626, "learning_rate": 2.5134364010351008e-06, "loss": 0.8306, "step": 12959 }, { "epoch": 0.773916159082766, "grad_norm": 3.431368350982666, "learning_rate": 2.512772875058059e-06, "loss": 0.8393, "step": 12960 }, { "epoch": 0.7739758748357817, "grad_norm": 4.632055759429932, "learning_rate": 2.5121093490810166e-06, "loss": 0.8087, "step": 12961 }, { "epoch": 0.7740355905887973, "grad_norm": 3.531994581222534, "learning_rate": 2.5114458231039747e-06, "loss": 0.8242, "step": 12962 }, { "epoch": 0.774095306341813, "grad_norm": 1.6776212453842163, "learning_rate": 2.510782297126933e-06, "loss": 0.8259, "step": 12963 }, { "epoch": 0.7741550220948287, "grad_norm": 1.824463963508606, "learning_rate": 2.510118771149891e-06, "loss": 0.8316, "step": 12964 }, { "epoch": 0.7742147378478442, "grad_norm": 2.3858959674835205, "learning_rate": 2.509455245172849e-06, "loss": 0.8482, "step": 12965 }, { "epoch": 0.7742744536008599, "grad_norm": 2.6974117755889893, "learning_rate": 2.5087917191958066e-06, "loss": 0.8523, "step": 12966 }, { "epoch": 0.7743341693538756, "grad_norm": 2.107567310333252, "learning_rate": 2.5081281932187647e-06, "loss": 0.811, "step": 12967 }, { "epoch": 0.7743938851068912, "grad_norm": 2.4477434158325195, "learning_rate": 2.507464667241723e-06, "loss": 0.8418, "step": 12968 }, { "epoch": 0.7744536008599069, "grad_norm": 3.5829882621765137, "learning_rate": 2.506801141264681e-06, "loss": 0.8245, "step": 12969 }, { "epoch": 0.7745133166129224, "grad_norm": 1.521681308746338, "learning_rate": 2.5061376152876386e-06, "loss": 0.809, "step": 12970 }, { "epoch": 0.7745730323659381, "grad_norm": 2.372333526611328, "learning_rate": 2.5054740893105967e-06, "loss": 0.817, "step": 12971 }, { "epoch": 0.7746327481189538, "grad_norm": 2.050811529159546, "learning_rate": 2.5048105633335544e-06, "loss": 0.8161, "step": 12972 }, { "epoch": 0.7746924638719694, "grad_norm": 3.5677783489227295, "learning_rate": 2.504147037356513e-06, "loss": 0.8243, "step": 12973 }, { "epoch": 0.7747521796249851, "grad_norm": 2.0683798789978027, "learning_rate": 2.503483511379471e-06, "loss": 0.876, "step": 12974 }, { "epoch": 0.7748118953780008, "grad_norm": 2.5319225788116455, "learning_rate": 2.5028199854024287e-06, "loss": 0.823, "step": 12975 }, { "epoch": 0.7748716111310163, "grad_norm": 2.047675848007202, "learning_rate": 2.502156459425387e-06, "loss": 0.8064, "step": 12976 }, { "epoch": 0.774931326884032, "grad_norm": 1.9539988040924072, "learning_rate": 2.5014929334483445e-06, "loss": 0.8467, "step": 12977 }, { "epoch": 0.7749910426370477, "grad_norm": 2.3720993995666504, "learning_rate": 2.500829407471303e-06, "loss": 0.8064, "step": 12978 }, { "epoch": 0.7750507583900633, "grad_norm": 1.915245771408081, "learning_rate": 2.5001658814942607e-06, "loss": 0.8099, "step": 12979 }, { "epoch": 0.775110474143079, "grad_norm": 2.9131836891174316, "learning_rate": 2.4995023555172188e-06, "loss": 0.791, "step": 12980 }, { "epoch": 0.7751701898960945, "grad_norm": 3.1387829780578613, "learning_rate": 2.4988388295401765e-06, "loss": 0.8515, "step": 12981 }, { "epoch": 0.7752299056491102, "grad_norm": 3.0517163276672363, "learning_rate": 2.498175303563135e-06, "loss": 0.8292, "step": 12982 }, { "epoch": 0.7752896214021259, "grad_norm": 2.405276298522949, "learning_rate": 2.4975117775860927e-06, "loss": 0.7924, "step": 12983 }, { "epoch": 0.7753493371551415, "grad_norm": 1.613683819770813, "learning_rate": 2.4968482516090508e-06, "loss": 0.8233, "step": 12984 }, { "epoch": 0.7754090529081572, "grad_norm": 2.3804397583007812, "learning_rate": 2.496184725632009e-06, "loss": 0.8166, "step": 12985 }, { "epoch": 0.7754687686611729, "grad_norm": 2.509646415710449, "learning_rate": 2.4955211996549665e-06, "loss": 0.8542, "step": 12986 }, { "epoch": 0.7755284844141884, "grad_norm": 2.795753240585327, "learning_rate": 2.4948576736779246e-06, "loss": 0.7942, "step": 12987 }, { "epoch": 0.7755882001672041, "grad_norm": 2.3423566818237305, "learning_rate": 2.4941941477008827e-06, "loss": 0.7813, "step": 12988 }, { "epoch": 0.7756479159202198, "grad_norm": 1.9116023778915405, "learning_rate": 2.4935306217238404e-06, "loss": 0.8247, "step": 12989 }, { "epoch": 0.7757076316732354, "grad_norm": 2.2460811138153076, "learning_rate": 2.492867095746799e-06, "loss": 0.8039, "step": 12990 }, { "epoch": 0.7757673474262511, "grad_norm": 2.6323938369750977, "learning_rate": 2.4922035697697566e-06, "loss": 0.8081, "step": 12991 }, { "epoch": 0.7758270631792666, "grad_norm": 1.8842798471450806, "learning_rate": 2.4915400437927147e-06, "loss": 0.8378, "step": 12992 }, { "epoch": 0.7758867789322823, "grad_norm": 2.102661609649658, "learning_rate": 2.490876517815673e-06, "loss": 0.8311, "step": 12993 }, { "epoch": 0.775946494685298, "grad_norm": 2.6850712299346924, "learning_rate": 2.4902129918386305e-06, "loss": 0.8355, "step": 12994 }, { "epoch": 0.7760062104383136, "grad_norm": 3.2716128826141357, "learning_rate": 2.4895494658615886e-06, "loss": 0.8325, "step": 12995 }, { "epoch": 0.7760659261913293, "grad_norm": 1.8494514226913452, "learning_rate": 2.4888859398845467e-06, "loss": 0.8343, "step": 12996 }, { "epoch": 0.776125641944345, "grad_norm": 2.1502177715301514, "learning_rate": 2.488222413907505e-06, "loss": 0.8313, "step": 12997 }, { "epoch": 0.7761853576973605, "grad_norm": 1.9885072708129883, "learning_rate": 2.487558887930463e-06, "loss": 0.8275, "step": 12998 }, { "epoch": 0.7762450734503762, "grad_norm": 2.050588369369507, "learning_rate": 2.4868953619534206e-06, "loss": 0.8484, "step": 12999 }, { "epoch": 0.7763047892033919, "grad_norm": 2.4624385833740234, "learning_rate": 2.4862318359763787e-06, "loss": 0.8421, "step": 13000 }, { "epoch": 0.7763047892033919, "eval_text_loss": 0.8923404216766357, "eval_text_runtime": 15.2131, "eval_text_samples_per_second": 262.931, "eval_text_steps_per_second": 0.526, "step": 13000 }, { "epoch": 0.7763047892033919, "eval_image_loss": 0.5966247320175171, "eval_image_runtime": 5.019, "eval_image_samples_per_second": 796.978, "eval_image_steps_per_second": 1.594, "step": 13000 }, { "epoch": 0.7763047892033919, "eval_video_loss": 1.0256268978118896, "eval_video_runtime": 77.5766, "eval_video_samples_per_second": 51.562, "eval_video_steps_per_second": 0.103, "step": 13000 }, { "epoch": 0.7763645049564075, "grad_norm": 1.9007688760757446, "learning_rate": 2.4855683099993368e-06, "loss": 0.7849, "step": 13001 }, { "epoch": 0.7764242207094232, "grad_norm": 6.953337669372559, "learning_rate": 2.484904784022295e-06, "loss": 0.7951, "step": 13002 }, { "epoch": 0.7764839364624387, "grad_norm": 3.8394367694854736, "learning_rate": 2.4842412580452525e-06, "loss": 0.8117, "step": 13003 }, { "epoch": 0.7765436522154544, "grad_norm": 2.4806411266326904, "learning_rate": 2.4835777320682107e-06, "loss": 0.7954, "step": 13004 }, { "epoch": 0.7766033679684701, "grad_norm": 2.115677833557129, "learning_rate": 2.4829142060911688e-06, "loss": 0.8251, "step": 13005 }, { "epoch": 0.7766630837214857, "grad_norm": 1.747876763343811, "learning_rate": 2.4822506801141264e-06, "loss": 0.8416, "step": 13006 }, { "epoch": 0.7767227994745014, "grad_norm": 1.9961782693862915, "learning_rate": 2.481587154137085e-06, "loss": 0.8337, "step": 13007 }, { "epoch": 0.7767825152275171, "grad_norm": 2.1027543544769287, "learning_rate": 2.4809236281600426e-06, "loss": 0.8281, "step": 13008 }, { "epoch": 0.7768422309805326, "grad_norm": 2.5028445720672607, "learning_rate": 2.4802601021830007e-06, "loss": 0.8199, "step": 13009 }, { "epoch": 0.7769019467335483, "grad_norm": 2.5844180583953857, "learning_rate": 2.479596576205959e-06, "loss": 0.8006, "step": 13010 }, { "epoch": 0.776961662486564, "grad_norm": 2.3727176189422607, "learning_rate": 2.4789330502289165e-06, "loss": 0.8159, "step": 13011 }, { "epoch": 0.7770213782395796, "grad_norm": 2.4217755794525146, "learning_rate": 2.4782695242518746e-06, "loss": 0.829, "step": 13012 }, { "epoch": 0.7770810939925953, "grad_norm": 3.550220251083374, "learning_rate": 2.4776059982748327e-06, "loss": 0.8334, "step": 13013 }, { "epoch": 0.7771408097456108, "grad_norm": 2.8112199306488037, "learning_rate": 2.4769424722977904e-06, "loss": 0.8093, "step": 13014 }, { "epoch": 0.7772005254986265, "grad_norm": 2.400552272796631, "learning_rate": 2.476278946320749e-06, "loss": 0.8558, "step": 13015 }, { "epoch": 0.7772602412516422, "grad_norm": 3.347795009613037, "learning_rate": 2.4756154203437066e-06, "loss": 0.8159, "step": 13016 }, { "epoch": 0.7773199570046578, "grad_norm": 1.8827314376831055, "learning_rate": 2.4749518943666647e-06, "loss": 0.8018, "step": 13017 }, { "epoch": 0.7773796727576735, "grad_norm": 3.1935760974884033, "learning_rate": 2.4742883683896228e-06, "loss": 0.8136, "step": 13018 }, { "epoch": 0.7774393885106892, "grad_norm": 2.4259471893310547, "learning_rate": 2.4736248424125805e-06, "loss": 0.8283, "step": 13019 }, { "epoch": 0.7774991042637047, "grad_norm": 2.1371450424194336, "learning_rate": 2.4729613164355386e-06, "loss": 0.8232, "step": 13020 }, { "epoch": 0.7775588200167204, "grad_norm": 2.027608633041382, "learning_rate": 2.4722977904584967e-06, "loss": 0.8292, "step": 13021 }, { "epoch": 0.777618535769736, "grad_norm": 2.0933444499969482, "learning_rate": 2.4716342644814548e-06, "loss": 0.8073, "step": 13022 }, { "epoch": 0.7776782515227517, "grad_norm": 2.2733747959136963, "learning_rate": 2.470970738504413e-06, "loss": 0.7796, "step": 13023 }, { "epoch": 0.7777379672757674, "grad_norm": 3.0405194759368896, "learning_rate": 2.4703072125273705e-06, "loss": 0.8243, "step": 13024 }, { "epoch": 0.7777976830287829, "grad_norm": 1.6670479774475098, "learning_rate": 2.4696436865503286e-06, "loss": 0.843, "step": 13025 }, { "epoch": 0.7778573987817986, "grad_norm": 2.358722686767578, "learning_rate": 2.4689801605732867e-06, "loss": 0.8343, "step": 13026 }, { "epoch": 0.7779171145348143, "grad_norm": 1.751155972480774, "learning_rate": 2.468316634596245e-06, "loss": 0.8035, "step": 13027 }, { "epoch": 0.7779768302878299, "grad_norm": 2.3618416786193848, "learning_rate": 2.4676531086192025e-06, "loss": 0.8317, "step": 13028 }, { "epoch": 0.7780365460408456, "grad_norm": 2.0922441482543945, "learning_rate": 2.4669895826421606e-06, "loss": 0.8114, "step": 13029 }, { "epoch": 0.7780962617938613, "grad_norm": 2.5140838623046875, "learning_rate": 2.4663260566651187e-06, "loss": 0.806, "step": 13030 }, { "epoch": 0.7781559775468768, "grad_norm": 2.202967643737793, "learning_rate": 2.4656625306880764e-06, "loss": 0.8162, "step": 13031 }, { "epoch": 0.7782156932998925, "grad_norm": 1.9202659130096436, "learning_rate": 2.464999004711035e-06, "loss": 0.8212, "step": 13032 }, { "epoch": 0.7782754090529082, "grad_norm": 1.7305145263671875, "learning_rate": 2.4643354787339926e-06, "loss": 0.8262, "step": 13033 }, { "epoch": 0.7783351248059238, "grad_norm": 3.259425163269043, "learning_rate": 2.4636719527569507e-06, "loss": 0.8327, "step": 13034 }, { "epoch": 0.7783948405589395, "grad_norm": 2.0117599964141846, "learning_rate": 2.463008426779909e-06, "loss": 0.8712, "step": 13035 }, { "epoch": 0.7784545563119551, "grad_norm": 2.089930534362793, "learning_rate": 2.4623449008028665e-06, "loss": 0.8095, "step": 13036 }, { "epoch": 0.7785142720649707, "grad_norm": 1.718691349029541, "learning_rate": 2.4616813748258246e-06, "loss": 0.7899, "step": 13037 }, { "epoch": 0.7785739878179864, "grad_norm": 2.6900384426116943, "learning_rate": 2.4610178488487827e-06, "loss": 0.799, "step": 13038 }, { "epoch": 0.778633703571002, "grad_norm": 3.5408196449279785, "learning_rate": 2.4603543228717404e-06, "loss": 0.8722, "step": 13039 }, { "epoch": 0.7786934193240177, "grad_norm": 3.6563241481781006, "learning_rate": 2.459690796894699e-06, "loss": 0.8132, "step": 13040 }, { "epoch": 0.7787531350770334, "grad_norm": 3.4281318187713623, "learning_rate": 2.4590272709176566e-06, "loss": 0.8163, "step": 13041 }, { "epoch": 0.7788128508300489, "grad_norm": 2.295919418334961, "learning_rate": 2.4583637449406147e-06, "loss": 0.8375, "step": 13042 }, { "epoch": 0.7788725665830646, "grad_norm": 2.462998628616333, "learning_rate": 2.4577002189635728e-06, "loss": 0.7977, "step": 13043 }, { "epoch": 0.7789322823360803, "grad_norm": 1.8836942911148071, "learning_rate": 2.4570366929865304e-06, "loss": 0.8004, "step": 13044 }, { "epoch": 0.7789919980890959, "grad_norm": 2.2111880779266357, "learning_rate": 2.4563731670094885e-06, "loss": 0.8053, "step": 13045 }, { "epoch": 0.7790517138421116, "grad_norm": 3.376782178878784, "learning_rate": 2.4557096410324466e-06, "loss": 0.8369, "step": 13046 }, { "epoch": 0.7791114295951272, "grad_norm": 2.717247486114502, "learning_rate": 2.4550461150554047e-06, "loss": 0.8054, "step": 13047 }, { "epoch": 0.7791711453481428, "grad_norm": 2.1952803134918213, "learning_rate": 2.454382589078363e-06, "loss": 0.8186, "step": 13048 }, { "epoch": 0.7792308611011585, "grad_norm": 2.5048491954803467, "learning_rate": 2.4537190631013205e-06, "loss": 0.8254, "step": 13049 }, { "epoch": 0.7792905768541741, "grad_norm": 2.408262252807617, "learning_rate": 2.4530555371242786e-06, "loss": 0.7772, "step": 13050 }, { "epoch": 0.7793502926071898, "grad_norm": 1.8965575695037842, "learning_rate": 2.4523920111472367e-06, "loss": 0.8273, "step": 13051 }, { "epoch": 0.7794100083602055, "grad_norm": 1.8837732076644897, "learning_rate": 2.451728485170195e-06, "loss": 0.8334, "step": 13052 }, { "epoch": 0.779469724113221, "grad_norm": 2.7250359058380127, "learning_rate": 2.4510649591931525e-06, "loss": 0.8052, "step": 13053 }, { "epoch": 0.7795294398662367, "grad_norm": 2.119142770767212, "learning_rate": 2.4504014332161106e-06, "loss": 0.8686, "step": 13054 }, { "epoch": 0.7795891556192523, "grad_norm": 2.5186502933502197, "learning_rate": 2.4497379072390687e-06, "loss": 0.8099, "step": 13055 }, { "epoch": 0.779648871372268, "grad_norm": 2.1074867248535156, "learning_rate": 2.4490743812620264e-06, "loss": 0.8286, "step": 13056 }, { "epoch": 0.7797085871252837, "grad_norm": 3.530371904373169, "learning_rate": 2.448410855284985e-06, "loss": 0.7866, "step": 13057 }, { "epoch": 0.7797683028782993, "grad_norm": 2.393113374710083, "learning_rate": 2.4477473293079426e-06, "loss": 0.8177, "step": 13058 }, { "epoch": 0.7798280186313149, "grad_norm": 2.068359136581421, "learning_rate": 2.4470838033309007e-06, "loss": 0.8151, "step": 13059 }, { "epoch": 0.7798877343843306, "grad_norm": 2.3789546489715576, "learning_rate": 2.4464202773538588e-06, "loss": 0.828, "step": 13060 }, { "epoch": 0.7799474501373462, "grad_norm": 2.1246604919433594, "learning_rate": 2.4457567513768164e-06, "loss": 0.8145, "step": 13061 }, { "epoch": 0.7800071658903619, "grad_norm": 1.9962573051452637, "learning_rate": 2.4450932253997745e-06, "loss": 0.831, "step": 13062 }, { "epoch": 0.7800668816433776, "grad_norm": 3.0923993587493896, "learning_rate": 2.4444296994227326e-06, "loss": 0.8295, "step": 13063 }, { "epoch": 0.7801265973963931, "grad_norm": 2.3155264854431152, "learning_rate": 2.4437661734456903e-06, "loss": 0.8172, "step": 13064 }, { "epoch": 0.7801863131494088, "grad_norm": 2.144813299179077, "learning_rate": 2.443102647468649e-06, "loss": 0.815, "step": 13065 }, { "epoch": 0.7802460289024244, "grad_norm": 1.8208167552947998, "learning_rate": 2.4424391214916065e-06, "loss": 0.8247, "step": 13066 }, { "epoch": 0.7803057446554401, "grad_norm": 1.5626181364059448, "learning_rate": 2.4417755955145646e-06, "loss": 0.8331, "step": 13067 }, { "epoch": 0.7803654604084558, "grad_norm": 1.8271957635879517, "learning_rate": 2.4411120695375227e-06, "loss": 0.8253, "step": 13068 }, { "epoch": 0.7804251761614714, "grad_norm": 1.763190507888794, "learning_rate": 2.4404485435604804e-06, "loss": 0.8144, "step": 13069 }, { "epoch": 0.780484891914487, "grad_norm": 2.013040781021118, "learning_rate": 2.4397850175834385e-06, "loss": 0.8519, "step": 13070 }, { "epoch": 0.7805446076675027, "grad_norm": 1.9586559534072876, "learning_rate": 2.4391214916063966e-06, "loss": 0.8039, "step": 13071 }, { "epoch": 0.7806043234205183, "grad_norm": 2.1747212409973145, "learning_rate": 2.4384579656293547e-06, "loss": 0.7968, "step": 13072 }, { "epoch": 0.780664039173534, "grad_norm": 1.9785329103469849, "learning_rate": 2.437794439652313e-06, "loss": 0.8386, "step": 13073 }, { "epoch": 0.7807237549265497, "grad_norm": 2.0138468742370605, "learning_rate": 2.4371309136752705e-06, "loss": 0.8111, "step": 13074 }, { "epoch": 0.7807834706795652, "grad_norm": 2.0961923599243164, "learning_rate": 2.4364673876982286e-06, "loss": 0.8068, "step": 13075 }, { "epoch": 0.7808431864325809, "grad_norm": 2.8106229305267334, "learning_rate": 2.4358038617211867e-06, "loss": 0.8273, "step": 13076 }, { "epoch": 0.7809029021855965, "grad_norm": 2.0971174240112305, "learning_rate": 2.4351403357441448e-06, "loss": 0.8319, "step": 13077 }, { "epoch": 0.7809626179386122, "grad_norm": 3.9393110275268555, "learning_rate": 2.4344768097671025e-06, "loss": 0.8, "step": 13078 }, { "epoch": 0.7810223336916279, "grad_norm": 1.533294439315796, "learning_rate": 2.4338132837900606e-06, "loss": 0.7921, "step": 13079 }, { "epoch": 0.7810820494446435, "grad_norm": 1.5989410877227783, "learning_rate": 2.4331497578130187e-06, "loss": 0.8193, "step": 13080 }, { "epoch": 0.7811417651976591, "grad_norm": 2.4481358528137207, "learning_rate": 2.4324862318359763e-06, "loss": 0.8647, "step": 13081 }, { "epoch": 0.7812014809506748, "grad_norm": 1.9234883785247803, "learning_rate": 2.431822705858935e-06, "loss": 0.8048, "step": 13082 }, { "epoch": 0.7812611967036904, "grad_norm": 2.364380121231079, "learning_rate": 2.4311591798818925e-06, "loss": 0.8179, "step": 13083 }, { "epoch": 0.7813209124567061, "grad_norm": 2.040055274963379, "learning_rate": 2.4304956539048506e-06, "loss": 0.8167, "step": 13084 }, { "epoch": 0.7813806282097218, "grad_norm": 2.1785335540771484, "learning_rate": 2.4298321279278087e-06, "loss": 0.8636, "step": 13085 }, { "epoch": 0.7814403439627373, "grad_norm": 2.3075037002563477, "learning_rate": 2.4291686019507664e-06, "loss": 0.8638, "step": 13086 }, { "epoch": 0.781500059715753, "grad_norm": 1.7666996717453003, "learning_rate": 2.4285050759737245e-06, "loss": 0.8568, "step": 13087 }, { "epoch": 0.7815597754687686, "grad_norm": 3.2364466190338135, "learning_rate": 2.4278415499966826e-06, "loss": 0.8024, "step": 13088 }, { "epoch": 0.7816194912217843, "grad_norm": 2.325071096420288, "learning_rate": 2.4271780240196403e-06, "loss": 0.8574, "step": 13089 }, { "epoch": 0.7816792069748, "grad_norm": 1.7586908340454102, "learning_rate": 2.426514498042599e-06, "loss": 0.7856, "step": 13090 }, { "epoch": 0.7817389227278156, "grad_norm": 2.5512635707855225, "learning_rate": 2.4258509720655565e-06, "loss": 0.8492, "step": 13091 }, { "epoch": 0.7817986384808312, "grad_norm": 2.3793022632598877, "learning_rate": 2.4251874460885146e-06, "loss": 0.8177, "step": 13092 }, { "epoch": 0.7818583542338469, "grad_norm": 1.6038051843643188, "learning_rate": 2.4245239201114727e-06, "loss": 0.7852, "step": 13093 }, { "epoch": 0.7819180699868625, "grad_norm": 1.870864987373352, "learning_rate": 2.4238603941344304e-06, "loss": 0.8338, "step": 13094 }, { "epoch": 0.7819777857398782, "grad_norm": 4.277438163757324, "learning_rate": 2.4231968681573885e-06, "loss": 0.8118, "step": 13095 }, { "epoch": 0.7820375014928939, "grad_norm": 2.031137228012085, "learning_rate": 2.4225333421803466e-06, "loss": 0.7868, "step": 13096 }, { "epoch": 0.7820972172459095, "grad_norm": 3.5850706100463867, "learning_rate": 2.4218698162033047e-06, "loss": 0.8366, "step": 13097 }, { "epoch": 0.7821569329989251, "grad_norm": 2.6793816089630127, "learning_rate": 2.4212062902262628e-06, "loss": 0.7984, "step": 13098 }, { "epoch": 0.7822166487519407, "grad_norm": 2.0023553371429443, "learning_rate": 2.4205427642492205e-06, "loss": 0.787, "step": 13099 }, { "epoch": 0.7822763645049564, "grad_norm": 1.9043744802474976, "learning_rate": 2.4198792382721786e-06, "loss": 0.8061, "step": 13100 }, { "epoch": 0.7823360802579721, "grad_norm": 2.4803333282470703, "learning_rate": 2.4192157122951367e-06, "loss": 0.8464, "step": 13101 }, { "epoch": 0.7823957960109877, "grad_norm": 2.1795406341552734, "learning_rate": 2.4185521863180948e-06, "loss": 0.8243, "step": 13102 }, { "epoch": 0.7824555117640033, "grad_norm": 2.1928470134735107, "learning_rate": 2.4178886603410524e-06, "loss": 0.8456, "step": 13103 }, { "epoch": 0.782515227517019, "grad_norm": 3.2941267490386963, "learning_rate": 2.4172251343640105e-06, "loss": 0.7947, "step": 13104 }, { "epoch": 0.7825749432700346, "grad_norm": 6.1269941329956055, "learning_rate": 2.4165616083869686e-06, "loss": 0.8415, "step": 13105 }, { "epoch": 0.7826346590230503, "grad_norm": 2.1767752170562744, "learning_rate": 2.4158980824099263e-06, "loss": 0.7857, "step": 13106 }, { "epoch": 0.782694374776066, "grad_norm": 1.9853636026382446, "learning_rate": 2.415234556432885e-06, "loss": 0.8146, "step": 13107 }, { "epoch": 0.7827540905290816, "grad_norm": 2.2031919956207275, "learning_rate": 2.4145710304558425e-06, "loss": 0.8127, "step": 13108 }, { "epoch": 0.7828138062820972, "grad_norm": 1.683426856994629, "learning_rate": 2.4139075044788006e-06, "loss": 0.824, "step": 13109 }, { "epoch": 0.7828735220351128, "grad_norm": 1.7835562229156494, "learning_rate": 2.4132439785017587e-06, "loss": 0.8163, "step": 13110 }, { "epoch": 0.7829332377881285, "grad_norm": 1.8166484832763672, "learning_rate": 2.4125804525247164e-06, "loss": 0.8524, "step": 13111 }, { "epoch": 0.7829929535411442, "grad_norm": 6.708885192871094, "learning_rate": 2.4119169265476745e-06, "loss": 0.8341, "step": 13112 }, { "epoch": 0.7830526692941598, "grad_norm": 1.6911957263946533, "learning_rate": 2.4112534005706326e-06, "loss": 0.845, "step": 13113 }, { "epoch": 0.7831123850471754, "grad_norm": 2.8290176391601562, "learning_rate": 2.4105898745935903e-06, "loss": 0.8207, "step": 13114 }, { "epoch": 0.7831721008001911, "grad_norm": 2.366952896118164, "learning_rate": 2.409926348616549e-06, "loss": 0.8005, "step": 13115 }, { "epoch": 0.7832318165532067, "grad_norm": 2.218380928039551, "learning_rate": 2.4092628226395065e-06, "loss": 0.8184, "step": 13116 }, { "epoch": 0.7832915323062224, "grad_norm": 1.5515649318695068, "learning_rate": 2.4085992966624646e-06, "loss": 0.8019, "step": 13117 }, { "epoch": 0.7833512480592381, "grad_norm": 1.8370003700256348, "learning_rate": 2.4079357706854227e-06, "loss": 0.7798, "step": 13118 }, { "epoch": 0.7834109638122537, "grad_norm": 2.3233606815338135, "learning_rate": 2.4072722447083803e-06, "loss": 0.8249, "step": 13119 }, { "epoch": 0.7834706795652693, "grad_norm": 2.0994040966033936, "learning_rate": 2.4066087187313384e-06, "loss": 0.8307, "step": 13120 }, { "epoch": 0.783530395318285, "grad_norm": 1.8469438552856445, "learning_rate": 2.4059451927542965e-06, "loss": 0.8341, "step": 13121 }, { "epoch": 0.7835901110713006, "grad_norm": 2.2618064880371094, "learning_rate": 2.4052816667772546e-06, "loss": 0.8126, "step": 13122 }, { "epoch": 0.7836498268243163, "grad_norm": 1.9466570615768433, "learning_rate": 2.4046181408002127e-06, "loss": 0.843, "step": 13123 }, { "epoch": 0.7837095425773319, "grad_norm": 2.339040994644165, "learning_rate": 2.4039546148231704e-06, "loss": 0.8357, "step": 13124 }, { "epoch": 0.7837692583303475, "grad_norm": 2.750072717666626, "learning_rate": 2.4032910888461285e-06, "loss": 0.8144, "step": 13125 }, { "epoch": 0.7838289740833632, "grad_norm": 4.9731316566467285, "learning_rate": 2.4026275628690866e-06, "loss": 0.8244, "step": 13126 }, { "epoch": 0.7838886898363788, "grad_norm": 1.9176208972930908, "learning_rate": 2.4019640368920443e-06, "loss": 0.8208, "step": 13127 }, { "epoch": 0.7839484055893945, "grad_norm": 1.869920253753662, "learning_rate": 2.4013005109150024e-06, "loss": 0.7767, "step": 13128 }, { "epoch": 0.7840081213424102, "grad_norm": 3.164391279220581, "learning_rate": 2.4006369849379605e-06, "loss": 0.8154, "step": 13129 }, { "epoch": 0.7840678370954258, "grad_norm": 2.6790966987609863, "learning_rate": 2.3999734589609186e-06, "loss": 0.8168, "step": 13130 }, { "epoch": 0.7841275528484414, "grad_norm": 2.9075398445129395, "learning_rate": 2.3993099329838763e-06, "loss": 0.8416, "step": 13131 }, { "epoch": 0.784187268601457, "grad_norm": 1.7794544696807861, "learning_rate": 2.3986464070068344e-06, "loss": 0.8284, "step": 13132 }, { "epoch": 0.7842469843544727, "grad_norm": 1.8879190683364868, "learning_rate": 2.3979828810297925e-06, "loss": 0.7877, "step": 13133 }, { "epoch": 0.7843067001074884, "grad_norm": 2.2786877155303955, "learning_rate": 2.3973193550527506e-06, "loss": 0.8199, "step": 13134 }, { "epoch": 0.784366415860504, "grad_norm": 2.4842264652252197, "learning_rate": 2.3966558290757087e-06, "loss": 0.8066, "step": 13135 }, { "epoch": 0.7844261316135196, "grad_norm": 1.797437071800232, "learning_rate": 2.3959923030986664e-06, "loss": 0.8429, "step": 13136 }, { "epoch": 0.7844858473665353, "grad_norm": 2.6833724975585938, "learning_rate": 2.3953287771216245e-06, "loss": 0.8249, "step": 13137 }, { "epoch": 0.7845455631195509, "grad_norm": 4.481595993041992, "learning_rate": 2.3946652511445826e-06, "loss": 0.8534, "step": 13138 }, { "epoch": 0.7846052788725666, "grad_norm": 2.1590018272399902, "learning_rate": 2.3940017251675402e-06, "loss": 0.8193, "step": 13139 }, { "epoch": 0.7846649946255823, "grad_norm": 1.7797341346740723, "learning_rate": 2.3933381991904988e-06, "loss": 0.8021, "step": 13140 }, { "epoch": 0.7847247103785979, "grad_norm": 2.3527331352233887, "learning_rate": 2.3926746732134564e-06, "loss": 0.8621, "step": 13141 }, { "epoch": 0.7847844261316135, "grad_norm": 1.9852653741836548, "learning_rate": 2.3920111472364145e-06, "loss": 0.8621, "step": 13142 }, { "epoch": 0.7848441418846291, "grad_norm": 1.9017173051834106, "learning_rate": 2.3913476212593726e-06, "loss": 0.8182, "step": 13143 }, { "epoch": 0.7849038576376448, "grad_norm": 3.100165605545044, "learning_rate": 2.3906840952823303e-06, "loss": 0.8455, "step": 13144 }, { "epoch": 0.7849635733906605, "grad_norm": 2.116516351699829, "learning_rate": 2.3900205693052884e-06, "loss": 0.82, "step": 13145 }, { "epoch": 0.7850232891436761, "grad_norm": 1.8065893650054932, "learning_rate": 2.3893570433282465e-06, "loss": 0.8139, "step": 13146 }, { "epoch": 0.7850830048966917, "grad_norm": 2.26739764213562, "learning_rate": 2.388693517351204e-06, "loss": 0.8013, "step": 13147 }, { "epoch": 0.7851427206497074, "grad_norm": 1.9344903230667114, "learning_rate": 2.3880299913741627e-06, "loss": 0.837, "step": 13148 }, { "epoch": 0.785202436402723, "grad_norm": 2.5534918308258057, "learning_rate": 2.3873664653971204e-06, "loss": 0.8444, "step": 13149 }, { "epoch": 0.7852621521557387, "grad_norm": 2.570401430130005, "learning_rate": 2.3867029394200785e-06, "loss": 0.783, "step": 13150 }, { "epoch": 0.7853218679087544, "grad_norm": 1.772470235824585, "learning_rate": 2.3860394134430366e-06, "loss": 0.825, "step": 13151 }, { "epoch": 0.78538158366177, "grad_norm": 2.3782334327697754, "learning_rate": 2.3853758874659943e-06, "loss": 0.8425, "step": 13152 }, { "epoch": 0.7854412994147856, "grad_norm": 2.6295554637908936, "learning_rate": 2.3847123614889524e-06, "loss": 0.858, "step": 13153 }, { "epoch": 0.7855010151678012, "grad_norm": 2.1260695457458496, "learning_rate": 2.3840488355119105e-06, "loss": 0.7884, "step": 13154 }, { "epoch": 0.7855607309208169, "grad_norm": 2.2395265102386475, "learning_rate": 2.3833853095348686e-06, "loss": 0.8128, "step": 13155 }, { "epoch": 0.7856204466738326, "grad_norm": 2.358919620513916, "learning_rate": 2.3827217835578263e-06, "loss": 0.7702, "step": 13156 }, { "epoch": 0.7856801624268482, "grad_norm": 1.8299237489700317, "learning_rate": 2.3820582575807844e-06, "loss": 0.8144, "step": 13157 }, { "epoch": 0.7857398781798638, "grad_norm": 1.9026601314544678, "learning_rate": 2.3813947316037425e-06, "loss": 0.8083, "step": 13158 }, { "epoch": 0.7857995939328795, "grad_norm": 2.744093894958496, "learning_rate": 2.3807312056267006e-06, "loss": 0.789, "step": 13159 }, { "epoch": 0.7858593096858951, "grad_norm": 1.6879184246063232, "learning_rate": 2.3800676796496587e-06, "loss": 0.8264, "step": 13160 }, { "epoch": 0.7859190254389108, "grad_norm": 1.9922593832015991, "learning_rate": 2.3794041536726163e-06, "loss": 0.8002, "step": 13161 }, { "epoch": 0.7859787411919265, "grad_norm": 1.9032459259033203, "learning_rate": 2.3787406276955744e-06, "loss": 0.791, "step": 13162 }, { "epoch": 0.7860384569449421, "grad_norm": 1.609195590019226, "learning_rate": 2.3780771017185325e-06, "loss": 0.8349, "step": 13163 }, { "epoch": 0.7860981726979577, "grad_norm": 2.400575637817383, "learning_rate": 2.37741357574149e-06, "loss": 0.8252, "step": 13164 }, { "epoch": 0.7861578884509733, "grad_norm": 3.2220828533172607, "learning_rate": 2.3767500497644487e-06, "loss": 0.8475, "step": 13165 }, { "epoch": 0.786217604203989, "grad_norm": 1.85667085647583, "learning_rate": 2.3760865237874064e-06, "loss": 0.8181, "step": 13166 }, { "epoch": 0.7862773199570047, "grad_norm": 2.160710096359253, "learning_rate": 2.3754229978103645e-06, "loss": 0.8113, "step": 13167 }, { "epoch": 0.7863370357100203, "grad_norm": 1.7875592708587646, "learning_rate": 2.3747594718333226e-06, "loss": 0.8449, "step": 13168 }, { "epoch": 0.786396751463036, "grad_norm": 2.4916892051696777, "learning_rate": 2.3740959458562803e-06, "loss": 0.8117, "step": 13169 }, { "epoch": 0.7864564672160516, "grad_norm": 1.8476399183273315, "learning_rate": 2.3734324198792384e-06, "loss": 0.8245, "step": 13170 }, { "epoch": 0.7865161829690672, "grad_norm": 1.951922059059143, "learning_rate": 2.3727688939021965e-06, "loss": 0.8228, "step": 13171 }, { "epoch": 0.7865758987220829, "grad_norm": 1.9080414772033691, "learning_rate": 2.372105367925154e-06, "loss": 0.8008, "step": 13172 }, { "epoch": 0.7866356144750986, "grad_norm": 1.7196285724639893, "learning_rate": 2.3714418419481127e-06, "loss": 0.83, "step": 13173 }, { "epoch": 0.7866953302281142, "grad_norm": 1.9088467359542847, "learning_rate": 2.3707783159710704e-06, "loss": 0.8193, "step": 13174 }, { "epoch": 0.7867550459811298, "grad_norm": 1.9004100561141968, "learning_rate": 2.3701147899940285e-06, "loss": 0.8203, "step": 13175 }, { "epoch": 0.7868147617341454, "grad_norm": 2.257357358932495, "learning_rate": 2.3694512640169866e-06, "loss": 0.8147, "step": 13176 }, { "epoch": 0.7868744774871611, "grad_norm": 2.175028085708618, "learning_rate": 2.3687877380399442e-06, "loss": 0.8337, "step": 13177 }, { "epoch": 0.7869341932401768, "grad_norm": 1.6730518341064453, "learning_rate": 2.3681242120629023e-06, "loss": 0.8334, "step": 13178 }, { "epoch": 0.7869939089931924, "grad_norm": 3.9390759468078613, "learning_rate": 2.3674606860858604e-06, "loss": 0.8426, "step": 13179 }, { "epoch": 0.7870536247462081, "grad_norm": 2.1405880451202393, "learning_rate": 2.3667971601088185e-06, "loss": 0.8263, "step": 13180 }, { "epoch": 0.7871133404992237, "grad_norm": 1.9874876737594604, "learning_rate": 2.3661336341317766e-06, "loss": 0.8184, "step": 13181 }, { "epoch": 0.7871730562522393, "grad_norm": 2.6112141609191895, "learning_rate": 2.3654701081547343e-06, "loss": 0.8158, "step": 13182 }, { "epoch": 0.787232772005255, "grad_norm": 1.602371096611023, "learning_rate": 2.3648065821776924e-06, "loss": 0.8509, "step": 13183 }, { "epoch": 0.7872924877582707, "grad_norm": 8.276041030883789, "learning_rate": 2.3641430562006505e-06, "loss": 0.8451, "step": 13184 }, { "epoch": 0.7873522035112863, "grad_norm": 2.3614706993103027, "learning_rate": 2.3634795302236086e-06, "loss": 0.8242, "step": 13185 }, { "epoch": 0.7874119192643019, "grad_norm": 2.4314770698547363, "learning_rate": 2.3628160042465663e-06, "loss": 0.79, "step": 13186 }, { "epoch": 0.7874716350173175, "grad_norm": 2.1382713317871094, "learning_rate": 2.3621524782695244e-06, "loss": 0.7926, "step": 13187 }, { "epoch": 0.7875313507703332, "grad_norm": 2.939483880996704, "learning_rate": 2.3614889522924825e-06, "loss": 0.7962, "step": 13188 }, { "epoch": 0.7875910665233489, "grad_norm": 2.334414482116699, "learning_rate": 2.36082542631544e-06, "loss": 0.8319, "step": 13189 }, { "epoch": 0.7876507822763645, "grad_norm": 2.1211178302764893, "learning_rate": 2.3601619003383987e-06, "loss": 0.8103, "step": 13190 }, { "epoch": 0.7877104980293802, "grad_norm": 1.7115648984909058, "learning_rate": 2.3594983743613564e-06, "loss": 0.82, "step": 13191 }, { "epoch": 0.7877702137823958, "grad_norm": 2.4546966552734375, "learning_rate": 2.3588348483843145e-06, "loss": 0.7944, "step": 13192 }, { "epoch": 0.7878299295354114, "grad_norm": 2.7701189517974854, "learning_rate": 2.3581713224072726e-06, "loss": 0.8538, "step": 13193 }, { "epoch": 0.7878896452884271, "grad_norm": 1.7626919746398926, "learning_rate": 2.3575077964302303e-06, "loss": 0.8022, "step": 13194 }, { "epoch": 0.7879493610414428, "grad_norm": 3.496034860610962, "learning_rate": 2.3568442704531884e-06, "loss": 0.8491, "step": 13195 }, { "epoch": 0.7880090767944584, "grad_norm": 2.9110755920410156, "learning_rate": 2.3561807444761465e-06, "loss": 0.8197, "step": 13196 }, { "epoch": 0.788068792547474, "grad_norm": 2.569368839263916, "learning_rate": 2.355517218499104e-06, "loss": 0.8615, "step": 13197 }, { "epoch": 0.7881285083004896, "grad_norm": 1.813204050064087, "learning_rate": 2.3548536925220627e-06, "loss": 0.8408, "step": 13198 }, { "epoch": 0.7881882240535053, "grad_norm": 1.5931317806243896, "learning_rate": 2.3541901665450203e-06, "loss": 0.8284, "step": 13199 }, { "epoch": 0.788247939806521, "grad_norm": 2.1055796146392822, "learning_rate": 2.3535266405679784e-06, "loss": 0.8483, "step": 13200 }, { "epoch": 0.788247939806521, "eval_text_loss": 0.8912053108215332, "eval_text_runtime": 15.1756, "eval_text_samples_per_second": 263.581, "eval_text_steps_per_second": 0.527, "step": 13200 }, { "epoch": 0.788247939806521, "eval_image_loss": 0.5947199463844299, "eval_image_runtime": 5.0706, "eval_image_samples_per_second": 788.867, "eval_image_steps_per_second": 1.578, "step": 13200 }, { "epoch": 0.788247939806521, "eval_video_loss": 1.024435043334961, "eval_video_runtime": 77.0992, "eval_video_samples_per_second": 51.881, "eval_video_steps_per_second": 0.104, "step": 13200 }, { "epoch": 0.7883076555595366, "grad_norm": 5.631607532501221, "learning_rate": 2.3528631145909365e-06, "loss": 0.8364, "step": 13201 }, { "epoch": 0.7883673713125523, "grad_norm": 2.2246859073638916, "learning_rate": 2.3521995886138942e-06, "loss": 0.8243, "step": 13202 }, { "epoch": 0.7884270870655679, "grad_norm": 1.9704316854476929, "learning_rate": 2.3515360626368523e-06, "loss": 0.816, "step": 13203 }, { "epoch": 0.7884868028185835, "grad_norm": 3.41195011138916, "learning_rate": 2.3508725366598104e-06, "loss": 0.7832, "step": 13204 }, { "epoch": 0.7885465185715992, "grad_norm": 1.8867747783660889, "learning_rate": 2.3502090106827685e-06, "loss": 0.8249, "step": 13205 }, { "epoch": 0.7886062343246149, "grad_norm": 1.9680554866790771, "learning_rate": 2.3495454847057266e-06, "loss": 0.827, "step": 13206 }, { "epoch": 0.7886659500776305, "grad_norm": 2.565678834915161, "learning_rate": 2.3488819587286843e-06, "loss": 0.8027, "step": 13207 }, { "epoch": 0.7887256658306461, "grad_norm": 2.0805578231811523, "learning_rate": 2.3482184327516424e-06, "loss": 0.8408, "step": 13208 }, { "epoch": 0.7887853815836617, "grad_norm": 3.3316383361816406, "learning_rate": 2.3475549067746005e-06, "loss": 0.8125, "step": 13209 }, { "epoch": 0.7888450973366774, "grad_norm": 1.9957388639450073, "learning_rate": 2.3468913807975586e-06, "loss": 0.8393, "step": 13210 }, { "epoch": 0.7889048130896931, "grad_norm": 2.046696901321411, "learning_rate": 2.3462278548205163e-06, "loss": 0.8423, "step": 13211 }, { "epoch": 0.7889645288427087, "grad_norm": 1.9642060995101929, "learning_rate": 2.3455643288434744e-06, "loss": 0.8318, "step": 13212 }, { "epoch": 0.7890242445957244, "grad_norm": 3.860844612121582, "learning_rate": 2.3449008028664325e-06, "loss": 0.8275, "step": 13213 }, { "epoch": 0.78908396034874, "grad_norm": 3.698380470275879, "learning_rate": 2.34423727688939e-06, "loss": 0.8255, "step": 13214 }, { "epoch": 0.7891436761017556, "grad_norm": 2.196747064590454, "learning_rate": 2.3435737509123487e-06, "loss": 0.8237, "step": 13215 }, { "epoch": 0.7892033918547713, "grad_norm": 1.9478566646575928, "learning_rate": 2.3429102249353064e-06, "loss": 0.8454, "step": 13216 }, { "epoch": 0.789263107607787, "grad_norm": 1.8723679780960083, "learning_rate": 2.3422466989582645e-06, "loss": 0.844, "step": 13217 }, { "epoch": 0.7893228233608026, "grad_norm": 2.102919578552246, "learning_rate": 2.3415831729812226e-06, "loss": 0.8036, "step": 13218 }, { "epoch": 0.7893825391138182, "grad_norm": 1.9378994703292847, "learning_rate": 2.3409196470041802e-06, "loss": 0.8265, "step": 13219 }, { "epoch": 0.7894422548668338, "grad_norm": 3.330878496170044, "learning_rate": 2.3402561210271383e-06, "loss": 0.8051, "step": 13220 }, { "epoch": 0.7895019706198495, "grad_norm": 3.8263347148895264, "learning_rate": 2.3395925950500964e-06, "loss": 0.7981, "step": 13221 }, { "epoch": 0.7895616863728652, "grad_norm": 2.3209054470062256, "learning_rate": 2.338929069073054e-06, "loss": 0.8163, "step": 13222 }, { "epoch": 0.7896214021258808, "grad_norm": 3.9899303913116455, "learning_rate": 2.3382655430960126e-06, "loss": 0.8517, "step": 13223 }, { "epoch": 0.7896811178788965, "grad_norm": 2.3967928886413574, "learning_rate": 2.3376020171189703e-06, "loss": 0.7965, "step": 13224 }, { "epoch": 0.789740833631912, "grad_norm": 2.0403285026550293, "learning_rate": 2.3369384911419284e-06, "loss": 0.8401, "step": 13225 }, { "epoch": 0.7898005493849277, "grad_norm": 2.27018141746521, "learning_rate": 2.3362749651648865e-06, "loss": 0.7865, "step": 13226 }, { "epoch": 0.7898602651379434, "grad_norm": 1.9402108192443848, "learning_rate": 2.335611439187844e-06, "loss": 0.785, "step": 13227 }, { "epoch": 0.789919980890959, "grad_norm": 1.9676769971847534, "learning_rate": 2.3349479132108023e-06, "loss": 0.8248, "step": 13228 }, { "epoch": 0.7899796966439747, "grad_norm": 1.6377487182617188, "learning_rate": 2.3342843872337604e-06, "loss": 0.8289, "step": 13229 }, { "epoch": 0.7900394123969904, "grad_norm": 1.831650733947754, "learning_rate": 2.3336208612567185e-06, "loss": 0.8348, "step": 13230 }, { "epoch": 0.7900991281500059, "grad_norm": 1.6658960580825806, "learning_rate": 2.3329573352796766e-06, "loss": 0.7987, "step": 13231 }, { "epoch": 0.7901588439030216, "grad_norm": 2.445514678955078, "learning_rate": 2.3322938093026343e-06, "loss": 0.8316, "step": 13232 }, { "epoch": 0.7902185596560373, "grad_norm": 3.2976760864257812, "learning_rate": 2.3316302833255924e-06, "loss": 0.8137, "step": 13233 }, { "epoch": 0.7902782754090529, "grad_norm": 1.8086975812911987, "learning_rate": 2.3309667573485505e-06, "loss": 0.811, "step": 13234 }, { "epoch": 0.7903379911620686, "grad_norm": 2.064488410949707, "learning_rate": 2.3303032313715086e-06, "loss": 0.8537, "step": 13235 }, { "epoch": 0.7903977069150842, "grad_norm": 1.9772998094558716, "learning_rate": 2.3296397053944662e-06, "loss": 0.7613, "step": 13236 }, { "epoch": 0.7904574226680998, "grad_norm": 2.053178548812866, "learning_rate": 2.3289761794174243e-06, "loss": 0.8327, "step": 13237 }, { "epoch": 0.7905171384211155, "grad_norm": 2.120647668838501, "learning_rate": 2.3283126534403824e-06, "loss": 0.8545, "step": 13238 }, { "epoch": 0.7905768541741311, "grad_norm": 1.7091768980026245, "learning_rate": 2.32764912746334e-06, "loss": 0.8037, "step": 13239 }, { "epoch": 0.7906365699271468, "grad_norm": 2.0829551219940186, "learning_rate": 2.3269856014862986e-06, "loss": 0.8166, "step": 13240 }, { "epoch": 0.7906962856801625, "grad_norm": 2.2848381996154785, "learning_rate": 2.3263220755092563e-06, "loss": 0.8101, "step": 13241 }, { "epoch": 0.790756001433178, "grad_norm": 3.019043445587158, "learning_rate": 2.3256585495322144e-06, "loss": 0.8578, "step": 13242 }, { "epoch": 0.7908157171861937, "grad_norm": 1.602460503578186, "learning_rate": 2.3249950235551725e-06, "loss": 0.786, "step": 13243 }, { "epoch": 0.7908754329392094, "grad_norm": 1.567816972732544, "learning_rate": 2.32433149757813e-06, "loss": 0.8309, "step": 13244 }, { "epoch": 0.790935148692225, "grad_norm": 1.748752474784851, "learning_rate": 2.3236679716010883e-06, "loss": 0.8126, "step": 13245 }, { "epoch": 0.7909948644452407, "grad_norm": 15.06983470916748, "learning_rate": 2.3230044456240464e-06, "loss": 0.8451, "step": 13246 }, { "epoch": 0.7910545801982563, "grad_norm": 2.4595565795898438, "learning_rate": 2.322340919647004e-06, "loss": 0.8179, "step": 13247 }, { "epoch": 0.7911142959512719, "grad_norm": 1.9612045288085938, "learning_rate": 2.3216773936699626e-06, "loss": 0.8096, "step": 13248 }, { "epoch": 0.7911740117042876, "grad_norm": 1.9815500974655151, "learning_rate": 2.3210138676929203e-06, "loss": 0.8075, "step": 13249 }, { "epoch": 0.7912337274573032, "grad_norm": 2.3789620399475098, "learning_rate": 2.3203503417158784e-06, "loss": 0.8332, "step": 13250 }, { "epoch": 0.7912934432103189, "grad_norm": 1.9074652194976807, "learning_rate": 2.3196868157388365e-06, "loss": 0.8478, "step": 13251 }, { "epoch": 0.7913531589633346, "grad_norm": 4.978566646575928, "learning_rate": 2.319023289761794e-06, "loss": 0.8438, "step": 13252 }, { "epoch": 0.7914128747163501, "grad_norm": 1.905226230621338, "learning_rate": 2.3183597637847523e-06, "loss": 0.8282, "step": 13253 }, { "epoch": 0.7914725904693658, "grad_norm": 2.412036895751953, "learning_rate": 2.3176962378077104e-06, "loss": 0.85, "step": 13254 }, { "epoch": 0.7915323062223815, "grad_norm": 2.2141008377075195, "learning_rate": 2.3170327118306685e-06, "loss": 0.8157, "step": 13255 }, { "epoch": 0.7915920219753971, "grad_norm": 2.222203254699707, "learning_rate": 2.3163691858536266e-06, "loss": 0.8076, "step": 13256 }, { "epoch": 0.7916517377284128, "grad_norm": 1.8146151304244995, "learning_rate": 2.3157056598765842e-06, "loss": 0.828, "step": 13257 }, { "epoch": 0.7917114534814284, "grad_norm": 2.1547505855560303, "learning_rate": 2.3150421338995423e-06, "loss": 0.8168, "step": 13258 }, { "epoch": 0.791771169234444, "grad_norm": 2.4794156551361084, "learning_rate": 2.3143786079225004e-06, "loss": 0.8412, "step": 13259 }, { "epoch": 0.7918308849874597, "grad_norm": 6.007411003112793, "learning_rate": 2.3137150819454585e-06, "loss": 0.7742, "step": 13260 }, { "epoch": 0.7918906007404753, "grad_norm": 1.7923429012298584, "learning_rate": 2.3130515559684162e-06, "loss": 0.8113, "step": 13261 }, { "epoch": 0.791950316493491, "grad_norm": 1.9751932621002197, "learning_rate": 2.3123880299913743e-06, "loss": 0.8019, "step": 13262 }, { "epoch": 0.7920100322465067, "grad_norm": 2.0236518383026123, "learning_rate": 2.3117245040143324e-06, "loss": 0.796, "step": 13263 }, { "epoch": 0.7920697479995222, "grad_norm": 4.256272792816162, "learning_rate": 2.31106097803729e-06, "loss": 0.8033, "step": 13264 }, { "epoch": 0.7921294637525379, "grad_norm": 2.5504634380340576, "learning_rate": 2.3103974520602486e-06, "loss": 0.8035, "step": 13265 }, { "epoch": 0.7921891795055536, "grad_norm": 1.939867615699768, "learning_rate": 2.3097339260832063e-06, "loss": 0.8107, "step": 13266 }, { "epoch": 0.7922488952585692, "grad_norm": 4.62144660949707, "learning_rate": 2.3090704001061644e-06, "loss": 0.8412, "step": 13267 }, { "epoch": 0.7923086110115849, "grad_norm": 1.8367581367492676, "learning_rate": 2.3084068741291225e-06, "loss": 0.8247, "step": 13268 }, { "epoch": 0.7923683267646005, "grad_norm": 2.587348222732544, "learning_rate": 2.30774334815208e-06, "loss": 0.8013, "step": 13269 }, { "epoch": 0.7924280425176161, "grad_norm": 1.908147931098938, "learning_rate": 2.3070798221750383e-06, "loss": 0.827, "step": 13270 }, { "epoch": 0.7924877582706318, "grad_norm": 3.567988872528076, "learning_rate": 2.3064162961979964e-06, "loss": 0.8053, "step": 13271 }, { "epoch": 0.7925474740236474, "grad_norm": 2.3116302490234375, "learning_rate": 2.305752770220954e-06, "loss": 0.8227, "step": 13272 }, { "epoch": 0.7926071897766631, "grad_norm": 2.481699228286743, "learning_rate": 2.3050892442439126e-06, "loss": 0.8205, "step": 13273 }, { "epoch": 0.7926669055296788, "grad_norm": 2.6568443775177, "learning_rate": 2.3044257182668703e-06, "loss": 0.7908, "step": 13274 }, { "epoch": 0.7927266212826943, "grad_norm": 1.838346004486084, "learning_rate": 2.3037621922898284e-06, "loss": 0.821, "step": 13275 }, { "epoch": 0.79278633703571, "grad_norm": 3.1724634170532227, "learning_rate": 2.3030986663127865e-06, "loss": 0.8449, "step": 13276 }, { "epoch": 0.7928460527887257, "grad_norm": 3.3847055435180664, "learning_rate": 2.302435140335744e-06, "loss": 0.811, "step": 13277 }, { "epoch": 0.7929057685417413, "grad_norm": 1.7434520721435547, "learning_rate": 2.3017716143587022e-06, "loss": 0.82, "step": 13278 }, { "epoch": 0.792965484294757, "grad_norm": 5.659163475036621, "learning_rate": 2.3011080883816603e-06, "loss": 0.8181, "step": 13279 }, { "epoch": 0.7930252000477725, "grad_norm": 1.6375707387924194, "learning_rate": 2.3004445624046184e-06, "loss": 0.811, "step": 13280 }, { "epoch": 0.7930849158007882, "grad_norm": 3.4907386302948, "learning_rate": 2.2997810364275765e-06, "loss": 0.7896, "step": 13281 }, { "epoch": 0.7931446315538039, "grad_norm": 3.9492077827453613, "learning_rate": 2.299117510450534e-06, "loss": 0.8471, "step": 13282 }, { "epoch": 0.7932043473068195, "grad_norm": 2.5659379959106445, "learning_rate": 2.2984539844734923e-06, "loss": 0.7915, "step": 13283 }, { "epoch": 0.7932640630598352, "grad_norm": 1.9977763891220093, "learning_rate": 2.2977904584964504e-06, "loss": 0.8138, "step": 13284 }, { "epoch": 0.7933237788128509, "grad_norm": 1.7555997371673584, "learning_rate": 2.2971269325194085e-06, "loss": 0.8314, "step": 13285 }, { "epoch": 0.7933834945658664, "grad_norm": 1.9935609102249146, "learning_rate": 2.296463406542366e-06, "loss": 0.8218, "step": 13286 }, { "epoch": 0.7934432103188821, "grad_norm": 3.022618293762207, "learning_rate": 2.2957998805653243e-06, "loss": 0.8329, "step": 13287 }, { "epoch": 0.7935029260718978, "grad_norm": 2.3266851902008057, "learning_rate": 2.2951363545882824e-06, "loss": 0.8068, "step": 13288 }, { "epoch": 0.7935626418249134, "grad_norm": 2.633449077606201, "learning_rate": 2.29447282861124e-06, "loss": 0.848, "step": 13289 }, { "epoch": 0.7936223575779291, "grad_norm": 2.4709715843200684, "learning_rate": 2.2938093026341986e-06, "loss": 0.7908, "step": 13290 }, { "epoch": 0.7936820733309446, "grad_norm": 1.9466032981872559, "learning_rate": 2.2931457766571563e-06, "loss": 0.8381, "step": 13291 }, { "epoch": 0.7937417890839603, "grad_norm": 1.9274729490280151, "learning_rate": 2.2924822506801144e-06, "loss": 0.8195, "step": 13292 }, { "epoch": 0.793801504836976, "grad_norm": 2.222085475921631, "learning_rate": 2.2918187247030725e-06, "loss": 0.795, "step": 13293 }, { "epoch": 0.7938612205899916, "grad_norm": 2.985173225402832, "learning_rate": 2.29115519872603e-06, "loss": 0.8048, "step": 13294 }, { "epoch": 0.7939209363430073, "grad_norm": 1.720191240310669, "learning_rate": 2.2904916727489882e-06, "loss": 0.8379, "step": 13295 }, { "epoch": 0.793980652096023, "grad_norm": 1.9759128093719482, "learning_rate": 2.2898281467719463e-06, "loss": 0.8005, "step": 13296 }, { "epoch": 0.7940403678490385, "grad_norm": 1.5991145372390747, "learning_rate": 2.289164620794904e-06, "loss": 0.8015, "step": 13297 }, { "epoch": 0.7941000836020542, "grad_norm": 2.4205734729766846, "learning_rate": 2.2885010948178625e-06, "loss": 0.8091, "step": 13298 }, { "epoch": 0.7941597993550699, "grad_norm": 4.303693771362305, "learning_rate": 2.2878375688408202e-06, "loss": 0.833, "step": 13299 }, { "epoch": 0.7942195151080855, "grad_norm": 2.021350622177124, "learning_rate": 2.2871740428637783e-06, "loss": 0.8477, "step": 13300 }, { "epoch": 0.7942792308611012, "grad_norm": 4.22097110748291, "learning_rate": 2.2865105168867364e-06, "loss": 0.8321, "step": 13301 }, { "epoch": 0.7943389466141169, "grad_norm": 1.8131929636001587, "learning_rate": 2.285846990909694e-06, "loss": 0.8523, "step": 13302 }, { "epoch": 0.7943986623671324, "grad_norm": 9.091024398803711, "learning_rate": 2.285183464932652e-06, "loss": 0.8412, "step": 13303 }, { "epoch": 0.7944583781201481, "grad_norm": 2.9672954082489014, "learning_rate": 2.2845199389556103e-06, "loss": 0.8253, "step": 13304 }, { "epoch": 0.7945180938731637, "grad_norm": 2.8801610469818115, "learning_rate": 2.2838564129785684e-06, "loss": 0.8341, "step": 13305 }, { "epoch": 0.7945778096261794, "grad_norm": 2.0244572162628174, "learning_rate": 2.2831928870015265e-06, "loss": 0.819, "step": 13306 }, { "epoch": 0.7946375253791951, "grad_norm": 2.209113836288452, "learning_rate": 2.282529361024484e-06, "loss": 0.8356, "step": 13307 }, { "epoch": 0.7946972411322106, "grad_norm": 2.260918140411377, "learning_rate": 2.2818658350474423e-06, "loss": 0.7988, "step": 13308 }, { "epoch": 0.7947569568852263, "grad_norm": 1.6417498588562012, "learning_rate": 2.2812023090704004e-06, "loss": 0.8058, "step": 13309 }, { "epoch": 0.794816672638242, "grad_norm": 2.6288363933563232, "learning_rate": 2.2805387830933585e-06, "loss": 0.8359, "step": 13310 }, { "epoch": 0.7948763883912576, "grad_norm": 1.85556161403656, "learning_rate": 2.279875257116316e-06, "loss": 0.834, "step": 13311 }, { "epoch": 0.7949361041442733, "grad_norm": 1.870273232460022, "learning_rate": 2.2792117311392743e-06, "loss": 0.872, "step": 13312 }, { "epoch": 0.794995819897289, "grad_norm": 2.227996826171875, "learning_rate": 2.2785482051622324e-06, "loss": 0.8147, "step": 13313 }, { "epoch": 0.7950555356503045, "grad_norm": 1.8769335746765137, "learning_rate": 2.27788467918519e-06, "loss": 0.8053, "step": 13314 }, { "epoch": 0.7951152514033202, "grad_norm": 2.222348213195801, "learning_rate": 2.2772211532081486e-06, "loss": 0.8292, "step": 13315 }, { "epoch": 0.7951749671563358, "grad_norm": 2.063897132873535, "learning_rate": 2.2765576272311062e-06, "loss": 0.8357, "step": 13316 }, { "epoch": 0.7952346829093515, "grad_norm": 7.854896068572998, "learning_rate": 2.2758941012540643e-06, "loss": 0.7935, "step": 13317 }, { "epoch": 0.7952943986623672, "grad_norm": 2.145867109298706, "learning_rate": 2.2752305752770224e-06, "loss": 0.837, "step": 13318 }, { "epoch": 0.7953541144153827, "grad_norm": 5.271227836608887, "learning_rate": 2.27456704929998e-06, "loss": 0.8281, "step": 13319 }, { "epoch": 0.7954138301683984, "grad_norm": 2.205904245376587, "learning_rate": 2.2739035233229382e-06, "loss": 0.8286, "step": 13320 }, { "epoch": 0.7954735459214141, "grad_norm": 2.5840048789978027, "learning_rate": 2.2732399973458963e-06, "loss": 0.801, "step": 13321 }, { "epoch": 0.7955332616744297, "grad_norm": 3.0860698223114014, "learning_rate": 2.272576471368854e-06, "loss": 0.8093, "step": 13322 }, { "epoch": 0.7955929774274454, "grad_norm": 2.285571813583374, "learning_rate": 2.2719129453918125e-06, "loss": 0.7915, "step": 13323 }, { "epoch": 0.7956526931804611, "grad_norm": 2.342637777328491, "learning_rate": 2.27124941941477e-06, "loss": 0.8256, "step": 13324 }, { "epoch": 0.7957124089334766, "grad_norm": 2.271059036254883, "learning_rate": 2.2705858934377283e-06, "loss": 0.8304, "step": 13325 }, { "epoch": 0.7957721246864923, "grad_norm": 2.526099443435669, "learning_rate": 2.2699223674606864e-06, "loss": 0.8182, "step": 13326 }, { "epoch": 0.7958318404395079, "grad_norm": 2.0798118114471436, "learning_rate": 2.269258841483644e-06, "loss": 0.8619, "step": 13327 }, { "epoch": 0.7958915561925236, "grad_norm": 2.2651963233947754, "learning_rate": 2.268595315506602e-06, "loss": 0.8527, "step": 13328 }, { "epoch": 0.7959512719455393, "grad_norm": 3.620713710784912, "learning_rate": 2.2679317895295603e-06, "loss": 0.8121, "step": 13329 }, { "epoch": 0.7960109876985548, "grad_norm": 2.7691543102264404, "learning_rate": 2.2672682635525184e-06, "loss": 0.8178, "step": 13330 }, { "epoch": 0.7960707034515705, "grad_norm": 2.5330801010131836, "learning_rate": 2.2666047375754765e-06, "loss": 0.798, "step": 13331 }, { "epoch": 0.7961304192045862, "grad_norm": 1.9863213300704956, "learning_rate": 2.265941211598434e-06, "loss": 0.7944, "step": 13332 }, { "epoch": 0.7961901349576018, "grad_norm": 2.6603055000305176, "learning_rate": 2.2652776856213922e-06, "loss": 0.8489, "step": 13333 }, { "epoch": 0.7962498507106175, "grad_norm": 3.7041354179382324, "learning_rate": 2.2646141596443504e-06, "loss": 0.803, "step": 13334 }, { "epoch": 0.7963095664636332, "grad_norm": 2.160580635070801, "learning_rate": 2.2639506336673085e-06, "loss": 0.8056, "step": 13335 }, { "epoch": 0.7963692822166487, "grad_norm": 5.678552150726318, "learning_rate": 2.263287107690266e-06, "loss": 0.8271, "step": 13336 }, { "epoch": 0.7964289979696644, "grad_norm": 2.2826642990112305, "learning_rate": 2.2626235817132242e-06, "loss": 0.8202, "step": 13337 }, { "epoch": 0.79648871372268, "grad_norm": 3.370391845703125, "learning_rate": 2.2619600557361823e-06, "loss": 0.826, "step": 13338 }, { "epoch": 0.7965484294756957, "grad_norm": 2.0480217933654785, "learning_rate": 2.26129652975914e-06, "loss": 0.8637, "step": 13339 }, { "epoch": 0.7966081452287114, "grad_norm": 2.0032973289489746, "learning_rate": 2.2606330037820985e-06, "loss": 0.8126, "step": 13340 }, { "epoch": 0.7966678609817269, "grad_norm": 2.1625356674194336, "learning_rate": 2.259969477805056e-06, "loss": 0.8085, "step": 13341 }, { "epoch": 0.7967275767347426, "grad_norm": 1.8794411420822144, "learning_rate": 2.2593059518280143e-06, "loss": 0.8201, "step": 13342 }, { "epoch": 0.7967872924877583, "grad_norm": 1.9634392261505127, "learning_rate": 2.2586424258509724e-06, "loss": 0.8252, "step": 13343 }, { "epoch": 0.7968470082407739, "grad_norm": 1.9054944515228271, "learning_rate": 2.25797889987393e-06, "loss": 0.8407, "step": 13344 }, { "epoch": 0.7969067239937896, "grad_norm": 1.7050590515136719, "learning_rate": 2.257315373896888e-06, "loss": 0.8364, "step": 13345 }, { "epoch": 0.7969664397468053, "grad_norm": 1.5048754215240479, "learning_rate": 2.2566518479198463e-06, "loss": 0.8172, "step": 13346 }, { "epoch": 0.7970261554998208, "grad_norm": 2.229893922805786, "learning_rate": 2.255988321942804e-06, "loss": 0.8168, "step": 13347 }, { "epoch": 0.7970858712528365, "grad_norm": 1.849692463874817, "learning_rate": 2.2553247959657625e-06, "loss": 0.8184, "step": 13348 }, { "epoch": 0.7971455870058521, "grad_norm": 2.1537859439849854, "learning_rate": 2.25466126998872e-06, "loss": 0.7902, "step": 13349 }, { "epoch": 0.7972053027588678, "grad_norm": 3.9315125942230225, "learning_rate": 2.2539977440116783e-06, "loss": 0.7925, "step": 13350 }, { "epoch": 0.7972650185118835, "grad_norm": 2.6938345432281494, "learning_rate": 2.2533342180346364e-06, "loss": 0.789, "step": 13351 }, { "epoch": 0.797324734264899, "grad_norm": 2.3362579345703125, "learning_rate": 2.252670692057594e-06, "loss": 0.8566, "step": 13352 }, { "epoch": 0.7973844500179147, "grad_norm": 2.0782620906829834, "learning_rate": 2.252007166080552e-06, "loss": 0.8197, "step": 13353 }, { "epoch": 0.7974441657709304, "grad_norm": 2.348024368286133, "learning_rate": 2.2513436401035102e-06, "loss": 0.8395, "step": 13354 }, { "epoch": 0.797503881523946, "grad_norm": 5.255416393280029, "learning_rate": 2.2506801141264683e-06, "loss": 0.8513, "step": 13355 }, { "epoch": 0.7975635972769617, "grad_norm": 1.9706246852874756, "learning_rate": 2.2500165881494264e-06, "loss": 0.839, "step": 13356 }, { "epoch": 0.7976233130299774, "grad_norm": 1.7637362480163574, "learning_rate": 2.249353062172384e-06, "loss": 0.7935, "step": 13357 }, { "epoch": 0.7976830287829929, "grad_norm": 4.162992477416992, "learning_rate": 2.2486895361953422e-06, "loss": 0.8497, "step": 13358 }, { "epoch": 0.7977427445360086, "grad_norm": 3.501669406890869, "learning_rate": 2.2480260102183003e-06, "loss": 0.791, "step": 13359 }, { "epoch": 0.7978024602890242, "grad_norm": 2.830634355545044, "learning_rate": 2.2473624842412584e-06, "loss": 0.8137, "step": 13360 }, { "epoch": 0.7978621760420399, "grad_norm": 5.5597710609436035, "learning_rate": 2.246698958264216e-06, "loss": 0.7962, "step": 13361 }, { "epoch": 0.7979218917950556, "grad_norm": 4.04929256439209, "learning_rate": 2.246035432287174e-06, "loss": 0.8361, "step": 13362 }, { "epoch": 0.7979816075480711, "grad_norm": 2.4267003536224365, "learning_rate": 2.2453719063101323e-06, "loss": 0.8134, "step": 13363 }, { "epoch": 0.7980413233010868, "grad_norm": 1.7864710092544556, "learning_rate": 2.24470838033309e-06, "loss": 0.7983, "step": 13364 }, { "epoch": 0.7981010390541025, "grad_norm": 1.911401391029358, "learning_rate": 2.2440448543560485e-06, "loss": 0.8256, "step": 13365 }, { "epoch": 0.7981607548071181, "grad_norm": 2.766309976577759, "learning_rate": 2.243381328379006e-06, "loss": 0.8101, "step": 13366 }, { "epoch": 0.7982204705601338, "grad_norm": 3.1198832988739014, "learning_rate": 2.2427178024019643e-06, "loss": 0.8349, "step": 13367 }, { "epoch": 0.7982801863131495, "grad_norm": 1.8887423276901245, "learning_rate": 2.2420542764249224e-06, "loss": 0.8341, "step": 13368 }, { "epoch": 0.798339902066165, "grad_norm": 2.2177512645721436, "learning_rate": 2.24139075044788e-06, "loss": 0.8194, "step": 13369 }, { "epoch": 0.7983996178191807, "grad_norm": 2.3497564792633057, "learning_rate": 2.240727224470838e-06, "loss": 0.8364, "step": 13370 }, { "epoch": 0.7984593335721963, "grad_norm": 2.4765784740448, "learning_rate": 2.2400636984937963e-06, "loss": 0.8262, "step": 13371 }, { "epoch": 0.798519049325212, "grad_norm": 1.8200182914733887, "learning_rate": 2.239400172516754e-06, "loss": 0.8267, "step": 13372 }, { "epoch": 0.7985787650782277, "grad_norm": 1.4821306467056274, "learning_rate": 2.2387366465397125e-06, "loss": 0.8357, "step": 13373 }, { "epoch": 0.7986384808312433, "grad_norm": 2.3297886848449707, "learning_rate": 2.23807312056267e-06, "loss": 0.8069, "step": 13374 }, { "epoch": 0.7986981965842589, "grad_norm": 1.8531928062438965, "learning_rate": 2.2374095945856282e-06, "loss": 0.8367, "step": 13375 }, { "epoch": 0.7987579123372746, "grad_norm": 2.5140841007232666, "learning_rate": 2.2367460686085863e-06, "loss": 0.8463, "step": 13376 }, { "epoch": 0.7988176280902902, "grad_norm": 2.4791548252105713, "learning_rate": 2.236082542631544e-06, "loss": 0.7985, "step": 13377 }, { "epoch": 0.7988773438433059, "grad_norm": 4.057086944580078, "learning_rate": 2.235419016654502e-06, "loss": 0.767, "step": 13378 }, { "epoch": 0.7989370595963216, "grad_norm": 6.6954216957092285, "learning_rate": 2.2347554906774602e-06, "loss": 0.8471, "step": 13379 }, { "epoch": 0.7989967753493371, "grad_norm": 1.8963638544082642, "learning_rate": 2.2340919647004183e-06, "loss": 0.8121, "step": 13380 }, { "epoch": 0.7990564911023528, "grad_norm": 3.0624911785125732, "learning_rate": 2.2334284387233764e-06, "loss": 0.8575, "step": 13381 }, { "epoch": 0.7991162068553684, "grad_norm": 2.1706135272979736, "learning_rate": 2.232764912746334e-06, "loss": 0.8256, "step": 13382 }, { "epoch": 0.7991759226083841, "grad_norm": 2.4706215858459473, "learning_rate": 2.232101386769292e-06, "loss": 0.8146, "step": 13383 }, { "epoch": 0.7992356383613998, "grad_norm": 3.1260526180267334, "learning_rate": 2.2314378607922503e-06, "loss": 0.8262, "step": 13384 }, { "epoch": 0.7992953541144154, "grad_norm": 3.9192092418670654, "learning_rate": 2.2307743348152084e-06, "loss": 0.8536, "step": 13385 }, { "epoch": 0.799355069867431, "grad_norm": 1.597028374671936, "learning_rate": 2.230110808838166e-06, "loss": 0.8122, "step": 13386 }, { "epoch": 0.7994147856204467, "grad_norm": 2.6694750785827637, "learning_rate": 2.229447282861124e-06, "loss": 0.8083, "step": 13387 }, { "epoch": 0.7994745013734623, "grad_norm": 1.9669053554534912, "learning_rate": 2.2287837568840823e-06, "loss": 0.8191, "step": 13388 }, { "epoch": 0.799534217126478, "grad_norm": 1.9045366048812866, "learning_rate": 2.2281202309070404e-06, "loss": 0.8396, "step": 13389 }, { "epoch": 0.7995939328794937, "grad_norm": 6.409529685974121, "learning_rate": 2.2274567049299985e-06, "loss": 0.8097, "step": 13390 }, { "epoch": 0.7996536486325092, "grad_norm": 2.1682989597320557, "learning_rate": 2.226793178952956e-06, "loss": 0.8486, "step": 13391 }, { "epoch": 0.7997133643855249, "grad_norm": 2.8665754795074463, "learning_rate": 2.2261296529759142e-06, "loss": 0.8463, "step": 13392 }, { "epoch": 0.7997730801385405, "grad_norm": 3.875011682510376, "learning_rate": 2.2254661269988723e-06, "loss": 0.8386, "step": 13393 }, { "epoch": 0.7998327958915562, "grad_norm": 2.207357168197632, "learning_rate": 2.22480260102183e-06, "loss": 0.8217, "step": 13394 }, { "epoch": 0.7998925116445719, "grad_norm": 3.3115057945251465, "learning_rate": 2.224139075044788e-06, "loss": 0.823, "step": 13395 }, { "epoch": 0.7999522273975875, "grad_norm": 2.0670971870422363, "learning_rate": 2.2234755490677462e-06, "loss": 0.8173, "step": 13396 }, { "epoch": 0.8000119431506031, "grad_norm": 2.5461652278900146, "learning_rate": 2.222812023090704e-06, "loss": 0.8145, "step": 13397 }, { "epoch": 0.8000716589036188, "grad_norm": 3.2509562969207764, "learning_rate": 2.2221484971136624e-06, "loss": 0.8214, "step": 13398 }, { "epoch": 0.8001313746566344, "grad_norm": 1.8039727210998535, "learning_rate": 2.22148497113662e-06, "loss": 0.8309, "step": 13399 }, { "epoch": 0.8001910904096501, "grad_norm": 2.1793980598449707, "learning_rate": 2.220821445159578e-06, "loss": 0.8249, "step": 13400 }, { "epoch": 0.8001910904096501, "eval_text_loss": 0.8908492922782898, "eval_text_runtime": 15.2109, "eval_text_samples_per_second": 262.969, "eval_text_steps_per_second": 0.526, "step": 13400 }, { "epoch": 0.8001910904096501, "eval_image_loss": 0.5938320159912109, "eval_image_runtime": 5.014, "eval_image_samples_per_second": 797.759, "eval_image_steps_per_second": 1.596, "step": 13400 }, { "epoch": 0.8001910904096501, "eval_video_loss": 1.0222713947296143, "eval_video_runtime": 77.5281, "eval_video_samples_per_second": 51.594, "eval_video_steps_per_second": 0.103, "step": 13400 }, { "epoch": 0.8002508061626658, "grad_norm": 1.8313300609588623, "learning_rate": 2.2201579191825363e-06, "loss": 0.8018, "step": 13401 }, { "epoch": 0.8003105219156813, "grad_norm": 3.184497356414795, "learning_rate": 2.219494393205494e-06, "loss": 0.8458, "step": 13402 }, { "epoch": 0.800370237668697, "grad_norm": 2.115407943725586, "learning_rate": 2.218830867228452e-06, "loss": 0.7661, "step": 13403 }, { "epoch": 0.8004299534217126, "grad_norm": 2.2421393394470215, "learning_rate": 2.21816734125141e-06, "loss": 0.7998, "step": 13404 }, { "epoch": 0.8004896691747283, "grad_norm": 2.0456087589263916, "learning_rate": 2.2175038152743683e-06, "loss": 0.8036, "step": 13405 }, { "epoch": 0.800549384927744, "grad_norm": 2.5365281105041504, "learning_rate": 2.2168402892973264e-06, "loss": 0.8408, "step": 13406 }, { "epoch": 0.8006091006807596, "grad_norm": 1.8353887796401978, "learning_rate": 2.216176763320284e-06, "loss": 0.8019, "step": 13407 }, { "epoch": 0.8006688164337752, "grad_norm": 2.4561593532562256, "learning_rate": 2.215513237343242e-06, "loss": 0.8148, "step": 13408 }, { "epoch": 0.8007285321867909, "grad_norm": 2.083592414855957, "learning_rate": 2.2148497113662003e-06, "loss": 0.8128, "step": 13409 }, { "epoch": 0.8007882479398065, "grad_norm": 1.7198513746261597, "learning_rate": 2.2141861853891584e-06, "loss": 0.7917, "step": 13410 }, { "epoch": 0.8008479636928222, "grad_norm": 2.066035032272339, "learning_rate": 2.213522659412116e-06, "loss": 0.7975, "step": 13411 }, { "epoch": 0.8009076794458379, "grad_norm": 2.1676719188690186, "learning_rate": 2.212859133435074e-06, "loss": 0.8036, "step": 13412 }, { "epoch": 0.8009673951988534, "grad_norm": 2.2369015216827393, "learning_rate": 2.2121956074580322e-06, "loss": 0.8532, "step": 13413 }, { "epoch": 0.8010271109518691, "grad_norm": 2.0906174182891846, "learning_rate": 2.2115320814809903e-06, "loss": 0.8059, "step": 13414 }, { "epoch": 0.8010868267048847, "grad_norm": 2.47613787651062, "learning_rate": 2.210868555503948e-06, "loss": 0.8095, "step": 13415 }, { "epoch": 0.8011465424579004, "grad_norm": 1.8316707611083984, "learning_rate": 2.210205029526906e-06, "loss": 0.8516, "step": 13416 }, { "epoch": 0.8012062582109161, "grad_norm": 1.7819302082061768, "learning_rate": 2.2095415035498642e-06, "loss": 0.7876, "step": 13417 }, { "epoch": 0.8012659739639317, "grad_norm": 8.11207389831543, "learning_rate": 2.2088779775728223e-06, "loss": 0.8125, "step": 13418 }, { "epoch": 0.8013256897169473, "grad_norm": 2.086498737335205, "learning_rate": 2.20821445159578e-06, "loss": 0.8346, "step": 13419 }, { "epoch": 0.801385405469963, "grad_norm": 4.17787504196167, "learning_rate": 2.207550925618738e-06, "loss": 0.8114, "step": 13420 }, { "epoch": 0.8014451212229786, "grad_norm": 1.8767176866531372, "learning_rate": 2.206887399641696e-06, "loss": 0.7991, "step": 13421 }, { "epoch": 0.8015048369759943, "grad_norm": 4.867363929748535, "learning_rate": 2.206223873664654e-06, "loss": 0.825, "step": 13422 }, { "epoch": 0.80156455272901, "grad_norm": 1.6496831178665161, "learning_rate": 2.2055603476876124e-06, "loss": 0.8072, "step": 13423 }, { "epoch": 0.8016242684820255, "grad_norm": 4.047792434692383, "learning_rate": 2.20489682171057e-06, "loss": 0.8515, "step": 13424 }, { "epoch": 0.8016839842350412, "grad_norm": 2.2036983966827393, "learning_rate": 2.204233295733528e-06, "loss": 0.7846, "step": 13425 }, { "epoch": 0.8017436999880568, "grad_norm": 1.902674674987793, "learning_rate": 2.2035697697564863e-06, "loss": 0.8064, "step": 13426 }, { "epoch": 0.8018034157410725, "grad_norm": 2.6613595485687256, "learning_rate": 2.202906243779444e-06, "loss": 0.8393, "step": 13427 }, { "epoch": 0.8018631314940882, "grad_norm": 4.056354999542236, "learning_rate": 2.202242717802402e-06, "loss": 0.8542, "step": 13428 }, { "epoch": 0.8019228472471038, "grad_norm": 1.9915838241577148, "learning_rate": 2.20157919182536e-06, "loss": 0.8092, "step": 13429 }, { "epoch": 0.8019825630001194, "grad_norm": 105.18861389160156, "learning_rate": 2.200915665848318e-06, "loss": 0.8476, "step": 13430 }, { "epoch": 0.802042278753135, "grad_norm": 1.9061897993087769, "learning_rate": 2.2002521398712764e-06, "loss": 0.8114, "step": 13431 }, { "epoch": 0.8021019945061507, "grad_norm": 1.8460414409637451, "learning_rate": 2.199588613894234e-06, "loss": 0.8198, "step": 13432 }, { "epoch": 0.8021617102591664, "grad_norm": 2.291130304336548, "learning_rate": 2.198925087917192e-06, "loss": 0.7963, "step": 13433 }, { "epoch": 0.802221426012182, "grad_norm": 2.246311902999878, "learning_rate": 2.1982615619401502e-06, "loss": 0.8628, "step": 13434 }, { "epoch": 0.8022811417651977, "grad_norm": 1.694406270980835, "learning_rate": 2.197598035963108e-06, "loss": 0.8166, "step": 13435 }, { "epoch": 0.8023408575182133, "grad_norm": 3.277315139770508, "learning_rate": 2.196934509986066e-06, "loss": 0.8304, "step": 13436 }, { "epoch": 0.8024005732712289, "grad_norm": 1.9209816455841064, "learning_rate": 2.196270984009024e-06, "loss": 0.846, "step": 13437 }, { "epoch": 0.8024602890242446, "grad_norm": 2.3524532318115234, "learning_rate": 2.1956074580319822e-06, "loss": 0.822, "step": 13438 }, { "epoch": 0.8025200047772603, "grad_norm": 2.882845878601074, "learning_rate": 2.1949439320549403e-06, "loss": 0.8109, "step": 13439 }, { "epoch": 0.8025797205302759, "grad_norm": 2.699240207672119, "learning_rate": 2.194280406077898e-06, "loss": 0.8188, "step": 13440 }, { "epoch": 0.8026394362832915, "grad_norm": 3.212496280670166, "learning_rate": 2.193616880100856e-06, "loss": 0.7945, "step": 13441 }, { "epoch": 0.8026991520363072, "grad_norm": 1.878851294517517, "learning_rate": 2.192953354123814e-06, "loss": 0.7791, "step": 13442 }, { "epoch": 0.8027588677893228, "grad_norm": 2.7894279956817627, "learning_rate": 2.1922898281467723e-06, "loss": 0.8095, "step": 13443 }, { "epoch": 0.8028185835423385, "grad_norm": 2.044334888458252, "learning_rate": 2.19162630216973e-06, "loss": 0.8318, "step": 13444 }, { "epoch": 0.8028782992953541, "grad_norm": 1.4325817823410034, "learning_rate": 2.190962776192688e-06, "loss": 0.7788, "step": 13445 }, { "epoch": 0.8029380150483698, "grad_norm": 1.7902601957321167, "learning_rate": 2.190299250215646e-06, "loss": 0.8136, "step": 13446 }, { "epoch": 0.8029977308013854, "grad_norm": 4.043580532073975, "learning_rate": 2.189635724238604e-06, "loss": 0.8303, "step": 13447 }, { "epoch": 0.803057446554401, "grad_norm": 1.763740062713623, "learning_rate": 2.1889721982615624e-06, "loss": 0.8148, "step": 13448 }, { "epoch": 0.8031171623074167, "grad_norm": 1.8622642755508423, "learning_rate": 2.18830867228452e-06, "loss": 0.7787, "step": 13449 }, { "epoch": 0.8031768780604324, "grad_norm": 2.358288288116455, "learning_rate": 2.187645146307478e-06, "loss": 0.7757, "step": 13450 }, { "epoch": 0.803236593813448, "grad_norm": 2.939746856689453, "learning_rate": 2.1869816203304362e-06, "loss": 0.8292, "step": 13451 }, { "epoch": 0.8032963095664636, "grad_norm": 1.950503945350647, "learning_rate": 2.186318094353394e-06, "loss": 0.7814, "step": 13452 }, { "epoch": 0.8033560253194792, "grad_norm": 1.7058018445968628, "learning_rate": 2.185654568376352e-06, "loss": 0.8024, "step": 13453 }, { "epoch": 0.8034157410724949, "grad_norm": 2.5309348106384277, "learning_rate": 2.18499104239931e-06, "loss": 0.8209, "step": 13454 }, { "epoch": 0.8034754568255106, "grad_norm": 4.348400115966797, "learning_rate": 2.184327516422268e-06, "loss": 0.8104, "step": 13455 }, { "epoch": 0.8035351725785262, "grad_norm": 1.9711010456085205, "learning_rate": 2.1836639904452263e-06, "loss": 0.8007, "step": 13456 }, { "epoch": 0.8035948883315419, "grad_norm": 3.551166296005249, "learning_rate": 2.183000464468184e-06, "loss": 0.796, "step": 13457 }, { "epoch": 0.8036546040845575, "grad_norm": 3.19329571723938, "learning_rate": 2.182336938491142e-06, "loss": 0.8154, "step": 13458 }, { "epoch": 0.8037143198375731, "grad_norm": 2.2740049362182617, "learning_rate": 2.1816734125141e-06, "loss": 0.8275, "step": 13459 }, { "epoch": 0.8037740355905888, "grad_norm": 2.309497117996216, "learning_rate": 2.181009886537058e-06, "loss": 0.8373, "step": 13460 }, { "epoch": 0.8038337513436045, "grad_norm": 2.5061604976654053, "learning_rate": 2.180346360560016e-06, "loss": 0.8204, "step": 13461 }, { "epoch": 0.8038934670966201, "grad_norm": 2.4638288021087646, "learning_rate": 2.179682834582974e-06, "loss": 0.7703, "step": 13462 }, { "epoch": 0.8039531828496357, "grad_norm": 2.3378851413726807, "learning_rate": 2.179019308605932e-06, "loss": 0.8301, "step": 13463 }, { "epoch": 0.8040128986026513, "grad_norm": 2.5058512687683105, "learning_rate": 2.1783557826288903e-06, "loss": 0.8302, "step": 13464 }, { "epoch": 0.804072614355667, "grad_norm": 2.7608211040496826, "learning_rate": 2.177692256651848e-06, "loss": 0.823, "step": 13465 }, { "epoch": 0.8041323301086827, "grad_norm": 1.8097113370895386, "learning_rate": 2.177028730674806e-06, "loss": 0.8101, "step": 13466 }, { "epoch": 0.8041920458616983, "grad_norm": 2.739610195159912, "learning_rate": 2.176365204697764e-06, "loss": 0.8258, "step": 13467 }, { "epoch": 0.804251761614714, "grad_norm": 2.819021701812744, "learning_rate": 2.1757016787207223e-06, "loss": 0.8225, "step": 13468 }, { "epoch": 0.8043114773677296, "grad_norm": 19.841915130615234, "learning_rate": 2.17503815274368e-06, "loss": 0.85, "step": 13469 }, { "epoch": 0.8043711931207452, "grad_norm": 1.724068284034729, "learning_rate": 2.174374626766638e-06, "loss": 0.8305, "step": 13470 }, { "epoch": 0.8044309088737609, "grad_norm": 1.9786715507507324, "learning_rate": 2.173711100789596e-06, "loss": 0.8163, "step": 13471 }, { "epoch": 0.8044906246267766, "grad_norm": 2.1397509574890137, "learning_rate": 2.173047574812554e-06, "loss": 0.7993, "step": 13472 }, { "epoch": 0.8045503403797922, "grad_norm": 3.1753692626953125, "learning_rate": 2.1723840488355123e-06, "loss": 0.803, "step": 13473 }, { "epoch": 0.8046100561328078, "grad_norm": 12.888378143310547, "learning_rate": 2.17172052285847e-06, "loss": 0.8071, "step": 13474 }, { "epoch": 0.8046697718858234, "grad_norm": 2.3835325241088867, "learning_rate": 2.171056996881428e-06, "loss": 0.8474, "step": 13475 }, { "epoch": 0.8047294876388391, "grad_norm": 2.376962900161743, "learning_rate": 2.1703934709043862e-06, "loss": 0.8086, "step": 13476 }, { "epoch": 0.8047892033918548, "grad_norm": 2.2538721561431885, "learning_rate": 2.169729944927344e-06, "loss": 0.8359, "step": 13477 }, { "epoch": 0.8048489191448704, "grad_norm": 2.414602518081665, "learning_rate": 2.169066418950302e-06, "loss": 0.7873, "step": 13478 }, { "epoch": 0.8049086348978861, "grad_norm": 2.196579933166504, "learning_rate": 2.16840289297326e-06, "loss": 0.8357, "step": 13479 }, { "epoch": 0.8049683506509017, "grad_norm": 3.086540460586548, "learning_rate": 2.1677393669962178e-06, "loss": 0.7993, "step": 13480 }, { "epoch": 0.8050280664039173, "grad_norm": 2.479109764099121, "learning_rate": 2.1670758410191763e-06, "loss": 0.7845, "step": 13481 }, { "epoch": 0.805087782156933, "grad_norm": 2.313723564147949, "learning_rate": 2.166412315042134e-06, "loss": 0.8425, "step": 13482 }, { "epoch": 0.8051474979099487, "grad_norm": 3.403989553451538, "learning_rate": 2.165748789065092e-06, "loss": 0.8205, "step": 13483 }, { "epoch": 0.8052072136629643, "grad_norm": 2.441293716430664, "learning_rate": 2.16508526308805e-06, "loss": 0.8097, "step": 13484 }, { "epoch": 0.8052669294159799, "grad_norm": 4.554473876953125, "learning_rate": 2.164421737111008e-06, "loss": 0.8344, "step": 13485 }, { "epoch": 0.8053266451689955, "grad_norm": 1.842652678489685, "learning_rate": 2.163758211133966e-06, "loss": 0.8315, "step": 13486 }, { "epoch": 0.8053863609220112, "grad_norm": 3.948855400085449, "learning_rate": 2.163094685156924e-06, "loss": 0.7938, "step": 13487 }, { "epoch": 0.8054460766750269, "grad_norm": 2.897106647491455, "learning_rate": 2.162431159179882e-06, "loss": 0.8717, "step": 13488 }, { "epoch": 0.8055057924280425, "grad_norm": 1.9680755138397217, "learning_rate": 2.1617676332028403e-06, "loss": 0.816, "step": 13489 }, { "epoch": 0.8055655081810582, "grad_norm": 3.042577028274536, "learning_rate": 2.161104107225798e-06, "loss": 0.8488, "step": 13490 }, { "epoch": 0.8056252239340738, "grad_norm": 2.084554672241211, "learning_rate": 2.160440581248756e-06, "loss": 0.8342, "step": 13491 }, { "epoch": 0.8056849396870894, "grad_norm": 2.8014886379241943, "learning_rate": 2.159777055271714e-06, "loss": 0.8217, "step": 13492 }, { "epoch": 0.8057446554401051, "grad_norm": 2.314038038253784, "learning_rate": 2.1591135292946722e-06, "loss": 0.8265, "step": 13493 }, { "epoch": 0.8058043711931208, "grad_norm": 2.3745384216308594, "learning_rate": 2.15845000331763e-06, "loss": 0.8107, "step": 13494 }, { "epoch": 0.8058640869461364, "grad_norm": 2.267261028289795, "learning_rate": 2.157786477340588e-06, "loss": 0.8068, "step": 13495 }, { "epoch": 0.805923802699152, "grad_norm": 10.265157699584961, "learning_rate": 2.157122951363546e-06, "loss": 0.804, "step": 13496 }, { "epoch": 0.8059835184521676, "grad_norm": 3.4386954307556152, "learning_rate": 2.1564594253865038e-06, "loss": 0.8487, "step": 13497 }, { "epoch": 0.8060432342051833, "grad_norm": 2.8331685066223145, "learning_rate": 2.1557958994094623e-06, "loss": 0.8502, "step": 13498 }, { "epoch": 0.806102949958199, "grad_norm": 3.568437337875366, "learning_rate": 2.15513237343242e-06, "loss": 0.8089, "step": 13499 }, { "epoch": 0.8061626657112146, "grad_norm": 1.9308761358261108, "learning_rate": 2.154468847455378e-06, "loss": 0.7941, "step": 13500 }, { "epoch": 0.8062223814642303, "grad_norm": 2.368164300918579, "learning_rate": 2.153805321478336e-06, "loss": 0.8466, "step": 13501 }, { "epoch": 0.8062820972172459, "grad_norm": 1.7308540344238281, "learning_rate": 2.153141795501294e-06, "loss": 0.7983, "step": 13502 }, { "epoch": 0.8063418129702615, "grad_norm": 3.1192405223846436, "learning_rate": 2.152478269524252e-06, "loss": 0.853, "step": 13503 }, { "epoch": 0.8064015287232772, "grad_norm": 2.325404167175293, "learning_rate": 2.15181474354721e-06, "loss": 0.8027, "step": 13504 }, { "epoch": 0.8064612444762929, "grad_norm": 2.2160940170288086, "learning_rate": 2.1511512175701677e-06, "loss": 0.7936, "step": 13505 }, { "epoch": 0.8065209602293085, "grad_norm": 3.1474788188934326, "learning_rate": 2.1504876915931263e-06, "loss": 0.8271, "step": 13506 }, { "epoch": 0.8065806759823242, "grad_norm": 3.226694107055664, "learning_rate": 2.149824165616084e-06, "loss": 0.7986, "step": 13507 }, { "epoch": 0.8066403917353397, "grad_norm": 3.5688674449920654, "learning_rate": 2.149160639639042e-06, "loss": 0.8146, "step": 13508 }, { "epoch": 0.8067001074883554, "grad_norm": 2.173103094100952, "learning_rate": 2.148497113662e-06, "loss": 0.8199, "step": 13509 }, { "epoch": 0.8067598232413711, "grad_norm": 3.5958776473999023, "learning_rate": 2.147833587684958e-06, "loss": 0.83, "step": 13510 }, { "epoch": 0.8068195389943867, "grad_norm": 1.85396409034729, "learning_rate": 2.147170061707916e-06, "loss": 0.8272, "step": 13511 }, { "epoch": 0.8068792547474024, "grad_norm": 2.612628698348999, "learning_rate": 2.146506535730874e-06, "loss": 0.8172, "step": 13512 }, { "epoch": 0.806938970500418, "grad_norm": 1.9188250303268433, "learning_rate": 2.145843009753832e-06, "loss": 0.7988, "step": 13513 }, { "epoch": 0.8069986862534336, "grad_norm": 2.2035882472991943, "learning_rate": 2.1451794837767902e-06, "loss": 0.7454, "step": 13514 }, { "epoch": 0.8070584020064493, "grad_norm": 3.009594202041626, "learning_rate": 2.144515957799748e-06, "loss": 0.7973, "step": 13515 }, { "epoch": 0.807118117759465, "grad_norm": 3.725125551223755, "learning_rate": 2.143852431822706e-06, "loss": 0.8278, "step": 13516 }, { "epoch": 0.8071778335124806, "grad_norm": 3.8427605628967285, "learning_rate": 2.143188905845664e-06, "loss": 0.8116, "step": 13517 }, { "epoch": 0.8072375492654963, "grad_norm": 3.5224804878234863, "learning_rate": 2.142525379868622e-06, "loss": 0.8289, "step": 13518 }, { "epoch": 0.8072972650185118, "grad_norm": 1.962875485420227, "learning_rate": 2.14186185389158e-06, "loss": 0.7714, "step": 13519 }, { "epoch": 0.8073569807715275, "grad_norm": 2.0285329818725586, "learning_rate": 2.141198327914538e-06, "loss": 0.8169, "step": 13520 }, { "epoch": 0.8074166965245432, "grad_norm": 2.2887210845947266, "learning_rate": 2.140534801937496e-06, "loss": 0.8329, "step": 13521 }, { "epoch": 0.8074764122775588, "grad_norm": 2.4262945652008057, "learning_rate": 2.1398712759604538e-06, "loss": 0.7885, "step": 13522 }, { "epoch": 0.8075361280305745, "grad_norm": 1.8156654834747314, "learning_rate": 2.1392077499834123e-06, "loss": 0.802, "step": 13523 }, { "epoch": 0.8075958437835901, "grad_norm": 2.702277183532715, "learning_rate": 2.13854422400637e-06, "loss": 0.8429, "step": 13524 }, { "epoch": 0.8076555595366057, "grad_norm": 2.116262912750244, "learning_rate": 2.137880698029328e-06, "loss": 0.8242, "step": 13525 }, { "epoch": 0.8077152752896214, "grad_norm": 2.039285898208618, "learning_rate": 2.137217172052286e-06, "loss": 0.802, "step": 13526 }, { "epoch": 0.8077749910426371, "grad_norm": 2.2402899265289307, "learning_rate": 2.136553646075244e-06, "loss": 0.8201, "step": 13527 }, { "epoch": 0.8078347067956527, "grad_norm": 2.080162763595581, "learning_rate": 2.135890120098202e-06, "loss": 0.7985, "step": 13528 }, { "epoch": 0.8078944225486684, "grad_norm": 4.7639055252075195, "learning_rate": 2.13522659412116e-06, "loss": 0.8192, "step": 13529 }, { "epoch": 0.8079541383016839, "grad_norm": 2.3539273738861084, "learning_rate": 2.1345630681441177e-06, "loss": 0.8442, "step": 13530 }, { "epoch": 0.8080138540546996, "grad_norm": 1.8419300317764282, "learning_rate": 2.1338995421670762e-06, "loss": 0.8184, "step": 13531 }, { "epoch": 0.8080735698077153, "grad_norm": 3.5420501232147217, "learning_rate": 2.133236016190034e-06, "loss": 0.8509, "step": 13532 }, { "epoch": 0.8081332855607309, "grad_norm": 2.2506699562072754, "learning_rate": 2.132572490212992e-06, "loss": 0.8243, "step": 13533 }, { "epoch": 0.8081930013137466, "grad_norm": 2.366048812866211, "learning_rate": 2.13190896423595e-06, "loss": 0.8211, "step": 13534 }, { "epoch": 0.8082527170667622, "grad_norm": 2.4674248695373535, "learning_rate": 2.131245438258908e-06, "loss": 0.832, "step": 13535 }, { "epoch": 0.8083124328197778, "grad_norm": 2.7896888256073, "learning_rate": 2.130581912281866e-06, "loss": 0.8404, "step": 13536 }, { "epoch": 0.8083721485727935, "grad_norm": 3.086437225341797, "learning_rate": 2.129918386304824e-06, "loss": 0.8295, "step": 13537 }, { "epoch": 0.8084318643258092, "grad_norm": 3.710073232650757, "learning_rate": 2.129254860327782e-06, "loss": 0.837, "step": 13538 }, { "epoch": 0.8084915800788248, "grad_norm": 1.8976609706878662, "learning_rate": 2.12859133435074e-06, "loss": 0.8443, "step": 13539 }, { "epoch": 0.8085512958318405, "grad_norm": 1.9966087341308594, "learning_rate": 2.127927808373698e-06, "loss": 0.8057, "step": 13540 }, { "epoch": 0.808611011584856, "grad_norm": 7.675221920013428, "learning_rate": 2.127264282396656e-06, "loss": 0.8096, "step": 13541 }, { "epoch": 0.8086707273378717, "grad_norm": 2.710875988006592, "learning_rate": 2.126600756419614e-06, "loss": 0.8479, "step": 13542 }, { "epoch": 0.8087304430908874, "grad_norm": 3.6816353797912598, "learning_rate": 2.125937230442572e-06, "loss": 0.8364, "step": 13543 }, { "epoch": 0.808790158843903, "grad_norm": 2.3527750968933105, "learning_rate": 2.12527370446553e-06, "loss": 0.8391, "step": 13544 }, { "epoch": 0.8088498745969187, "grad_norm": 2.457343578338623, "learning_rate": 2.124610178488488e-06, "loss": 0.8124, "step": 13545 }, { "epoch": 0.8089095903499343, "grad_norm": 2.01887583732605, "learning_rate": 2.123946652511446e-06, "loss": 0.8598, "step": 13546 }, { "epoch": 0.8089693061029499, "grad_norm": 2.1918647289276123, "learning_rate": 2.1232831265344037e-06, "loss": 0.8169, "step": 13547 }, { "epoch": 0.8090290218559656, "grad_norm": 1.584174633026123, "learning_rate": 2.1226196005573623e-06, "loss": 0.8316, "step": 13548 }, { "epoch": 0.8090887376089813, "grad_norm": 1.6642969846725464, "learning_rate": 2.12195607458032e-06, "loss": 0.7817, "step": 13549 }, { "epoch": 0.8091484533619969, "grad_norm": 2.2990872859954834, "learning_rate": 2.121292548603278e-06, "loss": 0.8242, "step": 13550 }, { "epoch": 0.8092081691150126, "grad_norm": 2.1947083473205566, "learning_rate": 2.120629022626236e-06, "loss": 0.8338, "step": 13551 }, { "epoch": 0.8092678848680281, "grad_norm": 2.351175308227539, "learning_rate": 2.119965496649194e-06, "loss": 0.8186, "step": 13552 }, { "epoch": 0.8093276006210438, "grad_norm": 2.3802311420440674, "learning_rate": 2.119301970672152e-06, "loss": 0.8203, "step": 13553 }, { "epoch": 0.8093873163740595, "grad_norm": 1.8216559886932373, "learning_rate": 2.11863844469511e-06, "loss": 0.7993, "step": 13554 }, { "epoch": 0.8094470321270751, "grad_norm": 1.9407508373260498, "learning_rate": 2.1179749187180677e-06, "loss": 0.8147, "step": 13555 }, { "epoch": 0.8095067478800908, "grad_norm": 2.289435863494873, "learning_rate": 2.117311392741026e-06, "loss": 0.8146, "step": 13556 }, { "epoch": 0.8095664636331064, "grad_norm": 2.7590439319610596, "learning_rate": 2.116647866763984e-06, "loss": 0.8186, "step": 13557 }, { "epoch": 0.809626179386122, "grad_norm": 2.4973063468933105, "learning_rate": 2.115984340786942e-06, "loss": 0.8159, "step": 13558 }, { "epoch": 0.8096858951391377, "grad_norm": 2.3588857650756836, "learning_rate": 2.1153208148099e-06, "loss": 0.8211, "step": 13559 }, { "epoch": 0.8097456108921534, "grad_norm": 1.6686164140701294, "learning_rate": 2.1146572888328578e-06, "loss": 0.8083, "step": 13560 }, { "epoch": 0.809805326645169, "grad_norm": 1.9073609113693237, "learning_rate": 2.113993762855816e-06, "loss": 0.7892, "step": 13561 }, { "epoch": 0.8098650423981847, "grad_norm": 2.3977317810058594, "learning_rate": 2.113330236878774e-06, "loss": 0.8424, "step": 13562 }, { "epoch": 0.8099247581512002, "grad_norm": 2.614414691925049, "learning_rate": 2.112666710901732e-06, "loss": 0.8145, "step": 13563 }, { "epoch": 0.8099844739042159, "grad_norm": 3.5433971881866455, "learning_rate": 2.11200318492469e-06, "loss": 0.7914, "step": 13564 }, { "epoch": 0.8100441896572316, "grad_norm": 3.195551872253418, "learning_rate": 2.111339658947648e-06, "loss": 0.7852, "step": 13565 }, { "epoch": 0.8101039054102472, "grad_norm": 1.776461124420166, "learning_rate": 2.110676132970606e-06, "loss": 0.7876, "step": 13566 }, { "epoch": 0.8101636211632629, "grad_norm": 2.2850358486175537, "learning_rate": 2.110012606993564e-06, "loss": 0.7914, "step": 13567 }, { "epoch": 0.8102233369162786, "grad_norm": 3.0501816272735596, "learning_rate": 2.109349081016522e-06, "loss": 0.8128, "step": 13568 }, { "epoch": 0.8102830526692941, "grad_norm": 2.4326016902923584, "learning_rate": 2.10868555503948e-06, "loss": 0.8392, "step": 13569 }, { "epoch": 0.8103427684223098, "grad_norm": 1.9137729406356812, "learning_rate": 2.108022029062438e-06, "loss": 0.8105, "step": 13570 }, { "epoch": 0.8104024841753255, "grad_norm": 2.144113540649414, "learning_rate": 2.107358503085396e-06, "loss": 0.8013, "step": 13571 }, { "epoch": 0.8104621999283411, "grad_norm": 2.0484707355499268, "learning_rate": 2.1066949771083537e-06, "loss": 0.8334, "step": 13572 }, { "epoch": 0.8105219156813568, "grad_norm": 1.8903629779815674, "learning_rate": 2.1060314511313122e-06, "loss": 0.8263, "step": 13573 }, { "epoch": 0.8105816314343723, "grad_norm": 2.6411755084991455, "learning_rate": 2.10536792515427e-06, "loss": 0.8066, "step": 13574 }, { "epoch": 0.810641347187388, "grad_norm": 2.73541522026062, "learning_rate": 2.104704399177228e-06, "loss": 0.8696, "step": 13575 }, { "epoch": 0.8107010629404037, "grad_norm": 2.449449300765991, "learning_rate": 2.104040873200186e-06, "loss": 0.8399, "step": 13576 }, { "epoch": 0.8107607786934193, "grad_norm": 2.0389821529388428, "learning_rate": 2.1033773472231438e-06, "loss": 0.8269, "step": 13577 }, { "epoch": 0.810820494446435, "grad_norm": 3.9287185668945312, "learning_rate": 2.102713821246102e-06, "loss": 0.7989, "step": 13578 }, { "epoch": 0.8108802101994507, "grad_norm": 2.4575343132019043, "learning_rate": 2.10205029526906e-06, "loss": 0.8397, "step": 13579 }, { "epoch": 0.8109399259524662, "grad_norm": 1.8727428913116455, "learning_rate": 2.1013867692920177e-06, "loss": 0.8334, "step": 13580 }, { "epoch": 0.8109996417054819, "grad_norm": 2.036491632461548, "learning_rate": 2.100723243314976e-06, "loss": 0.808, "step": 13581 }, { "epoch": 0.8110593574584976, "grad_norm": 8.010503768920898, "learning_rate": 2.100059717337934e-06, "loss": 0.8389, "step": 13582 }, { "epoch": 0.8111190732115132, "grad_norm": 3.546699047088623, "learning_rate": 2.099396191360892e-06, "loss": 0.8382, "step": 13583 }, { "epoch": 0.8111787889645289, "grad_norm": 1.9079400300979614, "learning_rate": 2.09873266538385e-06, "loss": 0.8122, "step": 13584 }, { "epoch": 0.8112385047175444, "grad_norm": 2.4242615699768066, "learning_rate": 2.0980691394068077e-06, "loss": 0.8347, "step": 13585 }, { "epoch": 0.8112982204705601, "grad_norm": 4.0654616355896, "learning_rate": 2.097405613429766e-06, "loss": 0.8293, "step": 13586 }, { "epoch": 0.8113579362235758, "grad_norm": 1.9568700790405273, "learning_rate": 2.096742087452724e-06, "loss": 0.8033, "step": 13587 }, { "epoch": 0.8114176519765914, "grad_norm": 2.796921491622925, "learning_rate": 2.096078561475682e-06, "loss": 0.8093, "step": 13588 }, { "epoch": 0.8114773677296071, "grad_norm": 1.7530564069747925, "learning_rate": 2.09541503549864e-06, "loss": 0.8194, "step": 13589 }, { "epoch": 0.8115370834826228, "grad_norm": 1.7123245000839233, "learning_rate": 2.094751509521598e-06, "loss": 0.8251, "step": 13590 }, { "epoch": 0.8115967992356383, "grad_norm": 2.9533493518829346, "learning_rate": 2.094087983544556e-06, "loss": 0.8382, "step": 13591 }, { "epoch": 0.811656514988654, "grad_norm": 3.4719009399414062, "learning_rate": 2.093424457567514e-06, "loss": 0.8684, "step": 13592 }, { "epoch": 0.8117162307416697, "grad_norm": 2.4220287799835205, "learning_rate": 2.092760931590472e-06, "loss": 0.8524, "step": 13593 }, { "epoch": 0.8117759464946853, "grad_norm": 2.721071481704712, "learning_rate": 2.09209740561343e-06, "loss": 0.8392, "step": 13594 }, { "epoch": 0.811835662247701, "grad_norm": 2.4499142169952393, "learning_rate": 2.091433879636388e-06, "loss": 0.8216, "step": 13595 }, { "epoch": 0.8118953780007165, "grad_norm": 2.4859564304351807, "learning_rate": 2.090770353659346e-06, "loss": 0.8081, "step": 13596 }, { "epoch": 0.8119550937537322, "grad_norm": 2.036339521408081, "learning_rate": 2.090106827682304e-06, "loss": 0.8265, "step": 13597 }, { "epoch": 0.8120148095067479, "grad_norm": 1.8819410800933838, "learning_rate": 2.089443301705262e-06, "loss": 0.8312, "step": 13598 }, { "epoch": 0.8120745252597635, "grad_norm": 3.092278242111206, "learning_rate": 2.08877977572822e-06, "loss": 0.8348, "step": 13599 }, { "epoch": 0.8121342410127792, "grad_norm": 2.9412622451782227, "learning_rate": 2.088116249751178e-06, "loss": 0.8229, "step": 13600 }, { "epoch": 0.8121342410127792, "eval_text_loss": 0.8898487687110901, "eval_text_runtime": 15.1723, "eval_text_samples_per_second": 263.639, "eval_text_steps_per_second": 0.527, "step": 13600 }, { "epoch": 0.8121342410127792, "eval_image_loss": 0.592008113861084, "eval_image_runtime": 5.0497, "eval_image_samples_per_second": 792.133, "eval_image_steps_per_second": 1.584, "step": 13600 }, { "epoch": 0.8121342410127792, "eval_video_loss": 1.0201597213745117, "eval_video_runtime": 76.7997, "eval_video_samples_per_second": 52.084, "eval_video_steps_per_second": 0.104, "step": 13600 }, { "epoch": 0.8121939567657949, "grad_norm": 1.9402546882629395, "learning_rate": 2.087452723774136e-06, "loss": 0.8089, "step": 13601 }, { "epoch": 0.8122536725188104, "grad_norm": 3.084961414337158, "learning_rate": 2.0867891977970938e-06, "loss": 0.8536, "step": 13602 }, { "epoch": 0.8123133882718261, "grad_norm": 2.180755376815796, "learning_rate": 2.086125671820052e-06, "loss": 0.8371, "step": 13603 }, { "epoch": 0.8123731040248418, "grad_norm": 1.7529796361923218, "learning_rate": 2.08546214584301e-06, "loss": 0.8195, "step": 13604 }, { "epoch": 0.8124328197778574, "grad_norm": 1.9416557550430298, "learning_rate": 2.0847986198659676e-06, "loss": 0.8292, "step": 13605 }, { "epoch": 0.8124925355308731, "grad_norm": 1.8254585266113281, "learning_rate": 2.084135093888926e-06, "loss": 0.8425, "step": 13606 }, { "epoch": 0.8125522512838886, "grad_norm": 1.8743419647216797, "learning_rate": 2.083471567911884e-06, "loss": 0.806, "step": 13607 }, { "epoch": 0.8126119670369043, "grad_norm": 2.8523824214935303, "learning_rate": 2.082808041934842e-06, "loss": 0.8297, "step": 13608 }, { "epoch": 0.81267168278992, "grad_norm": 1.8933171033859253, "learning_rate": 2.0821445159578e-06, "loss": 0.8211, "step": 13609 }, { "epoch": 0.8127313985429356, "grad_norm": 5.405916213989258, "learning_rate": 2.0814809899807577e-06, "loss": 0.8335, "step": 13610 }, { "epoch": 0.8127911142959513, "grad_norm": 2.314976215362549, "learning_rate": 2.080817464003716e-06, "loss": 0.818, "step": 13611 }, { "epoch": 0.812850830048967, "grad_norm": 1.7967376708984375, "learning_rate": 2.080153938026674e-06, "loss": 0.8605, "step": 13612 }, { "epoch": 0.8129105458019825, "grad_norm": 1.9569820165634155, "learning_rate": 2.079490412049632e-06, "loss": 0.8072, "step": 13613 }, { "epoch": 0.8129702615549982, "grad_norm": 1.61091947555542, "learning_rate": 2.07882688607259e-06, "loss": 0.8229, "step": 13614 }, { "epoch": 0.8130299773080139, "grad_norm": 2.8520424365997314, "learning_rate": 2.0781633600955478e-06, "loss": 0.8358, "step": 13615 }, { "epoch": 0.8130896930610295, "grad_norm": 2.984463930130005, "learning_rate": 2.077499834118506e-06, "loss": 0.8356, "step": 13616 }, { "epoch": 0.8131494088140452, "grad_norm": 2.0160131454467773, "learning_rate": 2.076836308141464e-06, "loss": 0.8376, "step": 13617 }, { "epoch": 0.8132091245670607, "grad_norm": 2.9849748611450195, "learning_rate": 2.076172782164422e-06, "loss": 0.807, "step": 13618 }, { "epoch": 0.8132688403200764, "grad_norm": 5.2073211669921875, "learning_rate": 2.0755092561873798e-06, "loss": 0.7994, "step": 13619 }, { "epoch": 0.8133285560730921, "grad_norm": 2.423658847808838, "learning_rate": 2.074845730210338e-06, "loss": 0.8164, "step": 13620 }, { "epoch": 0.8133882718261077, "grad_norm": 2.926720380783081, "learning_rate": 2.074182204233296e-06, "loss": 0.8507, "step": 13621 }, { "epoch": 0.8134479875791234, "grad_norm": 1.6092779636383057, "learning_rate": 2.073518678256254e-06, "loss": 0.803, "step": 13622 }, { "epoch": 0.8135077033321391, "grad_norm": 2.148422956466675, "learning_rate": 2.072855152279212e-06, "loss": 0.7786, "step": 13623 }, { "epoch": 0.8135674190851546, "grad_norm": 1.8419102430343628, "learning_rate": 2.07219162630217e-06, "loss": 0.8359, "step": 13624 }, { "epoch": 0.8136271348381703, "grad_norm": 1.9484896659851074, "learning_rate": 2.071528100325128e-06, "loss": 0.8139, "step": 13625 }, { "epoch": 0.813686850591186, "grad_norm": 2.873532772064209, "learning_rate": 2.070864574348086e-06, "loss": 0.8486, "step": 13626 }, { "epoch": 0.8137465663442016, "grad_norm": 2.7745048999786377, "learning_rate": 2.0702010483710437e-06, "loss": 0.8115, "step": 13627 }, { "epoch": 0.8138062820972173, "grad_norm": 2.7797045707702637, "learning_rate": 2.069537522394002e-06, "loss": 0.8232, "step": 13628 }, { "epoch": 0.8138659978502328, "grad_norm": 1.7756574153900146, "learning_rate": 2.06887399641696e-06, "loss": 0.807, "step": 13629 }, { "epoch": 0.8139257136032485, "grad_norm": 1.8889726400375366, "learning_rate": 2.0682104704399176e-06, "loss": 0.8233, "step": 13630 }, { "epoch": 0.8139854293562642, "grad_norm": 2.1534554958343506, "learning_rate": 2.067546944462876e-06, "loss": 0.8105, "step": 13631 }, { "epoch": 0.8140451451092798, "grad_norm": 1.7939274311065674, "learning_rate": 2.066883418485834e-06, "loss": 0.8058, "step": 13632 }, { "epoch": 0.8141048608622955, "grad_norm": 2.2317733764648438, "learning_rate": 2.066219892508792e-06, "loss": 0.815, "step": 13633 }, { "epoch": 0.8141645766153112, "grad_norm": 2.064457416534424, "learning_rate": 2.06555636653175e-06, "loss": 0.8149, "step": 13634 }, { "epoch": 0.8142242923683267, "grad_norm": 1.756155252456665, "learning_rate": 2.0648928405547077e-06, "loss": 0.8104, "step": 13635 }, { "epoch": 0.8142840081213424, "grad_norm": 1.5050582885742188, "learning_rate": 2.0642293145776658e-06, "loss": 0.7683, "step": 13636 }, { "epoch": 0.814343723874358, "grad_norm": 1.9430549144744873, "learning_rate": 2.063565788600624e-06, "loss": 0.8492, "step": 13637 }, { "epoch": 0.8144034396273737, "grad_norm": 2.222867012023926, "learning_rate": 2.062902262623582e-06, "loss": 0.8427, "step": 13638 }, { "epoch": 0.8144631553803894, "grad_norm": 2.6339282989501953, "learning_rate": 2.06223873664654e-06, "loss": 0.8278, "step": 13639 }, { "epoch": 0.814522871133405, "grad_norm": 18.10734748840332, "learning_rate": 2.0615752106694978e-06, "loss": 0.8714, "step": 13640 }, { "epoch": 0.8145825868864206, "grad_norm": 3.526249647140503, "learning_rate": 2.060911684692456e-06, "loss": 0.8577, "step": 13641 }, { "epoch": 0.8146423026394363, "grad_norm": 1.889275312423706, "learning_rate": 2.060248158715414e-06, "loss": 0.8129, "step": 13642 }, { "epoch": 0.8147020183924519, "grad_norm": 2.0037856101989746, "learning_rate": 2.059584632738372e-06, "loss": 0.8425, "step": 13643 }, { "epoch": 0.8147617341454676, "grad_norm": 2.1230154037475586, "learning_rate": 2.0589211067613297e-06, "loss": 0.7988, "step": 13644 }, { "epoch": 0.8148214498984833, "grad_norm": 2.075488567352295, "learning_rate": 2.058257580784288e-06, "loss": 0.8196, "step": 13645 }, { "epoch": 0.8148811656514988, "grad_norm": 2.2827396392822266, "learning_rate": 2.057594054807246e-06, "loss": 0.8292, "step": 13646 }, { "epoch": 0.8149408814045145, "grad_norm": 2.8711037635803223, "learning_rate": 2.056930528830204e-06, "loss": 0.825, "step": 13647 }, { "epoch": 0.8150005971575301, "grad_norm": 4.413105010986328, "learning_rate": 2.056267002853162e-06, "loss": 0.786, "step": 13648 }, { "epoch": 0.8150603129105458, "grad_norm": 2.0584657192230225, "learning_rate": 2.05560347687612e-06, "loss": 0.8274, "step": 13649 }, { "epoch": 0.8151200286635615, "grad_norm": 2.231316089630127, "learning_rate": 2.054939950899078e-06, "loss": 0.833, "step": 13650 }, { "epoch": 0.8151797444165771, "grad_norm": 2.235544443130493, "learning_rate": 2.054276424922036e-06, "loss": 0.7844, "step": 13651 }, { "epoch": 0.8152394601695927, "grad_norm": 1.859486699104309, "learning_rate": 2.0536128989449937e-06, "loss": 0.8218, "step": 13652 }, { "epoch": 0.8152991759226084, "grad_norm": 2.0374977588653564, "learning_rate": 2.052949372967952e-06, "loss": 0.8212, "step": 13653 }, { "epoch": 0.815358891675624, "grad_norm": 8.7094087600708, "learning_rate": 2.05228584699091e-06, "loss": 0.7939, "step": 13654 }, { "epoch": 0.8154186074286397, "grad_norm": 1.988257884979248, "learning_rate": 2.0516223210138676e-06, "loss": 0.8491, "step": 13655 }, { "epoch": 0.8154783231816554, "grad_norm": 2.279881000518799, "learning_rate": 2.050958795036826e-06, "loss": 0.8138, "step": 13656 }, { "epoch": 0.8155380389346709, "grad_norm": 4.601383209228516, "learning_rate": 2.0502952690597838e-06, "loss": 0.8183, "step": 13657 }, { "epoch": 0.8155977546876866, "grad_norm": 4.898900032043457, "learning_rate": 2.049631743082742e-06, "loss": 0.8695, "step": 13658 }, { "epoch": 0.8156574704407022, "grad_norm": 3.428745746612549, "learning_rate": 2.0489682171057e-06, "loss": 0.8328, "step": 13659 }, { "epoch": 0.8157171861937179, "grad_norm": 2.4632649421691895, "learning_rate": 2.0483046911286576e-06, "loss": 0.856, "step": 13660 }, { "epoch": 0.8157769019467336, "grad_norm": 1.8209813833236694, "learning_rate": 2.0476411651516157e-06, "loss": 0.8251, "step": 13661 }, { "epoch": 0.8158366176997492, "grad_norm": 1.7961599826812744, "learning_rate": 2.046977639174574e-06, "loss": 0.8056, "step": 13662 }, { "epoch": 0.8158963334527648, "grad_norm": 1.9250792264938354, "learning_rate": 2.046314113197532e-06, "loss": 0.7627, "step": 13663 }, { "epoch": 0.8159560492057805, "grad_norm": 1.9691452980041504, "learning_rate": 2.04565058722049e-06, "loss": 0.8381, "step": 13664 }, { "epoch": 0.8160157649587961, "grad_norm": 4.939687728881836, "learning_rate": 2.0449870612434477e-06, "loss": 0.8592, "step": 13665 }, { "epoch": 0.8160754807118118, "grad_norm": 1.8750404119491577, "learning_rate": 2.044323535266406e-06, "loss": 0.7976, "step": 13666 }, { "epoch": 0.8161351964648275, "grad_norm": 2.0674283504486084, "learning_rate": 2.043660009289364e-06, "loss": 0.8362, "step": 13667 }, { "epoch": 0.816194912217843, "grad_norm": 2.40863299369812, "learning_rate": 2.042996483312322e-06, "loss": 0.8214, "step": 13668 }, { "epoch": 0.8162546279708587, "grad_norm": 4.191926956176758, "learning_rate": 2.0423329573352797e-06, "loss": 0.8756, "step": 13669 }, { "epoch": 0.8163143437238743, "grad_norm": 2.6489510536193848, "learning_rate": 2.041669431358238e-06, "loss": 0.8155, "step": 13670 }, { "epoch": 0.81637405947689, "grad_norm": 2.9016942977905273, "learning_rate": 2.041005905381196e-06, "loss": 0.8516, "step": 13671 }, { "epoch": 0.8164337752299057, "grad_norm": 2.2409729957580566, "learning_rate": 2.040342379404154e-06, "loss": 0.8327, "step": 13672 }, { "epoch": 0.8164934909829213, "grad_norm": 3.098172426223755, "learning_rate": 2.039678853427112e-06, "loss": 0.8266, "step": 13673 }, { "epoch": 0.8165532067359369, "grad_norm": 2.6211295127868652, "learning_rate": 2.0390153274500698e-06, "loss": 0.8028, "step": 13674 }, { "epoch": 0.8166129224889526, "grad_norm": 2.159677743911743, "learning_rate": 2.038351801473028e-06, "loss": 0.8245, "step": 13675 }, { "epoch": 0.8166726382419682, "grad_norm": 1.7598637342453003, "learning_rate": 2.037688275495986e-06, "loss": 0.7975, "step": 13676 }, { "epoch": 0.8167323539949839, "grad_norm": 1.7939987182617188, "learning_rate": 2.0370247495189437e-06, "loss": 0.803, "step": 13677 }, { "epoch": 0.8167920697479996, "grad_norm": 2.502142906188965, "learning_rate": 2.0363612235419018e-06, "loss": 0.8331, "step": 13678 }, { "epoch": 0.8168517855010151, "grad_norm": 5.00685453414917, "learning_rate": 2.03569769756486e-06, "loss": 0.8001, "step": 13679 }, { "epoch": 0.8169115012540308, "grad_norm": 3.0936386585235596, "learning_rate": 2.0350341715878175e-06, "loss": 0.8396, "step": 13680 }, { "epoch": 0.8169712170070464, "grad_norm": 2.3794057369232178, "learning_rate": 2.034370645610776e-06, "loss": 0.7937, "step": 13681 }, { "epoch": 0.8170309327600621, "grad_norm": 2.0511085987091064, "learning_rate": 2.0337071196337337e-06, "loss": 0.8342, "step": 13682 }, { "epoch": 0.8170906485130778, "grad_norm": 2.469560384750366, "learning_rate": 2.033043593656692e-06, "loss": 0.8399, "step": 13683 }, { "epoch": 0.8171503642660934, "grad_norm": 1.9518224000930786, "learning_rate": 2.03238006767965e-06, "loss": 0.8282, "step": 13684 }, { "epoch": 0.817210080019109, "grad_norm": 1.8323664665222168, "learning_rate": 2.0317165417026076e-06, "loss": 0.8474, "step": 13685 }, { "epoch": 0.8172697957721247, "grad_norm": 3.11010479927063, "learning_rate": 2.0310530157255657e-06, "loss": 0.7857, "step": 13686 }, { "epoch": 0.8173295115251403, "grad_norm": 2.635899066925049, "learning_rate": 2.030389489748524e-06, "loss": 0.8139, "step": 13687 }, { "epoch": 0.817389227278156, "grad_norm": 2.3328158855438232, "learning_rate": 2.029725963771482e-06, "loss": 0.8017, "step": 13688 }, { "epoch": 0.8174489430311717, "grad_norm": 1.917175054550171, "learning_rate": 2.02906243779444e-06, "loss": 0.8088, "step": 13689 }, { "epoch": 0.8175086587841872, "grad_norm": 1.832053780555725, "learning_rate": 2.0283989118173977e-06, "loss": 0.81, "step": 13690 }, { "epoch": 0.8175683745372029, "grad_norm": 2.664698839187622, "learning_rate": 2.027735385840356e-06, "loss": 0.8541, "step": 13691 }, { "epoch": 0.8176280902902185, "grad_norm": 2.6616599559783936, "learning_rate": 2.027071859863314e-06, "loss": 0.8367, "step": 13692 }, { "epoch": 0.8176878060432342, "grad_norm": 3.7330899238586426, "learning_rate": 2.026408333886272e-06, "loss": 0.7748, "step": 13693 }, { "epoch": 0.8177475217962499, "grad_norm": 2.662626266479492, "learning_rate": 2.0257448079092297e-06, "loss": 0.8389, "step": 13694 }, { "epoch": 0.8178072375492655, "grad_norm": 2.231257677078247, "learning_rate": 2.0250812819321878e-06, "loss": 0.7947, "step": 13695 }, { "epoch": 0.8178669533022811, "grad_norm": 2.283261299133301, "learning_rate": 2.024417755955146e-06, "loss": 0.7979, "step": 13696 }, { "epoch": 0.8179266690552968, "grad_norm": 2.6788947582244873, "learning_rate": 2.023754229978104e-06, "loss": 0.8187, "step": 13697 }, { "epoch": 0.8179863848083124, "grad_norm": 1.849217414855957, "learning_rate": 2.023090704001062e-06, "loss": 0.8419, "step": 13698 }, { "epoch": 0.8180461005613281, "grad_norm": 2.1479904651641846, "learning_rate": 2.0224271780240198e-06, "loss": 0.7676, "step": 13699 }, { "epoch": 0.8181058163143438, "grad_norm": 1.693228006362915, "learning_rate": 2.021763652046978e-06, "loss": 0.8464, "step": 13700 }, { "epoch": 0.8181655320673594, "grad_norm": 2.08737850189209, "learning_rate": 2.021100126069936e-06, "loss": 0.8159, "step": 13701 }, { "epoch": 0.818225247820375, "grad_norm": 4.71022367477417, "learning_rate": 2.0204366000928936e-06, "loss": 0.806, "step": 13702 }, { "epoch": 0.8182849635733906, "grad_norm": 1.929940938949585, "learning_rate": 2.0197730741158517e-06, "loss": 0.7894, "step": 13703 }, { "epoch": 0.8183446793264063, "grad_norm": 3.7635347843170166, "learning_rate": 2.01910954813881e-06, "loss": 0.8178, "step": 13704 }, { "epoch": 0.818404395079422, "grad_norm": 2.7298622131347656, "learning_rate": 2.0184460221617675e-06, "loss": 0.7928, "step": 13705 }, { "epoch": 0.8184641108324376, "grad_norm": 2.9619662761688232, "learning_rate": 2.017782496184726e-06, "loss": 0.8516, "step": 13706 }, { "epoch": 0.8185238265854532, "grad_norm": 2.505786657333374, "learning_rate": 2.0171189702076837e-06, "loss": 0.8047, "step": 13707 }, { "epoch": 0.8185835423384689, "grad_norm": 2.07362961769104, "learning_rate": 2.016455444230642e-06, "loss": 0.8264, "step": 13708 }, { "epoch": 0.8186432580914845, "grad_norm": 3.0092086791992188, "learning_rate": 2.0157919182536e-06, "loss": 0.7729, "step": 13709 }, { "epoch": 0.8187029738445002, "grad_norm": 2.1956777572631836, "learning_rate": 2.0151283922765576e-06, "loss": 0.8013, "step": 13710 }, { "epoch": 0.8187626895975159, "grad_norm": 2.213604211807251, "learning_rate": 2.0144648662995157e-06, "loss": 0.8162, "step": 13711 }, { "epoch": 0.8188224053505315, "grad_norm": 2.306438684463501, "learning_rate": 2.013801340322474e-06, "loss": 0.8056, "step": 13712 }, { "epoch": 0.8188821211035471, "grad_norm": 2.963193893432617, "learning_rate": 2.0131378143454315e-06, "loss": 0.8499, "step": 13713 }, { "epoch": 0.8189418368565627, "grad_norm": 1.8146953582763672, "learning_rate": 2.01247428836839e-06, "loss": 0.814, "step": 13714 }, { "epoch": 0.8190015526095784, "grad_norm": 2.896956443786621, "learning_rate": 2.0118107623913477e-06, "loss": 0.8049, "step": 13715 }, { "epoch": 0.8190612683625941, "grad_norm": 1.659661054611206, "learning_rate": 2.0111472364143058e-06, "loss": 0.8235, "step": 13716 }, { "epoch": 0.8191209841156097, "grad_norm": 2.9162583351135254, "learning_rate": 2.010483710437264e-06, "loss": 0.7817, "step": 13717 }, { "epoch": 0.8191806998686253, "grad_norm": 1.8738927841186523, "learning_rate": 2.0098201844602215e-06, "loss": 0.8371, "step": 13718 }, { "epoch": 0.819240415621641, "grad_norm": 2.22521710395813, "learning_rate": 2.0091566584831796e-06, "loss": 0.8199, "step": 13719 }, { "epoch": 0.8193001313746566, "grad_norm": 2.315626859664917, "learning_rate": 2.0084931325061377e-06, "loss": 0.7856, "step": 13720 }, { "epoch": 0.8193598471276723, "grad_norm": 1.7932803630828857, "learning_rate": 2.007829606529096e-06, "loss": 0.8199, "step": 13721 }, { "epoch": 0.819419562880688, "grad_norm": 2.236525774002075, "learning_rate": 2.007166080552054e-06, "loss": 0.848, "step": 13722 }, { "epoch": 0.8194792786337036, "grad_norm": 2.007789373397827, "learning_rate": 2.0065025545750116e-06, "loss": 0.8046, "step": 13723 }, { "epoch": 0.8195389943867192, "grad_norm": 1.7055476903915405, "learning_rate": 2.0058390285979697e-06, "loss": 0.8342, "step": 13724 }, { "epoch": 0.8195987101397348, "grad_norm": 4.151740550994873, "learning_rate": 2.005175502620928e-06, "loss": 0.8362, "step": 13725 }, { "epoch": 0.8196584258927505, "grad_norm": 2.066392660140991, "learning_rate": 2.004511976643886e-06, "loss": 0.7815, "step": 13726 }, { "epoch": 0.8197181416457662, "grad_norm": 1.7907404899597168, "learning_rate": 2.0038484506668436e-06, "loss": 0.7782, "step": 13727 }, { "epoch": 0.8197778573987818, "grad_norm": 1.9331384897232056, "learning_rate": 2.0031849246898017e-06, "loss": 0.8199, "step": 13728 }, { "epoch": 0.8198375731517974, "grad_norm": 3.458570957183838, "learning_rate": 2.00252139871276e-06, "loss": 0.8136, "step": 13729 }, { "epoch": 0.8198972889048131, "grad_norm": 2.7329823970794678, "learning_rate": 2.0018578727357175e-06, "loss": 0.8122, "step": 13730 }, { "epoch": 0.8199570046578287, "grad_norm": 2.538890838623047, "learning_rate": 2.001194346758676e-06, "loss": 0.7955, "step": 13731 }, { "epoch": 0.8200167204108444, "grad_norm": 2.496697425842285, "learning_rate": 2.0005308207816337e-06, "loss": 0.8313, "step": 13732 }, { "epoch": 0.8200764361638601, "grad_norm": 1.8427664041519165, "learning_rate": 1.9998672948045918e-06, "loss": 0.85, "step": 13733 }, { "epoch": 0.8201361519168757, "grad_norm": 2.1741528511047363, "learning_rate": 1.99920376882755e-06, "loss": 0.8065, "step": 13734 }, { "epoch": 0.8201958676698913, "grad_norm": 3.648865222930908, "learning_rate": 1.9985402428505076e-06, "loss": 0.8108, "step": 13735 }, { "epoch": 0.8202555834229069, "grad_norm": 3.357941150665283, "learning_rate": 1.9978767168734657e-06, "loss": 0.7901, "step": 13736 }, { "epoch": 0.8203152991759226, "grad_norm": 3.7364554405212402, "learning_rate": 1.9972131908964238e-06, "loss": 0.82, "step": 13737 }, { "epoch": 0.8203750149289383, "grad_norm": 1.6665133237838745, "learning_rate": 1.9965496649193814e-06, "loss": 0.8324, "step": 13738 }, { "epoch": 0.8204347306819539, "grad_norm": 2.0814216136932373, "learning_rate": 1.99588613894234e-06, "loss": 0.8087, "step": 13739 }, { "epoch": 0.8204944464349695, "grad_norm": 1.9994505643844604, "learning_rate": 1.9952226129652976e-06, "loss": 0.8159, "step": 13740 }, { "epoch": 0.8205541621879852, "grad_norm": 2.397141456604004, "learning_rate": 1.9945590869882557e-06, "loss": 0.8402, "step": 13741 }, { "epoch": 0.8206138779410008, "grad_norm": 2.4980356693267822, "learning_rate": 1.993895561011214e-06, "loss": 0.8091, "step": 13742 }, { "epoch": 0.8206735936940165, "grad_norm": 3.132645606994629, "learning_rate": 1.9932320350341715e-06, "loss": 0.8115, "step": 13743 }, { "epoch": 0.8207333094470322, "grad_norm": 2.561535358428955, "learning_rate": 1.9925685090571296e-06, "loss": 0.8252, "step": 13744 }, { "epoch": 0.8207930252000478, "grad_norm": 2.068950891494751, "learning_rate": 1.9919049830800877e-06, "loss": 0.8118, "step": 13745 }, { "epoch": 0.8208527409530634, "grad_norm": 2.027648687362671, "learning_rate": 1.991241457103046e-06, "loss": 0.8306, "step": 13746 }, { "epoch": 0.820912456706079, "grad_norm": 2.3235976696014404, "learning_rate": 1.990577931126004e-06, "loss": 0.8151, "step": 13747 }, { "epoch": 0.8209721724590947, "grad_norm": 1.9976155757904053, "learning_rate": 1.9899144051489616e-06, "loss": 0.8118, "step": 13748 }, { "epoch": 0.8210318882121104, "grad_norm": 2.0311028957366943, "learning_rate": 1.9892508791719197e-06, "loss": 0.8415, "step": 13749 }, { "epoch": 0.821091603965126, "grad_norm": 2.015974283218384, "learning_rate": 1.988587353194878e-06, "loss": 0.8438, "step": 13750 }, { "epoch": 0.8211513197181416, "grad_norm": 2.15354061126709, "learning_rate": 1.987923827217836e-06, "loss": 0.8244, "step": 13751 }, { "epoch": 0.8212110354711573, "grad_norm": 2.153942823410034, "learning_rate": 1.9872603012407936e-06, "loss": 0.813, "step": 13752 }, { "epoch": 0.8212707512241729, "grad_norm": 2.3390309810638428, "learning_rate": 1.9865967752637517e-06, "loss": 0.7926, "step": 13753 }, { "epoch": 0.8213304669771886, "grad_norm": 1.7970290184020996, "learning_rate": 1.9859332492867098e-06, "loss": 0.8115, "step": 13754 }, { "epoch": 0.8213901827302043, "grad_norm": 1.6879768371582031, "learning_rate": 1.9852697233096675e-06, "loss": 0.7598, "step": 13755 }, { "epoch": 0.8214498984832199, "grad_norm": 1.9651259183883667, "learning_rate": 1.984606197332626e-06, "loss": 0.8275, "step": 13756 }, { "epoch": 0.8215096142362355, "grad_norm": 2.0643391609191895, "learning_rate": 1.9839426713555837e-06, "loss": 0.8014, "step": 13757 }, { "epoch": 0.8215693299892511, "grad_norm": 2.0332188606262207, "learning_rate": 1.9832791453785418e-06, "loss": 0.8299, "step": 13758 }, { "epoch": 0.8216290457422668, "grad_norm": 7.4511284828186035, "learning_rate": 1.9826156194015e-06, "loss": 0.8222, "step": 13759 }, { "epoch": 0.8216887614952825, "grad_norm": 1.8425931930541992, "learning_rate": 1.9819520934244575e-06, "loss": 0.7933, "step": 13760 }, { "epoch": 0.8217484772482981, "grad_norm": 2.260552406311035, "learning_rate": 1.9812885674474156e-06, "loss": 0.7954, "step": 13761 }, { "epoch": 0.8218081930013137, "grad_norm": 2.2369537353515625, "learning_rate": 1.9806250414703737e-06, "loss": 0.8184, "step": 13762 }, { "epoch": 0.8218679087543294, "grad_norm": 3.5562169551849365, "learning_rate": 1.9799615154933314e-06, "loss": 0.8052, "step": 13763 }, { "epoch": 0.821927624507345, "grad_norm": 3.2023775577545166, "learning_rate": 1.97929798951629e-06, "loss": 0.7981, "step": 13764 }, { "epoch": 0.8219873402603607, "grad_norm": 3.1258163452148438, "learning_rate": 1.9786344635392476e-06, "loss": 0.8082, "step": 13765 }, { "epoch": 0.8220470560133764, "grad_norm": 2.922684907913208, "learning_rate": 1.9779709375622057e-06, "loss": 0.8095, "step": 13766 }, { "epoch": 0.822106771766392, "grad_norm": 2.460792303085327, "learning_rate": 1.977307411585164e-06, "loss": 0.8271, "step": 13767 }, { "epoch": 0.8221664875194076, "grad_norm": 2.8036038875579834, "learning_rate": 1.9766438856081215e-06, "loss": 0.823, "step": 13768 }, { "epoch": 0.8222262032724232, "grad_norm": 2.2916784286499023, "learning_rate": 1.9759803596310796e-06, "loss": 0.7895, "step": 13769 }, { "epoch": 0.8222859190254389, "grad_norm": 2.2794747352600098, "learning_rate": 1.9753168336540377e-06, "loss": 0.8022, "step": 13770 }, { "epoch": 0.8223456347784546, "grad_norm": 1.9881515502929688, "learning_rate": 1.974653307676996e-06, "loss": 0.8399, "step": 13771 }, { "epoch": 0.8224053505314702, "grad_norm": 2.483062505722046, "learning_rate": 1.973989781699954e-06, "loss": 0.7997, "step": 13772 }, { "epoch": 0.8224650662844859, "grad_norm": 2.3864309787750244, "learning_rate": 1.9733262557229116e-06, "loss": 0.8181, "step": 13773 }, { "epoch": 0.8225247820375015, "grad_norm": 1.8070439100265503, "learning_rate": 1.9726627297458697e-06, "loss": 0.8066, "step": 13774 }, { "epoch": 0.8225844977905171, "grad_norm": 2.4248428344726562, "learning_rate": 1.9719992037688278e-06, "loss": 0.8148, "step": 13775 }, { "epoch": 0.8226442135435328, "grad_norm": 1.6957648992538452, "learning_rate": 1.971335677791786e-06, "loss": 0.8143, "step": 13776 }, { "epoch": 0.8227039292965485, "grad_norm": 2.6929314136505127, "learning_rate": 1.9706721518147435e-06, "loss": 0.8071, "step": 13777 }, { "epoch": 0.8227636450495641, "grad_norm": 1.8124701976776123, "learning_rate": 1.9700086258377016e-06, "loss": 0.8247, "step": 13778 }, { "epoch": 0.8228233608025797, "grad_norm": 6.874910354614258, "learning_rate": 1.9693450998606597e-06, "loss": 0.8087, "step": 13779 }, { "epoch": 0.8228830765555953, "grad_norm": 2.3837950229644775, "learning_rate": 1.968681573883618e-06, "loss": 0.8456, "step": 13780 }, { "epoch": 0.822942792308611, "grad_norm": 2.0861384868621826, "learning_rate": 1.968018047906576e-06, "loss": 0.7899, "step": 13781 }, { "epoch": 0.8230025080616267, "grad_norm": 2.1280877590179443, "learning_rate": 1.9673545219295336e-06, "loss": 0.8255, "step": 13782 }, { "epoch": 0.8230622238146423, "grad_norm": 2.4027152061462402, "learning_rate": 1.9666909959524917e-06, "loss": 0.8405, "step": 13783 }, { "epoch": 0.823121939567658, "grad_norm": 2.8682868480682373, "learning_rate": 1.96602746997545e-06, "loss": 0.8571, "step": 13784 }, { "epoch": 0.8231816553206736, "grad_norm": 1.6438121795654297, "learning_rate": 1.9653639439984075e-06, "loss": 0.782, "step": 13785 }, { "epoch": 0.8232413710736892, "grad_norm": 1.8771723508834839, "learning_rate": 1.9647004180213656e-06, "loss": 0.7971, "step": 13786 }, { "epoch": 0.8233010868267049, "grad_norm": 2.0812366008758545, "learning_rate": 1.9640368920443237e-06, "loss": 0.8533, "step": 13787 }, { "epoch": 0.8233608025797206, "grad_norm": 1.7634958028793335, "learning_rate": 1.9633733660672814e-06, "loss": 0.8048, "step": 13788 }, { "epoch": 0.8234205183327362, "grad_norm": 2.093707799911499, "learning_rate": 1.96270984009024e-06, "loss": 0.8684, "step": 13789 }, { "epoch": 0.8234802340857518, "grad_norm": 2.1797101497650146, "learning_rate": 1.9620463141131976e-06, "loss": 0.7748, "step": 13790 }, { "epoch": 0.8235399498387674, "grad_norm": 2.8748629093170166, "learning_rate": 1.9613827881361557e-06, "loss": 0.7945, "step": 13791 }, { "epoch": 0.8235996655917831, "grad_norm": 2.2864556312561035, "learning_rate": 1.9607192621591138e-06, "loss": 0.8264, "step": 13792 }, { "epoch": 0.8236593813447988, "grad_norm": 2.253854274749756, "learning_rate": 1.9600557361820715e-06, "loss": 0.8131, "step": 13793 }, { "epoch": 0.8237190970978144, "grad_norm": 1.818390130996704, "learning_rate": 1.9593922102050296e-06, "loss": 0.8411, "step": 13794 }, { "epoch": 0.8237788128508301, "grad_norm": 2.30637788772583, "learning_rate": 1.9587286842279877e-06, "loss": 0.797, "step": 13795 }, { "epoch": 0.8238385286038457, "grad_norm": 2.1567022800445557, "learning_rate": 1.9580651582509458e-06, "loss": 0.8324, "step": 13796 }, { "epoch": 0.8238982443568613, "grad_norm": 1.833678126335144, "learning_rate": 1.957401632273904e-06, "loss": 0.82, "step": 13797 }, { "epoch": 0.823957960109877, "grad_norm": 2.0771234035491943, "learning_rate": 1.9567381062968615e-06, "loss": 0.8521, "step": 13798 }, { "epoch": 0.8240176758628927, "grad_norm": 2.108793020248413, "learning_rate": 1.9560745803198196e-06, "loss": 0.7676, "step": 13799 }, { "epoch": 0.8240773916159083, "grad_norm": 2.2487611770629883, "learning_rate": 1.9554110543427777e-06, "loss": 0.855, "step": 13800 }, { "epoch": 0.8240773916159083, "eval_text_loss": 0.8893476128578186, "eval_text_runtime": 15.1825, "eval_text_samples_per_second": 263.462, "eval_text_steps_per_second": 0.527, "step": 13800 }, { "epoch": 0.8240773916159083, "eval_image_loss": 0.5906894207000732, "eval_image_runtime": 5.0211, "eval_image_samples_per_second": 796.643, "eval_image_steps_per_second": 1.593, "step": 13800 }, { "epoch": 0.8240773916159083, "eval_video_loss": 1.0185401439666748, "eval_video_runtime": 76.6905, "eval_video_samples_per_second": 52.158, "eval_video_steps_per_second": 0.104, "step": 13800 }, { "epoch": 0.8241371073689239, "grad_norm": 1.7477664947509766, "learning_rate": 1.954747528365736e-06, "loss": 0.8134, "step": 13801 }, { "epoch": 0.8241968231219395, "grad_norm": 2.722224473953247, "learning_rate": 1.9540840023886935e-06, "loss": 0.8534, "step": 13802 }, { "epoch": 0.8242565388749552, "grad_norm": 2.704209804534912, "learning_rate": 1.9534204764116516e-06, "loss": 0.7853, "step": 13803 }, { "epoch": 0.8243162546279709, "grad_norm": 2.1021859645843506, "learning_rate": 1.9527569504346097e-06, "loss": 0.8468, "step": 13804 }, { "epoch": 0.8243759703809865, "grad_norm": 2.0967037677764893, "learning_rate": 1.952093424457568e-06, "loss": 0.7861, "step": 13805 }, { "epoch": 0.8244356861340022, "grad_norm": 2.2875607013702393, "learning_rate": 1.951429898480526e-06, "loss": 0.8234, "step": 13806 }, { "epoch": 0.8244954018870178, "grad_norm": 2.3238866329193115, "learning_rate": 1.9507663725034836e-06, "loss": 0.81, "step": 13807 }, { "epoch": 0.8245551176400334, "grad_norm": 1.9170387983322144, "learning_rate": 1.9501028465264417e-06, "loss": 0.8254, "step": 13808 }, { "epoch": 0.8246148333930491, "grad_norm": 2.047807216644287, "learning_rate": 1.9494393205494e-06, "loss": 0.8045, "step": 13809 }, { "epoch": 0.8246745491460648, "grad_norm": 1.776879906654358, "learning_rate": 1.9487757945723575e-06, "loss": 0.786, "step": 13810 }, { "epoch": 0.8247342648990804, "grad_norm": 2.1095643043518066, "learning_rate": 1.9481122685953156e-06, "loss": 0.7851, "step": 13811 }, { "epoch": 0.824793980652096, "grad_norm": 2.268903970718384, "learning_rate": 1.9474487426182737e-06, "loss": 0.7947, "step": 13812 }, { "epoch": 0.8248536964051116, "grad_norm": 2.404789924621582, "learning_rate": 1.9467852166412314e-06, "loss": 0.795, "step": 13813 }, { "epoch": 0.8249134121581273, "grad_norm": 1.8258336782455444, "learning_rate": 1.94612169066419e-06, "loss": 0.8011, "step": 13814 }, { "epoch": 0.824973127911143, "grad_norm": 1.9771069288253784, "learning_rate": 1.9454581646871476e-06, "loss": 0.8081, "step": 13815 }, { "epoch": 0.8250328436641586, "grad_norm": 2.166306734085083, "learning_rate": 1.9447946387101057e-06, "loss": 0.806, "step": 13816 }, { "epoch": 0.8250925594171743, "grad_norm": 1.919999122619629, "learning_rate": 1.9441311127330638e-06, "loss": 0.7706, "step": 13817 }, { "epoch": 0.8251522751701899, "grad_norm": 5.932955265045166, "learning_rate": 1.9434675867560214e-06, "loss": 0.8571, "step": 13818 }, { "epoch": 0.8252119909232055, "grad_norm": 2.709974765777588, "learning_rate": 1.9428040607789795e-06, "loss": 0.7895, "step": 13819 }, { "epoch": 0.8252717066762212, "grad_norm": 2.173251152038574, "learning_rate": 1.9421405348019376e-06, "loss": 0.7973, "step": 13820 }, { "epoch": 0.8253314224292368, "grad_norm": 2.6373403072357178, "learning_rate": 1.9414770088248957e-06, "loss": 0.8022, "step": 13821 }, { "epoch": 0.8253911381822525, "grad_norm": 3.1867880821228027, "learning_rate": 1.940813482847854e-06, "loss": 0.8147, "step": 13822 }, { "epoch": 0.8254508539352681, "grad_norm": 2.1893861293792725, "learning_rate": 1.9401499568708115e-06, "loss": 0.8052, "step": 13823 }, { "epoch": 0.8255105696882837, "grad_norm": 1.9453479051589966, "learning_rate": 1.9394864308937696e-06, "loss": 0.8722, "step": 13824 }, { "epoch": 0.8255702854412994, "grad_norm": 3.846646547317505, "learning_rate": 1.9388229049167277e-06, "loss": 0.8265, "step": 13825 }, { "epoch": 0.8256300011943151, "grad_norm": 3.1819345951080322, "learning_rate": 1.938159378939686e-06, "loss": 0.8457, "step": 13826 }, { "epoch": 0.8256897169473307, "grad_norm": 2.4012937545776367, "learning_rate": 1.9374958529626435e-06, "loss": 0.8048, "step": 13827 }, { "epoch": 0.8257494327003464, "grad_norm": 2.343496322631836, "learning_rate": 1.9368323269856016e-06, "loss": 0.8256, "step": 13828 }, { "epoch": 0.825809148453362, "grad_norm": 1.9392518997192383, "learning_rate": 1.9361688010085597e-06, "loss": 0.805, "step": 13829 }, { "epoch": 0.8258688642063776, "grad_norm": 2.3639822006225586, "learning_rate": 1.935505275031518e-06, "loss": 0.8055, "step": 13830 }, { "epoch": 0.8259285799593933, "grad_norm": 2.067294120788574, "learning_rate": 1.934841749054476e-06, "loss": 0.8362, "step": 13831 }, { "epoch": 0.825988295712409, "grad_norm": 2.240016222000122, "learning_rate": 1.9341782230774336e-06, "loss": 0.8301, "step": 13832 }, { "epoch": 0.8260480114654246, "grad_norm": 1.9749646186828613, "learning_rate": 1.9335146971003917e-06, "loss": 0.8047, "step": 13833 }, { "epoch": 0.8261077272184403, "grad_norm": 1.7631558179855347, "learning_rate": 1.9328511711233498e-06, "loss": 0.7979, "step": 13834 }, { "epoch": 0.8261674429714558, "grad_norm": 1.900833249092102, "learning_rate": 1.9321876451463074e-06, "loss": 0.8083, "step": 13835 }, { "epoch": 0.8262271587244715, "grad_norm": 2.8055765628814697, "learning_rate": 1.9315241191692655e-06, "loss": 0.8156, "step": 13836 }, { "epoch": 0.8262868744774872, "grad_norm": 2.871448040008545, "learning_rate": 1.9308605931922236e-06, "loss": 0.81, "step": 13837 }, { "epoch": 0.8263465902305028, "grad_norm": 2.1130566596984863, "learning_rate": 1.9301970672151813e-06, "loss": 0.8364, "step": 13838 }, { "epoch": 0.8264063059835185, "grad_norm": 1.8440076112747192, "learning_rate": 1.92953354123814e-06, "loss": 0.7909, "step": 13839 }, { "epoch": 0.826466021736534, "grad_norm": 3.28552508354187, "learning_rate": 1.9288700152610975e-06, "loss": 0.818, "step": 13840 }, { "epoch": 0.8265257374895497, "grad_norm": 3.6515018939971924, "learning_rate": 1.9282064892840556e-06, "loss": 0.851, "step": 13841 }, { "epoch": 0.8265854532425654, "grad_norm": 2.885063886642456, "learning_rate": 1.9275429633070137e-06, "loss": 0.8193, "step": 13842 }, { "epoch": 0.826645168995581, "grad_norm": 2.3076164722442627, "learning_rate": 1.9268794373299714e-06, "loss": 0.8211, "step": 13843 }, { "epoch": 0.8267048847485967, "grad_norm": 2.180647850036621, "learning_rate": 1.9262159113529295e-06, "loss": 0.832, "step": 13844 }, { "epoch": 0.8267646005016124, "grad_norm": 2.0728516578674316, "learning_rate": 1.9255523853758876e-06, "loss": 0.8301, "step": 13845 }, { "epoch": 0.8268243162546279, "grad_norm": 1.6293060779571533, "learning_rate": 1.9248888593988457e-06, "loss": 0.7901, "step": 13846 }, { "epoch": 0.8268840320076436, "grad_norm": 2.0602211952209473, "learning_rate": 1.924225333421804e-06, "loss": 0.8014, "step": 13847 }, { "epoch": 0.8269437477606593, "grad_norm": 2.42895245552063, "learning_rate": 1.9235618074447615e-06, "loss": 0.832, "step": 13848 }, { "epoch": 0.8270034635136749, "grad_norm": 2.601365566253662, "learning_rate": 1.9228982814677196e-06, "loss": 0.8055, "step": 13849 }, { "epoch": 0.8270631792666906, "grad_norm": 2.5417284965515137, "learning_rate": 1.9222347554906777e-06, "loss": 0.8319, "step": 13850 }, { "epoch": 0.8271228950197062, "grad_norm": 2.643968343734741, "learning_rate": 1.9215712295136358e-06, "loss": 0.8724, "step": 13851 }, { "epoch": 0.8271826107727218, "grad_norm": 10.974205017089844, "learning_rate": 1.9209077035365935e-06, "loss": 0.8134, "step": 13852 }, { "epoch": 0.8272423265257375, "grad_norm": 1.7414864301681519, "learning_rate": 1.9202441775595516e-06, "loss": 0.835, "step": 13853 }, { "epoch": 0.8273020422787531, "grad_norm": 4.069952011108398, "learning_rate": 1.9195806515825097e-06, "loss": 0.8378, "step": 13854 }, { "epoch": 0.8273617580317688, "grad_norm": 2.333341360092163, "learning_rate": 1.9189171256054678e-06, "loss": 0.8748, "step": 13855 }, { "epoch": 0.8274214737847845, "grad_norm": 3.0861549377441406, "learning_rate": 1.918253599628426e-06, "loss": 0.7715, "step": 13856 }, { "epoch": 0.8274811895378, "grad_norm": 2.8743691444396973, "learning_rate": 1.9175900736513835e-06, "loss": 0.8092, "step": 13857 }, { "epoch": 0.8275409052908157, "grad_norm": 2.0209057331085205, "learning_rate": 1.9169265476743416e-06, "loss": 0.8198, "step": 13858 }, { "epoch": 0.8276006210438314, "grad_norm": 2.6439194679260254, "learning_rate": 1.9162630216972997e-06, "loss": 0.8304, "step": 13859 }, { "epoch": 0.827660336796847, "grad_norm": 1.7353001832962036, "learning_rate": 1.9155994957202574e-06, "loss": 0.8174, "step": 13860 }, { "epoch": 0.8277200525498627, "grad_norm": 2.093165636062622, "learning_rate": 1.9149359697432155e-06, "loss": 0.821, "step": 13861 }, { "epoch": 0.8277797683028782, "grad_norm": 2.0244321823120117, "learning_rate": 1.9142724437661736e-06, "loss": 0.7882, "step": 13862 }, { "epoch": 0.8278394840558939, "grad_norm": 3.3203001022338867, "learning_rate": 1.9136089177891313e-06, "loss": 0.8273, "step": 13863 }, { "epoch": 0.8278991998089096, "grad_norm": 1.6338645219802856, "learning_rate": 1.91294539181209e-06, "loss": 0.7962, "step": 13864 }, { "epoch": 0.8279589155619252, "grad_norm": 1.9825692176818848, "learning_rate": 1.9122818658350475e-06, "loss": 0.7821, "step": 13865 }, { "epoch": 0.8280186313149409, "grad_norm": 2.2012600898742676, "learning_rate": 1.9116183398580056e-06, "loss": 0.79, "step": 13866 }, { "epoch": 0.8280783470679566, "grad_norm": 2.633997917175293, "learning_rate": 1.9109548138809637e-06, "loss": 0.7937, "step": 13867 }, { "epoch": 0.8281380628209721, "grad_norm": 1.7441107034683228, "learning_rate": 1.9102912879039214e-06, "loss": 0.8208, "step": 13868 }, { "epoch": 0.8281977785739878, "grad_norm": 3.685474395751953, "learning_rate": 1.9096277619268795e-06, "loss": 0.8093, "step": 13869 }, { "epoch": 0.8282574943270035, "grad_norm": 2.1851823329925537, "learning_rate": 1.9089642359498376e-06, "loss": 0.817, "step": 13870 }, { "epoch": 0.8283172100800191, "grad_norm": 3.9697253704071045, "learning_rate": 1.9083007099727957e-06, "loss": 0.8291, "step": 13871 }, { "epoch": 0.8283769258330348, "grad_norm": 2.1519017219543457, "learning_rate": 1.9076371839957538e-06, "loss": 0.7676, "step": 13872 }, { "epoch": 0.8284366415860503, "grad_norm": 2.4554812908172607, "learning_rate": 1.9069736580187115e-06, "loss": 0.8255, "step": 13873 }, { "epoch": 0.828496357339066, "grad_norm": 3.831061601638794, "learning_rate": 1.9063101320416696e-06, "loss": 0.7933, "step": 13874 }, { "epoch": 0.8285560730920817, "grad_norm": 1.695508360862732, "learning_rate": 1.9056466060646274e-06, "loss": 0.7896, "step": 13875 }, { "epoch": 0.8286157888450973, "grad_norm": 1.7885890007019043, "learning_rate": 1.9049830800875858e-06, "loss": 0.8283, "step": 13876 }, { "epoch": 0.828675504598113, "grad_norm": 3.3068439960479736, "learning_rate": 1.9043195541105436e-06, "loss": 0.8125, "step": 13877 }, { "epoch": 0.8287352203511287, "grad_norm": 2.494164228439331, "learning_rate": 1.9036560281335015e-06, "loss": 0.8174, "step": 13878 }, { "epoch": 0.8287949361041442, "grad_norm": 4.318349838256836, "learning_rate": 1.9029925021564596e-06, "loss": 0.8344, "step": 13879 }, { "epoch": 0.8288546518571599, "grad_norm": 2.532639980316162, "learning_rate": 1.9023289761794175e-06, "loss": 0.8111, "step": 13880 }, { "epoch": 0.8289143676101756, "grad_norm": 2.1069741249084473, "learning_rate": 1.9016654502023756e-06, "loss": 0.8097, "step": 13881 }, { "epoch": 0.8289740833631912, "grad_norm": 2.8213462829589844, "learning_rate": 1.9010019242253335e-06, "loss": 0.7958, "step": 13882 }, { "epoch": 0.8290337991162069, "grad_norm": 2.200437068939209, "learning_rate": 1.9003383982482914e-06, "loss": 0.8265, "step": 13883 }, { "epoch": 0.8290935148692224, "grad_norm": 1.9956016540527344, "learning_rate": 1.8996748722712497e-06, "loss": 0.7786, "step": 13884 }, { "epoch": 0.8291532306222381, "grad_norm": 2.4438905715942383, "learning_rate": 1.8990113462942076e-06, "loss": 0.7928, "step": 13885 }, { "epoch": 0.8292129463752538, "grad_norm": 2.078941583633423, "learning_rate": 1.8983478203171657e-06, "loss": 0.8245, "step": 13886 }, { "epoch": 0.8292726621282694, "grad_norm": 3.696626663208008, "learning_rate": 1.8976842943401236e-06, "loss": 0.828, "step": 13887 }, { "epoch": 0.8293323778812851, "grad_norm": 2.1131465435028076, "learning_rate": 1.8970207683630815e-06, "loss": 0.7931, "step": 13888 }, { "epoch": 0.8293920936343008, "grad_norm": 1.9882968664169312, "learning_rate": 1.8963572423860396e-06, "loss": 0.8238, "step": 13889 }, { "epoch": 0.8294518093873163, "grad_norm": 2.415656089782715, "learning_rate": 1.8956937164089975e-06, "loss": 0.858, "step": 13890 }, { "epoch": 0.829511525140332, "grad_norm": 1.8402994871139526, "learning_rate": 1.8950301904319558e-06, "loss": 0.8149, "step": 13891 }, { "epoch": 0.8295712408933477, "grad_norm": 3.0443360805511475, "learning_rate": 1.8943666644549137e-06, "loss": 0.823, "step": 13892 }, { "epoch": 0.8296309566463633, "grad_norm": 2.6637158393859863, "learning_rate": 1.8937031384778716e-06, "loss": 0.8039, "step": 13893 }, { "epoch": 0.829690672399379, "grad_norm": 2.254110097885132, "learning_rate": 1.8930396125008297e-06, "loss": 0.7981, "step": 13894 }, { "epoch": 0.8297503881523945, "grad_norm": 1.5883326530456543, "learning_rate": 1.8923760865237875e-06, "loss": 0.8324, "step": 13895 }, { "epoch": 0.8298101039054102, "grad_norm": 1.7657256126403809, "learning_rate": 1.8917125605467456e-06, "loss": 0.8104, "step": 13896 }, { "epoch": 0.8298698196584259, "grad_norm": 1.9497753381729126, "learning_rate": 1.8910490345697035e-06, "loss": 0.8278, "step": 13897 }, { "epoch": 0.8299295354114415, "grad_norm": 2.482919216156006, "learning_rate": 1.8903855085926614e-06, "loss": 0.7966, "step": 13898 }, { "epoch": 0.8299892511644572, "grad_norm": 2.2906458377838135, "learning_rate": 1.8897219826156195e-06, "loss": 0.8482, "step": 13899 }, { "epoch": 0.8300489669174729, "grad_norm": 2.355438470840454, "learning_rate": 1.8890584566385774e-06, "loss": 0.8078, "step": 13900 }, { "epoch": 0.8301086826704884, "grad_norm": 2.131286859512329, "learning_rate": 1.8883949306615357e-06, "loss": 0.8216, "step": 13901 }, { "epoch": 0.8301683984235041, "grad_norm": 2.7617788314819336, "learning_rate": 1.8877314046844936e-06, "loss": 0.8514, "step": 13902 }, { "epoch": 0.8302281141765198, "grad_norm": 1.9415576457977295, "learning_rate": 1.8870678787074515e-06, "loss": 0.8077, "step": 13903 }, { "epoch": 0.8302878299295354, "grad_norm": 7.776114463806152, "learning_rate": 1.8864043527304096e-06, "loss": 0.8684, "step": 13904 }, { "epoch": 0.8303475456825511, "grad_norm": 3.2011501789093018, "learning_rate": 1.8857408267533675e-06, "loss": 0.8008, "step": 13905 }, { "epoch": 0.8304072614355668, "grad_norm": 1.9696685075759888, "learning_rate": 1.8850773007763256e-06, "loss": 0.8198, "step": 13906 }, { "epoch": 0.8304669771885823, "grad_norm": 1.8388991355895996, "learning_rate": 1.8844137747992835e-06, "loss": 0.7888, "step": 13907 }, { "epoch": 0.830526692941598, "grad_norm": 2.302429437637329, "learning_rate": 1.8837502488222414e-06, "loss": 0.8297, "step": 13908 }, { "epoch": 0.8305864086946136, "grad_norm": 1.8316384553909302, "learning_rate": 1.8830867228451997e-06, "loss": 0.8169, "step": 13909 }, { "epoch": 0.8306461244476293, "grad_norm": 1.8079148530960083, "learning_rate": 1.8824231968681576e-06, "loss": 0.8158, "step": 13910 }, { "epoch": 0.830705840200645, "grad_norm": 2.505723237991333, "learning_rate": 1.8817596708911157e-06, "loss": 0.8231, "step": 13911 }, { "epoch": 0.8307655559536605, "grad_norm": 11.64832878112793, "learning_rate": 1.8810961449140736e-06, "loss": 0.8197, "step": 13912 }, { "epoch": 0.8308252717066762, "grad_norm": 2.140784978866577, "learning_rate": 1.8804326189370314e-06, "loss": 0.8263, "step": 13913 }, { "epoch": 0.8308849874596919, "grad_norm": 2.627126693725586, "learning_rate": 1.8797690929599895e-06, "loss": 0.813, "step": 13914 }, { "epoch": 0.8309447032127075, "grad_norm": 2.1654272079467773, "learning_rate": 1.8791055669829474e-06, "loss": 0.814, "step": 13915 }, { "epoch": 0.8310044189657232, "grad_norm": 1.926430106163025, "learning_rate": 1.8784420410059057e-06, "loss": 0.8382, "step": 13916 }, { "epoch": 0.8310641347187389, "grad_norm": 2.2547640800476074, "learning_rate": 1.8777785150288636e-06, "loss": 0.7938, "step": 13917 }, { "epoch": 0.8311238504717544, "grad_norm": 2.3998489379882812, "learning_rate": 1.8771149890518215e-06, "loss": 0.826, "step": 13918 }, { "epoch": 0.8311835662247701, "grad_norm": 2.525938034057617, "learning_rate": 1.8764514630747796e-06, "loss": 0.8357, "step": 13919 }, { "epoch": 0.8312432819777857, "grad_norm": 2.4911458492279053, "learning_rate": 1.8757879370977375e-06, "loss": 0.8189, "step": 13920 }, { "epoch": 0.8313029977308014, "grad_norm": 2.7119455337524414, "learning_rate": 1.8751244111206956e-06, "loss": 0.795, "step": 13921 }, { "epoch": 0.8313627134838171, "grad_norm": 2.5015923976898193, "learning_rate": 1.8744608851436535e-06, "loss": 0.794, "step": 13922 }, { "epoch": 0.8314224292368326, "grad_norm": 2.362048625946045, "learning_rate": 1.8737973591666114e-06, "loss": 0.8441, "step": 13923 }, { "epoch": 0.8314821449898483, "grad_norm": 2.6643459796905518, "learning_rate": 1.8731338331895695e-06, "loss": 0.8085, "step": 13924 }, { "epoch": 0.831541860742864, "grad_norm": 1.8482271432876587, "learning_rate": 1.8724703072125274e-06, "loss": 0.8284, "step": 13925 }, { "epoch": 0.8316015764958796, "grad_norm": 2.187140703201294, "learning_rate": 1.8718067812354857e-06, "loss": 0.8091, "step": 13926 }, { "epoch": 0.8316612922488953, "grad_norm": 2.4313509464263916, "learning_rate": 1.8711432552584436e-06, "loss": 0.8436, "step": 13927 }, { "epoch": 0.831721008001911, "grad_norm": 2.944448709487915, "learning_rate": 1.8704797292814015e-06, "loss": 0.7928, "step": 13928 }, { "epoch": 0.8317807237549265, "grad_norm": 2.528791666030884, "learning_rate": 1.8698162033043596e-06, "loss": 0.8511, "step": 13929 }, { "epoch": 0.8318404395079422, "grad_norm": 3.4060862064361572, "learning_rate": 1.8691526773273175e-06, "loss": 0.8318, "step": 13930 }, { "epoch": 0.8319001552609578, "grad_norm": 1.7778204679489136, "learning_rate": 1.8684891513502756e-06, "loss": 0.8076, "step": 13931 }, { "epoch": 0.8319598710139735, "grad_norm": 2.346238136291504, "learning_rate": 1.8678256253732335e-06, "loss": 0.7694, "step": 13932 }, { "epoch": 0.8320195867669892, "grad_norm": 2.4680755138397217, "learning_rate": 1.8671620993961913e-06, "loss": 0.8039, "step": 13933 }, { "epoch": 0.8320793025200047, "grad_norm": 2.225337028503418, "learning_rate": 1.8664985734191497e-06, "loss": 0.8208, "step": 13934 }, { "epoch": 0.8321390182730204, "grad_norm": 1.9628666639328003, "learning_rate": 1.8658350474421075e-06, "loss": 0.7871, "step": 13935 }, { "epoch": 0.8321987340260361, "grad_norm": 1.9911644458770752, "learning_rate": 1.8651715214650656e-06, "loss": 0.8234, "step": 13936 }, { "epoch": 0.8322584497790517, "grad_norm": 2.157536506652832, "learning_rate": 1.8645079954880235e-06, "loss": 0.812, "step": 13937 }, { "epoch": 0.8323181655320674, "grad_norm": 2.8942654132843018, "learning_rate": 1.8638444695109814e-06, "loss": 0.8052, "step": 13938 }, { "epoch": 0.832377881285083, "grad_norm": 4.260815620422363, "learning_rate": 1.8631809435339395e-06, "loss": 0.8074, "step": 13939 }, { "epoch": 0.8324375970380986, "grad_norm": 3.253868818283081, "learning_rate": 1.8625174175568974e-06, "loss": 0.8252, "step": 13940 }, { "epoch": 0.8324973127911143, "grad_norm": 2.1789164543151855, "learning_rate": 1.8618538915798557e-06, "loss": 0.8423, "step": 13941 }, { "epoch": 0.8325570285441299, "grad_norm": 2.6863315105438232, "learning_rate": 1.8611903656028136e-06, "loss": 0.8335, "step": 13942 }, { "epoch": 0.8326167442971456, "grad_norm": 1.9276632070541382, "learning_rate": 1.8605268396257715e-06, "loss": 0.7888, "step": 13943 }, { "epoch": 0.8326764600501613, "grad_norm": 1.682695746421814, "learning_rate": 1.8598633136487296e-06, "loss": 0.8527, "step": 13944 }, { "epoch": 0.8327361758031768, "grad_norm": 1.8034672737121582, "learning_rate": 1.8591997876716875e-06, "loss": 0.8183, "step": 13945 }, { "epoch": 0.8327958915561925, "grad_norm": 2.3283920288085938, "learning_rate": 1.8585362616946456e-06, "loss": 0.8481, "step": 13946 }, { "epoch": 0.8328556073092082, "grad_norm": 2.038672685623169, "learning_rate": 1.8578727357176035e-06, "loss": 0.8304, "step": 13947 }, { "epoch": 0.8329153230622238, "grad_norm": 2.1765213012695312, "learning_rate": 1.8572092097405614e-06, "loss": 0.8313, "step": 13948 }, { "epoch": 0.8329750388152395, "grad_norm": 2.1699516773223877, "learning_rate": 1.8565456837635195e-06, "loss": 0.8325, "step": 13949 }, { "epoch": 0.8330347545682552, "grad_norm": 1.6257572174072266, "learning_rate": 1.8558821577864774e-06, "loss": 0.8126, "step": 13950 }, { "epoch": 0.8330944703212707, "grad_norm": 2.2318999767303467, "learning_rate": 1.8552186318094357e-06, "loss": 0.7867, "step": 13951 }, { "epoch": 0.8331541860742864, "grad_norm": 1.835957407951355, "learning_rate": 1.8545551058323936e-06, "loss": 0.8001, "step": 13952 }, { "epoch": 0.833213901827302, "grad_norm": 2.2018778324127197, "learning_rate": 1.8538915798553514e-06, "loss": 0.8219, "step": 13953 }, { "epoch": 0.8332736175803177, "grad_norm": 1.8986507654190063, "learning_rate": 1.8532280538783095e-06, "loss": 0.8192, "step": 13954 }, { "epoch": 0.8333333333333334, "grad_norm": 2.375913619995117, "learning_rate": 1.8525645279012674e-06, "loss": 0.812, "step": 13955 }, { "epoch": 0.8333930490863489, "grad_norm": 1.6869300603866577, "learning_rate": 1.8519010019242255e-06, "loss": 0.8055, "step": 13956 }, { "epoch": 0.8334527648393646, "grad_norm": 2.199854612350464, "learning_rate": 1.8512374759471834e-06, "loss": 0.8111, "step": 13957 }, { "epoch": 0.8335124805923803, "grad_norm": 6.749666213989258, "learning_rate": 1.8505739499701413e-06, "loss": 0.8164, "step": 13958 }, { "epoch": 0.8335721963453959, "grad_norm": 2.8319244384765625, "learning_rate": 1.8499104239930996e-06, "loss": 0.8127, "step": 13959 }, { "epoch": 0.8336319120984116, "grad_norm": 2.130483627319336, "learning_rate": 1.8492468980160575e-06, "loss": 0.8378, "step": 13960 }, { "epoch": 0.8336916278514273, "grad_norm": 1.654826045036316, "learning_rate": 1.8485833720390156e-06, "loss": 0.8005, "step": 13961 }, { "epoch": 0.8337513436044428, "grad_norm": 2.8031439781188965, "learning_rate": 1.8479198460619735e-06, "loss": 0.7834, "step": 13962 }, { "epoch": 0.8338110593574585, "grad_norm": 8.591784477233887, "learning_rate": 1.8472563200849314e-06, "loss": 0.8139, "step": 13963 }, { "epoch": 0.8338707751104741, "grad_norm": 3.1438591480255127, "learning_rate": 1.8465927941078895e-06, "loss": 0.814, "step": 13964 }, { "epoch": 0.8339304908634898, "grad_norm": 2.354123592376709, "learning_rate": 1.8459292681308474e-06, "loss": 0.8469, "step": 13965 }, { "epoch": 0.8339902066165055, "grad_norm": 2.3055145740509033, "learning_rate": 1.8452657421538057e-06, "loss": 0.8089, "step": 13966 }, { "epoch": 0.8340499223695211, "grad_norm": 1.7066022157669067, "learning_rate": 1.8446022161767636e-06, "loss": 0.7909, "step": 13967 }, { "epoch": 0.8341096381225367, "grad_norm": 1.8780478239059448, "learning_rate": 1.8439386901997215e-06, "loss": 0.8191, "step": 13968 }, { "epoch": 0.8341693538755524, "grad_norm": 2.558443307876587, "learning_rate": 1.8432751642226796e-06, "loss": 0.8279, "step": 13969 }, { "epoch": 0.834229069628568, "grad_norm": 1.9383950233459473, "learning_rate": 1.8426116382456375e-06, "loss": 0.8421, "step": 13970 }, { "epoch": 0.8342887853815837, "grad_norm": 1.6651757955551147, "learning_rate": 1.8419481122685956e-06, "loss": 0.7837, "step": 13971 }, { "epoch": 0.8343485011345994, "grad_norm": 3.7736144065856934, "learning_rate": 1.8412845862915534e-06, "loss": 0.8292, "step": 13972 }, { "epoch": 0.8344082168876149, "grad_norm": 2.018446683883667, "learning_rate": 1.8406210603145113e-06, "loss": 0.838, "step": 13973 }, { "epoch": 0.8344679326406306, "grad_norm": 3.610280752182007, "learning_rate": 1.8399575343374696e-06, "loss": 0.8026, "step": 13974 }, { "epoch": 0.8345276483936462, "grad_norm": 1.909174919128418, "learning_rate": 1.8392940083604275e-06, "loss": 0.802, "step": 13975 }, { "epoch": 0.8345873641466619, "grad_norm": 1.8074201345443726, "learning_rate": 1.8386304823833856e-06, "loss": 0.8234, "step": 13976 }, { "epoch": 0.8346470798996776, "grad_norm": 2.247634172439575, "learning_rate": 1.8379669564063435e-06, "loss": 0.7846, "step": 13977 }, { "epoch": 0.8347067956526932, "grad_norm": 2.1467578411102295, "learning_rate": 1.8373034304293014e-06, "loss": 0.8489, "step": 13978 }, { "epoch": 0.8347665114057088, "grad_norm": 2.181353807449341, "learning_rate": 1.8366399044522595e-06, "loss": 0.8325, "step": 13979 }, { "epoch": 0.8348262271587245, "grad_norm": 1.754921793937683, "learning_rate": 1.8359763784752174e-06, "loss": 0.8109, "step": 13980 }, { "epoch": 0.8348859429117401, "grad_norm": 2.054802656173706, "learning_rate": 1.8353128524981755e-06, "loss": 0.8223, "step": 13981 }, { "epoch": 0.8349456586647558, "grad_norm": 2.0905754566192627, "learning_rate": 1.8346493265211334e-06, "loss": 0.8352, "step": 13982 }, { "epoch": 0.8350053744177715, "grad_norm": 2.5701513290405273, "learning_rate": 1.8339858005440913e-06, "loss": 0.8507, "step": 13983 }, { "epoch": 0.835065090170787, "grad_norm": 1.7130757570266724, "learning_rate": 1.8333222745670496e-06, "loss": 0.8106, "step": 13984 }, { "epoch": 0.8351248059238027, "grad_norm": 2.333280563354492, "learning_rate": 1.8326587485900075e-06, "loss": 0.8186, "step": 13985 }, { "epoch": 0.8351845216768183, "grad_norm": 2.6002390384674072, "learning_rate": 1.8319952226129656e-06, "loss": 0.8197, "step": 13986 }, { "epoch": 0.835244237429834, "grad_norm": 1.8126922845840454, "learning_rate": 1.8313316966359235e-06, "loss": 0.8282, "step": 13987 }, { "epoch": 0.8353039531828497, "grad_norm": 3.2114648818969727, "learning_rate": 1.8306681706588814e-06, "loss": 0.8119, "step": 13988 }, { "epoch": 0.8353636689358653, "grad_norm": 2.337764263153076, "learning_rate": 1.8300046446818395e-06, "loss": 0.8178, "step": 13989 }, { "epoch": 0.8354233846888809, "grad_norm": 1.8659635782241821, "learning_rate": 1.8293411187047973e-06, "loss": 0.8297, "step": 13990 }, { "epoch": 0.8354831004418966, "grad_norm": 3.8290205001831055, "learning_rate": 1.8286775927277557e-06, "loss": 0.8213, "step": 13991 }, { "epoch": 0.8355428161949122, "grad_norm": 1.7795517444610596, "learning_rate": 1.8280140667507135e-06, "loss": 0.7778, "step": 13992 }, { "epoch": 0.8356025319479279, "grad_norm": 2.100987195968628, "learning_rate": 1.8273505407736714e-06, "loss": 0.8449, "step": 13993 }, { "epoch": 0.8356622477009436, "grad_norm": 2.016352653503418, "learning_rate": 1.8266870147966295e-06, "loss": 0.8305, "step": 13994 }, { "epoch": 0.8357219634539591, "grad_norm": 2.2118940353393555, "learning_rate": 1.8260234888195874e-06, "loss": 0.8187, "step": 13995 }, { "epoch": 0.8357816792069748, "grad_norm": 2.3116328716278076, "learning_rate": 1.8253599628425453e-06, "loss": 0.8212, "step": 13996 }, { "epoch": 0.8358413949599904, "grad_norm": 2.9715609550476074, "learning_rate": 1.8246964368655034e-06, "loss": 0.836, "step": 13997 }, { "epoch": 0.8359011107130061, "grad_norm": 2.2965643405914307, "learning_rate": 1.8240329108884613e-06, "loss": 0.8432, "step": 13998 }, { "epoch": 0.8359608264660218, "grad_norm": 2.274953842163086, "learning_rate": 1.8233693849114196e-06, "loss": 0.8709, "step": 13999 }, { "epoch": 0.8360205422190374, "grad_norm": 1.7169077396392822, "learning_rate": 1.8227058589343775e-06, "loss": 0.8154, "step": 14000 }, { "epoch": 0.8360205422190374, "eval_text_loss": 0.8887479305267334, "eval_text_runtime": 15.6635, "eval_text_samples_per_second": 255.37, "eval_text_steps_per_second": 0.511, "step": 14000 }, { "epoch": 0.8360205422190374, "eval_image_loss": 0.5900670886039734, "eval_image_runtime": 5.7475, "eval_image_samples_per_second": 695.955, "eval_image_steps_per_second": 1.392, "step": 14000 }, { "epoch": 0.8360205422190374, "eval_video_loss": 1.0173320770263672, "eval_video_runtime": 80.0933, "eval_video_samples_per_second": 49.942, "eval_video_steps_per_second": 0.1, "step": 14000 }, { "epoch": 0.836080257972053, "grad_norm": 1.6274847984313965, "learning_rate": 1.8220423329573354e-06, "loss": 0.8087, "step": 14001 }, { "epoch": 0.8361399737250687, "grad_norm": 1.7462787628173828, "learning_rate": 1.8213788069802935e-06, "loss": 0.8356, "step": 14002 }, { "epoch": 0.8361996894780843, "grad_norm": 2.064300298690796, "learning_rate": 1.8207152810032514e-06, "loss": 0.8289, "step": 14003 }, { "epoch": 0.8362594052311, "grad_norm": 1.8373793363571167, "learning_rate": 1.8200517550262095e-06, "loss": 0.7954, "step": 14004 }, { "epoch": 0.8363191209841156, "grad_norm": 4.2361860275268555, "learning_rate": 1.8193882290491674e-06, "loss": 0.8547, "step": 14005 }, { "epoch": 0.8363788367371312, "grad_norm": 2.063840627670288, "learning_rate": 1.8187247030721253e-06, "loss": 0.8172, "step": 14006 }, { "epoch": 0.8364385524901469, "grad_norm": 1.974257469177246, "learning_rate": 1.8180611770950834e-06, "loss": 0.8417, "step": 14007 }, { "epoch": 0.8364982682431625, "grad_norm": 1.795252799987793, "learning_rate": 1.8173976511180413e-06, "loss": 0.8516, "step": 14008 }, { "epoch": 0.8365579839961782, "grad_norm": 1.901954174041748, "learning_rate": 1.8167341251409996e-06, "loss": 0.7864, "step": 14009 }, { "epoch": 0.8366176997491939, "grad_norm": 2.0186026096343994, "learning_rate": 1.8160705991639575e-06, "loss": 0.8055, "step": 14010 }, { "epoch": 0.8366774155022095, "grad_norm": 2.144063711166382, "learning_rate": 1.8154070731869153e-06, "loss": 0.8231, "step": 14011 }, { "epoch": 0.8367371312552251, "grad_norm": 2.1852455139160156, "learning_rate": 1.8147435472098734e-06, "loss": 0.8424, "step": 14012 }, { "epoch": 0.8367968470082408, "grad_norm": 2.046929121017456, "learning_rate": 1.8140800212328313e-06, "loss": 0.8162, "step": 14013 }, { "epoch": 0.8368565627612564, "grad_norm": 2.256300210952759, "learning_rate": 1.8134164952557894e-06, "loss": 0.8288, "step": 14014 }, { "epoch": 0.8369162785142721, "grad_norm": 1.9993699789047241, "learning_rate": 1.8127529692787473e-06, "loss": 0.8105, "step": 14015 }, { "epoch": 0.8369759942672877, "grad_norm": 1.88297700881958, "learning_rate": 1.8120894433017052e-06, "loss": 0.7607, "step": 14016 }, { "epoch": 0.8370357100203033, "grad_norm": 3.667865753173828, "learning_rate": 1.8114259173246635e-06, "loss": 0.8438, "step": 14017 }, { "epoch": 0.837095425773319, "grad_norm": 2.698984384536743, "learning_rate": 1.8107623913476214e-06, "loss": 0.8267, "step": 14018 }, { "epoch": 0.8371551415263346, "grad_norm": 2.910121202468872, "learning_rate": 1.8100988653705795e-06, "loss": 0.8273, "step": 14019 }, { "epoch": 0.8372148572793503, "grad_norm": 1.818569540977478, "learning_rate": 1.8094353393935374e-06, "loss": 0.8069, "step": 14020 }, { "epoch": 0.837274573032366, "grad_norm": 2.0169496536254883, "learning_rate": 1.8087718134164953e-06, "loss": 0.814, "step": 14021 }, { "epoch": 0.8373342887853816, "grad_norm": 6.135181427001953, "learning_rate": 1.8081082874394534e-06, "loss": 0.8168, "step": 14022 }, { "epoch": 0.8373940045383972, "grad_norm": 2.56821346282959, "learning_rate": 1.8074447614624113e-06, "loss": 0.8508, "step": 14023 }, { "epoch": 0.8374537202914129, "grad_norm": 1.918373465538025, "learning_rate": 1.8067812354853696e-06, "loss": 0.842, "step": 14024 }, { "epoch": 0.8375134360444285, "grad_norm": 5.319711685180664, "learning_rate": 1.8061177095083275e-06, "loss": 0.8176, "step": 14025 }, { "epoch": 0.8375731517974442, "grad_norm": 2.226996660232544, "learning_rate": 1.8054541835312854e-06, "loss": 0.8118, "step": 14026 }, { "epoch": 0.8376328675504598, "grad_norm": 2.1535890102386475, "learning_rate": 1.8047906575542435e-06, "loss": 0.8121, "step": 14027 }, { "epoch": 0.8376925833034754, "grad_norm": 2.066568374633789, "learning_rate": 1.8041271315772014e-06, "loss": 0.8183, "step": 14028 }, { "epoch": 0.8377522990564911, "grad_norm": 1.9707484245300293, "learning_rate": 1.8034636056001595e-06, "loss": 0.827, "step": 14029 }, { "epoch": 0.8378120148095067, "grad_norm": 2.478444814682007, "learning_rate": 1.8028000796231173e-06, "loss": 0.8423, "step": 14030 }, { "epoch": 0.8378717305625224, "grad_norm": 2.1562955379486084, "learning_rate": 1.8021365536460752e-06, "loss": 0.8441, "step": 14031 }, { "epoch": 0.8379314463155381, "grad_norm": 3.586050033569336, "learning_rate": 1.8014730276690333e-06, "loss": 0.8024, "step": 14032 }, { "epoch": 0.8379911620685537, "grad_norm": 1.8104827404022217, "learning_rate": 1.8008095016919912e-06, "loss": 0.7874, "step": 14033 }, { "epoch": 0.8380508778215693, "grad_norm": 2.1989188194274902, "learning_rate": 1.8001459757149495e-06, "loss": 0.7978, "step": 14034 }, { "epoch": 0.838110593574585, "grad_norm": 2.0087344646453857, "learning_rate": 1.7994824497379074e-06, "loss": 0.8157, "step": 14035 }, { "epoch": 0.8381703093276006, "grad_norm": 2.0112216472625732, "learning_rate": 1.7988189237608653e-06, "loss": 0.7795, "step": 14036 }, { "epoch": 0.8382300250806163, "grad_norm": 2.291252851486206, "learning_rate": 1.7981553977838234e-06, "loss": 0.8288, "step": 14037 }, { "epoch": 0.838289740833632, "grad_norm": 3.07431697845459, "learning_rate": 1.7974918718067813e-06, "loss": 0.8627, "step": 14038 }, { "epoch": 0.8383494565866476, "grad_norm": 2.045463800430298, "learning_rate": 1.7968283458297394e-06, "loss": 0.8504, "step": 14039 }, { "epoch": 0.8384091723396632, "grad_norm": 2.5305445194244385, "learning_rate": 1.7961648198526973e-06, "loss": 0.8434, "step": 14040 }, { "epoch": 0.8384688880926788, "grad_norm": 1.7668155431747437, "learning_rate": 1.7955012938756552e-06, "loss": 0.8286, "step": 14041 }, { "epoch": 0.8385286038456945, "grad_norm": 2.099252700805664, "learning_rate": 1.7948377678986135e-06, "loss": 0.8386, "step": 14042 }, { "epoch": 0.8385883195987102, "grad_norm": 1.8925021886825562, "learning_rate": 1.7941742419215714e-06, "loss": 0.8115, "step": 14043 }, { "epoch": 0.8386480353517258, "grad_norm": 1.872293472290039, "learning_rate": 1.7935107159445295e-06, "loss": 0.8095, "step": 14044 }, { "epoch": 0.8387077511047414, "grad_norm": 2.4803009033203125, "learning_rate": 1.7928471899674874e-06, "loss": 0.7673, "step": 14045 }, { "epoch": 0.838767466857757, "grad_norm": 2.133435010910034, "learning_rate": 1.7921836639904453e-06, "loss": 0.8059, "step": 14046 }, { "epoch": 0.8388271826107727, "grad_norm": 3.74509859085083, "learning_rate": 1.7915201380134034e-06, "loss": 0.8234, "step": 14047 }, { "epoch": 0.8388868983637884, "grad_norm": 2.3887808322906494, "learning_rate": 1.7908566120363612e-06, "loss": 0.7782, "step": 14048 }, { "epoch": 0.838946614116804, "grad_norm": 1.6756013631820679, "learning_rate": 1.7901930860593196e-06, "loss": 0.8306, "step": 14049 }, { "epoch": 0.8390063298698197, "grad_norm": 2.095221757888794, "learning_rate": 1.7895295600822774e-06, "loss": 0.8041, "step": 14050 }, { "epoch": 0.8390660456228353, "grad_norm": 2.6652777194976807, "learning_rate": 1.7888660341052353e-06, "loss": 0.7975, "step": 14051 }, { "epoch": 0.8391257613758509, "grad_norm": 7.798861503601074, "learning_rate": 1.7882025081281934e-06, "loss": 0.8228, "step": 14052 }, { "epoch": 0.8391854771288666, "grad_norm": 2.706559419631958, "learning_rate": 1.7875389821511513e-06, "loss": 0.8153, "step": 14053 }, { "epoch": 0.8392451928818823, "grad_norm": 2.3151400089263916, "learning_rate": 1.7868754561741094e-06, "loss": 0.824, "step": 14054 }, { "epoch": 0.8393049086348979, "grad_norm": 2.243826389312744, "learning_rate": 1.7862119301970673e-06, "loss": 0.8001, "step": 14055 }, { "epoch": 0.8393646243879135, "grad_norm": 1.545481562614441, "learning_rate": 1.7855484042200252e-06, "loss": 0.826, "step": 14056 }, { "epoch": 0.8394243401409291, "grad_norm": 4.69822359085083, "learning_rate": 1.7848848782429833e-06, "loss": 0.8447, "step": 14057 }, { "epoch": 0.8394840558939448, "grad_norm": 1.7325012683868408, "learning_rate": 1.7842213522659412e-06, "loss": 0.7867, "step": 14058 }, { "epoch": 0.8395437716469605, "grad_norm": 2.07888126373291, "learning_rate": 1.7835578262888995e-06, "loss": 0.8507, "step": 14059 }, { "epoch": 0.8396034873999761, "grad_norm": 1.8603557348251343, "learning_rate": 1.7828943003118574e-06, "loss": 0.8265, "step": 14060 }, { "epoch": 0.8396632031529918, "grad_norm": 3.0245604515075684, "learning_rate": 1.7822307743348153e-06, "loss": 0.8255, "step": 14061 }, { "epoch": 0.8397229189060074, "grad_norm": 1.8300930261611938, "learning_rate": 1.7815672483577734e-06, "loss": 0.805, "step": 14062 }, { "epoch": 0.839782634659023, "grad_norm": 1.6955658197402954, "learning_rate": 1.7809037223807313e-06, "loss": 0.8166, "step": 14063 }, { "epoch": 0.8398423504120387, "grad_norm": 2.246054172515869, "learning_rate": 1.7802401964036894e-06, "loss": 0.8251, "step": 14064 }, { "epoch": 0.8399020661650544, "grad_norm": 3.173377275466919, "learning_rate": 1.7795766704266473e-06, "loss": 0.8139, "step": 14065 }, { "epoch": 0.83996178191807, "grad_norm": 2.467402219772339, "learning_rate": 1.7789131444496051e-06, "loss": 0.796, "step": 14066 }, { "epoch": 0.8400214976710856, "grad_norm": 3.3453867435455322, "learning_rate": 1.7782496184725635e-06, "loss": 0.7811, "step": 14067 }, { "epoch": 0.8400812134241012, "grad_norm": 2.0054690837860107, "learning_rate": 1.7775860924955214e-06, "loss": 0.8102, "step": 14068 }, { "epoch": 0.8401409291771169, "grad_norm": 3.9024651050567627, "learning_rate": 1.7769225665184795e-06, "loss": 0.8509, "step": 14069 }, { "epoch": 0.8402006449301326, "grad_norm": 1.7823772430419922, "learning_rate": 1.7762590405414373e-06, "loss": 0.8115, "step": 14070 }, { "epoch": 0.8402603606831482, "grad_norm": 1.8521368503570557, "learning_rate": 1.7755955145643952e-06, "loss": 0.7969, "step": 14071 }, { "epoch": 0.8403200764361639, "grad_norm": 2.1364684104919434, "learning_rate": 1.7749319885873533e-06, "loss": 0.826, "step": 14072 }, { "epoch": 0.8403797921891795, "grad_norm": 2.097522258758545, "learning_rate": 1.7742684626103112e-06, "loss": 0.8482, "step": 14073 }, { "epoch": 0.8404395079421951, "grad_norm": 2.4530436992645264, "learning_rate": 1.7736049366332695e-06, "loss": 0.8299, "step": 14074 }, { "epoch": 0.8404992236952108, "grad_norm": 3.3235912322998047, "learning_rate": 1.7729414106562274e-06, "loss": 0.814, "step": 14075 }, { "epoch": 0.8405589394482265, "grad_norm": 2.6010141372680664, "learning_rate": 1.7722778846791853e-06, "loss": 0.8141, "step": 14076 }, { "epoch": 0.8406186552012421, "grad_norm": 2.0768256187438965, "learning_rate": 1.7716143587021434e-06, "loss": 0.829, "step": 14077 }, { "epoch": 0.8406783709542577, "grad_norm": 2.0480151176452637, "learning_rate": 1.7709508327251013e-06, "loss": 0.8402, "step": 14078 }, { "epoch": 0.8407380867072733, "grad_norm": 2.389482259750366, "learning_rate": 1.7702873067480594e-06, "loss": 0.7856, "step": 14079 }, { "epoch": 0.840797802460289, "grad_norm": 2.0932300090789795, "learning_rate": 1.7696237807710173e-06, "loss": 0.8397, "step": 14080 }, { "epoch": 0.8408575182133047, "grad_norm": 2.1952075958251953, "learning_rate": 1.7689602547939752e-06, "loss": 0.7751, "step": 14081 }, { "epoch": 0.8409172339663203, "grad_norm": 1.8131614923477173, "learning_rate": 1.7682967288169333e-06, "loss": 0.8302, "step": 14082 }, { "epoch": 0.840976949719336, "grad_norm": 2.301609754562378, "learning_rate": 1.7676332028398912e-06, "loss": 0.778, "step": 14083 }, { "epoch": 0.8410366654723516, "grad_norm": 2.4887478351593018, "learning_rate": 1.7669696768628495e-06, "loss": 0.806, "step": 14084 }, { "epoch": 0.8410963812253672, "grad_norm": 2.0220227241516113, "learning_rate": 1.7663061508858074e-06, "loss": 0.8226, "step": 14085 }, { "epoch": 0.8411560969783829, "grad_norm": 1.8866089582443237, "learning_rate": 1.7656426249087653e-06, "loss": 0.8199, "step": 14086 }, { "epoch": 0.8412158127313986, "grad_norm": 1.9719579219818115, "learning_rate": 1.7649790989317234e-06, "loss": 0.8037, "step": 14087 }, { "epoch": 0.8412755284844142, "grad_norm": 2.0951719284057617, "learning_rate": 1.7643155729546812e-06, "loss": 0.7948, "step": 14088 }, { "epoch": 0.8413352442374298, "grad_norm": 2.2134268283843994, "learning_rate": 1.7636520469776393e-06, "loss": 0.8352, "step": 14089 }, { "epoch": 0.8413949599904454, "grad_norm": 1.7818000316619873, "learning_rate": 1.7629885210005972e-06, "loss": 0.805, "step": 14090 }, { "epoch": 0.8414546757434611, "grad_norm": 1.9044111967086792, "learning_rate": 1.7623249950235551e-06, "loss": 0.7847, "step": 14091 }, { "epoch": 0.8415143914964768, "grad_norm": 4.504777431488037, "learning_rate": 1.7616614690465134e-06, "loss": 0.8112, "step": 14092 }, { "epoch": 0.8415741072494924, "grad_norm": 3.0478291511535645, "learning_rate": 1.7609979430694713e-06, "loss": 0.7947, "step": 14093 }, { "epoch": 0.8416338230025081, "grad_norm": 2.9961447715759277, "learning_rate": 1.7603344170924294e-06, "loss": 0.8473, "step": 14094 }, { "epoch": 0.8416935387555237, "grad_norm": 2.07814359664917, "learning_rate": 1.7596708911153873e-06, "loss": 0.817, "step": 14095 }, { "epoch": 0.8417532545085393, "grad_norm": 2.6390159130096436, "learning_rate": 1.7590073651383452e-06, "loss": 0.8777, "step": 14096 }, { "epoch": 0.841812970261555, "grad_norm": 1.9468481540679932, "learning_rate": 1.7583438391613033e-06, "loss": 0.8198, "step": 14097 }, { "epoch": 0.8418726860145707, "grad_norm": 2.2799816131591797, "learning_rate": 1.7576803131842612e-06, "loss": 0.8202, "step": 14098 }, { "epoch": 0.8419324017675863, "grad_norm": 2.2147202491760254, "learning_rate": 1.7570167872072195e-06, "loss": 0.7842, "step": 14099 }, { "epoch": 0.8419921175206019, "grad_norm": 2.0861313343048096, "learning_rate": 1.7563532612301774e-06, "loss": 0.7917, "step": 14100 }, { "epoch": 0.8420518332736175, "grad_norm": 1.999927282333374, "learning_rate": 1.7556897352531353e-06, "loss": 0.8049, "step": 14101 }, { "epoch": 0.8421115490266332, "grad_norm": 2.455713987350464, "learning_rate": 1.7550262092760934e-06, "loss": 0.8308, "step": 14102 }, { "epoch": 0.8421712647796489, "grad_norm": 2.928654432296753, "learning_rate": 1.7543626832990513e-06, "loss": 0.8542, "step": 14103 }, { "epoch": 0.8422309805326645, "grad_norm": 2.782198429107666, "learning_rate": 1.7536991573220094e-06, "loss": 0.7891, "step": 14104 }, { "epoch": 0.8422906962856802, "grad_norm": 2.81506085395813, "learning_rate": 1.7530356313449673e-06, "loss": 0.8214, "step": 14105 }, { "epoch": 0.8423504120386958, "grad_norm": 2.142669439315796, "learning_rate": 1.7523721053679251e-06, "loss": 0.8119, "step": 14106 }, { "epoch": 0.8424101277917114, "grad_norm": 1.8367563486099243, "learning_rate": 1.7517085793908832e-06, "loss": 0.8466, "step": 14107 }, { "epoch": 0.8424698435447271, "grad_norm": 2.059197187423706, "learning_rate": 1.7510450534138411e-06, "loss": 0.8454, "step": 14108 }, { "epoch": 0.8425295592977428, "grad_norm": 1.994812250137329, "learning_rate": 1.7503815274367994e-06, "loss": 0.7881, "step": 14109 }, { "epoch": 0.8425892750507584, "grad_norm": 4.789581775665283, "learning_rate": 1.7497180014597573e-06, "loss": 0.8465, "step": 14110 }, { "epoch": 0.8426489908037741, "grad_norm": 2.1759603023529053, "learning_rate": 1.7490544754827152e-06, "loss": 0.8251, "step": 14111 }, { "epoch": 0.8427087065567896, "grad_norm": 2.7200605869293213, "learning_rate": 1.7483909495056733e-06, "loss": 0.8385, "step": 14112 }, { "epoch": 0.8427684223098053, "grad_norm": 1.7254689931869507, "learning_rate": 1.7477274235286312e-06, "loss": 0.8025, "step": 14113 }, { "epoch": 0.842828138062821, "grad_norm": 2.1640188694000244, "learning_rate": 1.7470638975515893e-06, "loss": 0.7957, "step": 14114 }, { "epoch": 0.8428878538158366, "grad_norm": 2.905435562133789, "learning_rate": 1.7464003715745472e-06, "loss": 0.7737, "step": 14115 }, { "epoch": 0.8429475695688523, "grad_norm": 2.6295769214630127, "learning_rate": 1.745736845597505e-06, "loss": 0.805, "step": 14116 }, { "epoch": 0.8430072853218679, "grad_norm": 3.5131618976593018, "learning_rate": 1.7450733196204634e-06, "loss": 0.8215, "step": 14117 }, { "epoch": 0.8430670010748835, "grad_norm": 2.3378615379333496, "learning_rate": 1.7444097936434213e-06, "loss": 0.8052, "step": 14118 }, { "epoch": 0.8431267168278992, "grad_norm": 1.6699789762496948, "learning_rate": 1.7437462676663794e-06, "loss": 0.8162, "step": 14119 }, { "epoch": 0.8431864325809149, "grad_norm": 1.973160743713379, "learning_rate": 1.7430827416893373e-06, "loss": 0.8416, "step": 14120 }, { "epoch": 0.8432461483339305, "grad_norm": 2.779359817504883, "learning_rate": 1.7424192157122952e-06, "loss": 0.8443, "step": 14121 }, { "epoch": 0.8433058640869462, "grad_norm": 2.303096055984497, "learning_rate": 1.7417556897352533e-06, "loss": 0.8462, "step": 14122 }, { "epoch": 0.8433655798399617, "grad_norm": 3.498778820037842, "learning_rate": 1.7410921637582112e-06, "loss": 0.8133, "step": 14123 }, { "epoch": 0.8434252955929774, "grad_norm": 3.0517423152923584, "learning_rate": 1.7404286377811695e-06, "loss": 0.8524, "step": 14124 }, { "epoch": 0.8434850113459931, "grad_norm": 2.358057737350464, "learning_rate": 1.7397651118041274e-06, "loss": 0.8325, "step": 14125 }, { "epoch": 0.8435447270990087, "grad_norm": 2.808734178543091, "learning_rate": 1.7391015858270852e-06, "loss": 0.8174, "step": 14126 }, { "epoch": 0.8436044428520244, "grad_norm": 2.211583375930786, "learning_rate": 1.7384380598500433e-06, "loss": 0.7957, "step": 14127 }, { "epoch": 0.84366415860504, "grad_norm": 3.6275084018707275, "learning_rate": 1.7377745338730012e-06, "loss": 0.8179, "step": 14128 }, { "epoch": 0.8437238743580556, "grad_norm": 2.1906113624572754, "learning_rate": 1.7371110078959593e-06, "loss": 0.8329, "step": 14129 }, { "epoch": 0.8437835901110713, "grad_norm": 3.511084794998169, "learning_rate": 1.7364474819189172e-06, "loss": 0.804, "step": 14130 }, { "epoch": 0.843843305864087, "grad_norm": 4.943333148956299, "learning_rate": 1.7357839559418751e-06, "loss": 0.789, "step": 14131 }, { "epoch": 0.8439030216171026, "grad_norm": 2.2914347648620605, "learning_rate": 1.7351204299648332e-06, "loss": 0.8016, "step": 14132 }, { "epoch": 0.8439627373701183, "grad_norm": 1.5994672775268555, "learning_rate": 1.734456903987791e-06, "loss": 0.8299, "step": 14133 }, { "epoch": 0.8440224531231338, "grad_norm": 1.6921007633209229, "learning_rate": 1.7337933780107494e-06, "loss": 0.7907, "step": 14134 }, { "epoch": 0.8440821688761495, "grad_norm": 1.6381996870040894, "learning_rate": 1.7331298520337073e-06, "loss": 0.8174, "step": 14135 }, { "epoch": 0.8441418846291652, "grad_norm": 1.7864632606506348, "learning_rate": 1.7324663260566652e-06, "loss": 0.8143, "step": 14136 }, { "epoch": 0.8442016003821808, "grad_norm": 2.7460620403289795, "learning_rate": 1.7318028000796233e-06, "loss": 0.7936, "step": 14137 }, { "epoch": 0.8442613161351965, "grad_norm": 1.9840196371078491, "learning_rate": 1.7311392741025812e-06, "loss": 0.7905, "step": 14138 }, { "epoch": 0.8443210318882121, "grad_norm": 2.2644565105438232, "learning_rate": 1.7304757481255393e-06, "loss": 0.8382, "step": 14139 }, { "epoch": 0.8443807476412277, "grad_norm": 4.084012031555176, "learning_rate": 1.7298122221484972e-06, "loss": 0.8206, "step": 14140 }, { "epoch": 0.8444404633942434, "grad_norm": 2.3294901847839355, "learning_rate": 1.729148696171455e-06, "loss": 0.7987, "step": 14141 }, { "epoch": 0.8445001791472591, "grad_norm": 2.2659552097320557, "learning_rate": 1.7284851701944134e-06, "loss": 0.8298, "step": 14142 }, { "epoch": 0.8445598949002747, "grad_norm": 2.5898783206939697, "learning_rate": 1.7278216442173713e-06, "loss": 0.8111, "step": 14143 }, { "epoch": 0.8446196106532904, "grad_norm": 3.352374315261841, "learning_rate": 1.7271581182403294e-06, "loss": 0.8348, "step": 14144 }, { "epoch": 0.8446793264063059, "grad_norm": 2.1697020530700684, "learning_rate": 1.7264945922632873e-06, "loss": 0.798, "step": 14145 }, { "epoch": 0.8447390421593216, "grad_norm": 2.8152811527252197, "learning_rate": 1.7258310662862451e-06, "loss": 0.8042, "step": 14146 }, { "epoch": 0.8447987579123373, "grad_norm": 2.1657614707946777, "learning_rate": 1.7251675403092032e-06, "loss": 0.8243, "step": 14147 }, { "epoch": 0.8448584736653529, "grad_norm": 2.0765016078948975, "learning_rate": 1.7245040143321611e-06, "loss": 0.8106, "step": 14148 }, { "epoch": 0.8449181894183686, "grad_norm": 2.116262197494507, "learning_rate": 1.7238404883551194e-06, "loss": 0.7836, "step": 14149 }, { "epoch": 0.8449779051713842, "grad_norm": 2.58406400680542, "learning_rate": 1.7231769623780773e-06, "loss": 0.8304, "step": 14150 }, { "epoch": 0.8450376209243998, "grad_norm": 1.9955309629440308, "learning_rate": 1.7225134364010352e-06, "loss": 0.822, "step": 14151 }, { "epoch": 0.8450973366774155, "grad_norm": 1.7376585006713867, "learning_rate": 1.7218499104239933e-06, "loss": 0.8161, "step": 14152 }, { "epoch": 0.8451570524304312, "grad_norm": 2.2340734004974365, "learning_rate": 1.7211863844469512e-06, "loss": 0.8238, "step": 14153 }, { "epoch": 0.8452167681834468, "grad_norm": 1.9081398248672485, "learning_rate": 1.7205228584699093e-06, "loss": 0.8274, "step": 14154 }, { "epoch": 0.8452764839364625, "grad_norm": 4.303963661193848, "learning_rate": 1.7198593324928672e-06, "loss": 0.8133, "step": 14155 }, { "epoch": 0.845336199689478, "grad_norm": 2.1375794410705566, "learning_rate": 1.719195806515825e-06, "loss": 0.7847, "step": 14156 }, { "epoch": 0.8453959154424937, "grad_norm": 1.7931089401245117, "learning_rate": 1.7185322805387832e-06, "loss": 0.7998, "step": 14157 }, { "epoch": 0.8454556311955094, "grad_norm": 1.9941951036453247, "learning_rate": 1.717868754561741e-06, "loss": 0.8356, "step": 14158 }, { "epoch": 0.845515346948525, "grad_norm": 2.28806734085083, "learning_rate": 1.7172052285846994e-06, "loss": 0.8195, "step": 14159 }, { "epoch": 0.8455750627015407, "grad_norm": 1.5141088962554932, "learning_rate": 1.7165417026076573e-06, "loss": 0.8021, "step": 14160 }, { "epoch": 0.8456347784545563, "grad_norm": 2.1065304279327393, "learning_rate": 1.7158781766306152e-06, "loss": 0.7874, "step": 14161 }, { "epoch": 0.8456944942075719, "grad_norm": 2.623246908187866, "learning_rate": 1.7152146506535733e-06, "loss": 0.8314, "step": 14162 }, { "epoch": 0.8457542099605876, "grad_norm": 1.77444589138031, "learning_rate": 1.7145511246765312e-06, "loss": 0.8055, "step": 14163 }, { "epoch": 0.8458139257136033, "grad_norm": 2.597217321395874, "learning_rate": 1.7138875986994893e-06, "loss": 0.8145, "step": 14164 }, { "epoch": 0.8458736414666189, "grad_norm": 2.3513894081115723, "learning_rate": 1.7132240727224471e-06, "loss": 0.8315, "step": 14165 }, { "epoch": 0.8459333572196346, "grad_norm": 2.116917371749878, "learning_rate": 1.712560546745405e-06, "loss": 0.8444, "step": 14166 }, { "epoch": 0.8459930729726501, "grad_norm": 2.2913689613342285, "learning_rate": 1.7118970207683633e-06, "loss": 0.7939, "step": 14167 }, { "epoch": 0.8460527887256658, "grad_norm": 1.9279371500015259, "learning_rate": 1.7112334947913212e-06, "loss": 0.8426, "step": 14168 }, { "epoch": 0.8461125044786815, "grad_norm": 1.7095140218734741, "learning_rate": 1.7105699688142793e-06, "loss": 0.8131, "step": 14169 }, { "epoch": 0.8461722202316971, "grad_norm": 6.858376502990723, "learning_rate": 1.7099064428372372e-06, "loss": 0.8135, "step": 14170 }, { "epoch": 0.8462319359847128, "grad_norm": 1.890575885772705, "learning_rate": 1.7092429168601951e-06, "loss": 0.8194, "step": 14171 }, { "epoch": 0.8462916517377285, "grad_norm": 2.996210813522339, "learning_rate": 1.7085793908831532e-06, "loss": 0.8585, "step": 14172 }, { "epoch": 0.846351367490744, "grad_norm": 3.223313093185425, "learning_rate": 1.707915864906111e-06, "loss": 0.8045, "step": 14173 }, { "epoch": 0.8464110832437597, "grad_norm": 2.1783194541931152, "learning_rate": 1.7072523389290694e-06, "loss": 0.8013, "step": 14174 }, { "epoch": 0.8464707989967754, "grad_norm": 2.111961603164673, "learning_rate": 1.7065888129520273e-06, "loss": 0.8099, "step": 14175 }, { "epoch": 0.846530514749791, "grad_norm": 2.454772472381592, "learning_rate": 1.7059252869749852e-06, "loss": 0.858, "step": 14176 }, { "epoch": 0.8465902305028067, "grad_norm": 2.531322956085205, "learning_rate": 1.7052617609979433e-06, "loss": 0.8266, "step": 14177 }, { "epoch": 0.8466499462558222, "grad_norm": 1.9498639106750488, "learning_rate": 1.7045982350209012e-06, "loss": 0.7658, "step": 14178 }, { "epoch": 0.8467096620088379, "grad_norm": 2.0976860523223877, "learning_rate": 1.7039347090438593e-06, "loss": 0.835, "step": 14179 }, { "epoch": 0.8467693777618536, "grad_norm": 1.9900810718536377, "learning_rate": 1.7032711830668172e-06, "loss": 0.8475, "step": 14180 }, { "epoch": 0.8468290935148692, "grad_norm": 2.0718636512756348, "learning_rate": 1.702607657089775e-06, "loss": 0.7979, "step": 14181 }, { "epoch": 0.8468888092678849, "grad_norm": 3.1608824729919434, "learning_rate": 1.7019441311127334e-06, "loss": 0.7776, "step": 14182 }, { "epoch": 0.8469485250209006, "grad_norm": 1.9699195623397827, "learning_rate": 1.7012806051356913e-06, "loss": 0.8426, "step": 14183 }, { "epoch": 0.8470082407739161, "grad_norm": 2.6244661808013916, "learning_rate": 1.7006170791586494e-06, "loss": 0.8213, "step": 14184 }, { "epoch": 0.8470679565269318, "grad_norm": 3.1187968254089355, "learning_rate": 1.6999535531816072e-06, "loss": 0.8139, "step": 14185 }, { "epoch": 0.8471276722799475, "grad_norm": 4.285581588745117, "learning_rate": 1.6992900272045651e-06, "loss": 0.835, "step": 14186 }, { "epoch": 0.8471873880329631, "grad_norm": 2.804082155227661, "learning_rate": 1.6986265012275232e-06, "loss": 0.8359, "step": 14187 }, { "epoch": 0.8472471037859788, "grad_norm": 3.0532331466674805, "learning_rate": 1.6979629752504811e-06, "loss": 0.8326, "step": 14188 }, { "epoch": 0.8473068195389943, "grad_norm": 1.9328645467758179, "learning_rate": 1.6972994492734392e-06, "loss": 0.8229, "step": 14189 }, { "epoch": 0.84736653529201, "grad_norm": 2.775653839111328, "learning_rate": 1.6966359232963971e-06, "loss": 0.82, "step": 14190 }, { "epoch": 0.8474262510450257, "grad_norm": 1.9264535903930664, "learning_rate": 1.695972397319355e-06, "loss": 0.7964, "step": 14191 }, { "epoch": 0.8474859667980413, "grad_norm": 3.3557138442993164, "learning_rate": 1.6953088713423133e-06, "loss": 0.852, "step": 14192 }, { "epoch": 0.847545682551057, "grad_norm": 2.2657523155212402, "learning_rate": 1.6946453453652712e-06, "loss": 0.8154, "step": 14193 }, { "epoch": 0.8476053983040727, "grad_norm": 2.3318216800689697, "learning_rate": 1.6939818193882293e-06, "loss": 0.8539, "step": 14194 }, { "epoch": 0.8476651140570882, "grad_norm": 2.6660051345825195, "learning_rate": 1.6933182934111872e-06, "loss": 0.836, "step": 14195 }, { "epoch": 0.8477248298101039, "grad_norm": 2.160863161087036, "learning_rate": 1.692654767434145e-06, "loss": 0.8379, "step": 14196 }, { "epoch": 0.8477845455631196, "grad_norm": 2.395005464553833, "learning_rate": 1.6919912414571032e-06, "loss": 0.8331, "step": 14197 }, { "epoch": 0.8478442613161352, "grad_norm": 2.7299587726593018, "learning_rate": 1.691327715480061e-06, "loss": 0.8253, "step": 14198 }, { "epoch": 0.8479039770691509, "grad_norm": 2.4269204139709473, "learning_rate": 1.6906641895030194e-06, "loss": 0.7943, "step": 14199 }, { "epoch": 0.8479636928221664, "grad_norm": 1.9685088396072388, "learning_rate": 1.6900006635259773e-06, "loss": 0.8419, "step": 14200 }, { "epoch": 0.8479636928221664, "eval_text_loss": 0.888041615486145, "eval_text_runtime": 15.1711, "eval_text_samples_per_second": 263.66, "eval_text_steps_per_second": 0.527, "step": 14200 }, { "epoch": 0.8479636928221664, "eval_image_loss": 0.5896385312080383, "eval_image_runtime": 4.9932, "eval_image_samples_per_second": 801.088, "eval_image_steps_per_second": 1.602, "step": 14200 }, { "epoch": 0.8479636928221664, "eval_video_loss": 1.0165584087371826, "eval_video_runtime": 78.0747, "eval_video_samples_per_second": 51.233, "eval_video_steps_per_second": 0.102, "step": 14200 }, { "epoch": 0.8480234085751821, "grad_norm": 3.338207483291626, "learning_rate": 1.6893371375489352e-06, "loss": 0.8012, "step": 14201 }, { "epoch": 0.8480831243281978, "grad_norm": 3.373598098754883, "learning_rate": 1.6886736115718933e-06, "loss": 0.8024, "step": 14202 }, { "epoch": 0.8481428400812134, "grad_norm": 2.3147172927856445, "learning_rate": 1.6880100855948512e-06, "loss": 0.8477, "step": 14203 }, { "epoch": 0.8482025558342291, "grad_norm": 2.1042697429656982, "learning_rate": 1.6873465596178093e-06, "loss": 0.806, "step": 14204 }, { "epoch": 0.8482622715872448, "grad_norm": 1.637595534324646, "learning_rate": 1.6866830336407671e-06, "loss": 0.8018, "step": 14205 }, { "epoch": 0.8483219873402603, "grad_norm": 2.410649299621582, "learning_rate": 1.686019507663725e-06, "loss": 0.817, "step": 14206 }, { "epoch": 0.848381703093276, "grad_norm": 1.9560253620147705, "learning_rate": 1.6853559816866833e-06, "loss": 0.8113, "step": 14207 }, { "epoch": 0.8484414188462917, "grad_norm": 2.2827165126800537, "learning_rate": 1.6846924557096412e-06, "loss": 0.7996, "step": 14208 }, { "epoch": 0.8485011345993073, "grad_norm": 3.107604742050171, "learning_rate": 1.6840289297325993e-06, "loss": 0.8299, "step": 14209 }, { "epoch": 0.848560850352323, "grad_norm": 2.3575146198272705, "learning_rate": 1.6833654037555572e-06, "loss": 0.7912, "step": 14210 }, { "epoch": 0.8486205661053385, "grad_norm": 2.3021345138549805, "learning_rate": 1.6827018777785151e-06, "loss": 0.8431, "step": 14211 }, { "epoch": 0.8486802818583542, "grad_norm": 2.4559848308563232, "learning_rate": 1.6820383518014732e-06, "loss": 0.8246, "step": 14212 }, { "epoch": 0.8487399976113699, "grad_norm": 3.2540595531463623, "learning_rate": 1.681374825824431e-06, "loss": 0.8256, "step": 14213 }, { "epoch": 0.8487997133643855, "grad_norm": 1.7395256757736206, "learning_rate": 1.6807112998473892e-06, "loss": 0.8008, "step": 14214 }, { "epoch": 0.8488594291174012, "grad_norm": 1.7030839920043945, "learning_rate": 1.680047773870347e-06, "loss": 0.8211, "step": 14215 }, { "epoch": 0.8489191448704169, "grad_norm": 2.4081077575683594, "learning_rate": 1.679384247893305e-06, "loss": 0.8528, "step": 14216 }, { "epoch": 0.8489788606234324, "grad_norm": 1.6715240478515625, "learning_rate": 1.6787207219162633e-06, "loss": 0.794, "step": 14217 }, { "epoch": 0.8490385763764481, "grad_norm": 2.3212502002716064, "learning_rate": 1.6780571959392212e-06, "loss": 0.8225, "step": 14218 }, { "epoch": 0.8490982921294637, "grad_norm": 2.4397175312042236, "learning_rate": 1.6773936699621793e-06, "loss": 0.7796, "step": 14219 }, { "epoch": 0.8491580078824794, "grad_norm": 3.436927556991577, "learning_rate": 1.6767301439851372e-06, "loss": 0.8244, "step": 14220 }, { "epoch": 0.8492177236354951, "grad_norm": 2.6742591857910156, "learning_rate": 1.676066618008095e-06, "loss": 0.8136, "step": 14221 }, { "epoch": 0.8492774393885106, "grad_norm": 2.701831817626953, "learning_rate": 1.6754030920310532e-06, "loss": 0.8186, "step": 14222 }, { "epoch": 0.8493371551415263, "grad_norm": 2.7999749183654785, "learning_rate": 1.674739566054011e-06, "loss": 0.7847, "step": 14223 }, { "epoch": 0.849396870894542, "grad_norm": 1.9486314058303833, "learning_rate": 1.6740760400769694e-06, "loss": 0.7846, "step": 14224 }, { "epoch": 0.8494565866475576, "grad_norm": 2.9088103771209717, "learning_rate": 1.6734125140999272e-06, "loss": 0.8135, "step": 14225 }, { "epoch": 0.8495163024005733, "grad_norm": 2.0003159046173096, "learning_rate": 1.6727489881228851e-06, "loss": 0.8179, "step": 14226 }, { "epoch": 0.849576018153589, "grad_norm": 2.152369737625122, "learning_rate": 1.6720854621458432e-06, "loss": 0.7969, "step": 14227 }, { "epoch": 0.8496357339066045, "grad_norm": 2.257014751434326, "learning_rate": 1.6714219361688011e-06, "loss": 0.7852, "step": 14228 }, { "epoch": 0.8496954496596202, "grad_norm": 2.1701271533966064, "learning_rate": 1.6707584101917592e-06, "loss": 0.7994, "step": 14229 }, { "epoch": 0.8497551654126358, "grad_norm": 1.6363493204116821, "learning_rate": 1.6700948842147171e-06, "loss": 0.8455, "step": 14230 }, { "epoch": 0.8498148811656515, "grad_norm": 1.7565221786499023, "learning_rate": 1.669431358237675e-06, "loss": 0.8185, "step": 14231 }, { "epoch": 0.8498745969186672, "grad_norm": 1.824684739112854, "learning_rate": 1.6687678322606333e-06, "loss": 0.8025, "step": 14232 }, { "epoch": 0.8499343126716827, "grad_norm": 2.81318736076355, "learning_rate": 1.6681043062835912e-06, "loss": 0.8126, "step": 14233 }, { "epoch": 0.8499940284246984, "grad_norm": 3.593670606613159, "learning_rate": 1.6674407803065493e-06, "loss": 0.8237, "step": 14234 }, { "epoch": 0.8500537441777141, "grad_norm": 5.407565116882324, "learning_rate": 1.6667772543295072e-06, "loss": 0.7984, "step": 14235 }, { "epoch": 0.8501134599307297, "grad_norm": 1.8574575185775757, "learning_rate": 1.666113728352465e-06, "loss": 0.8194, "step": 14236 }, { "epoch": 0.8501731756837454, "grad_norm": 2.395256280899048, "learning_rate": 1.6654502023754232e-06, "loss": 0.7885, "step": 14237 }, { "epoch": 0.8502328914367611, "grad_norm": 2.160106658935547, "learning_rate": 1.664786676398381e-06, "loss": 0.8411, "step": 14238 }, { "epoch": 0.8502926071897766, "grad_norm": 2.6056759357452393, "learning_rate": 1.6641231504213392e-06, "loss": 0.828, "step": 14239 }, { "epoch": 0.8503523229427923, "grad_norm": 4.0140156745910645, "learning_rate": 1.663459624444297e-06, "loss": 0.8134, "step": 14240 }, { "epoch": 0.850412038695808, "grad_norm": 3.3659579753875732, "learning_rate": 1.662796098467255e-06, "loss": 0.8061, "step": 14241 }, { "epoch": 0.8504717544488236, "grad_norm": 8.91827392578125, "learning_rate": 1.6621325724902133e-06, "loss": 0.834, "step": 14242 }, { "epoch": 0.8505314702018393, "grad_norm": 2.084944725036621, "learning_rate": 1.6614690465131711e-06, "loss": 0.8713, "step": 14243 }, { "epoch": 0.850591185954855, "grad_norm": 1.8324511051177979, "learning_rate": 1.6608055205361292e-06, "loss": 0.8019, "step": 14244 }, { "epoch": 0.8506509017078705, "grad_norm": 2.647627592086792, "learning_rate": 1.6601419945590871e-06, "loss": 0.7991, "step": 14245 }, { "epoch": 0.8507106174608862, "grad_norm": 3.443519115447998, "learning_rate": 1.659478468582045e-06, "loss": 0.7972, "step": 14246 }, { "epoch": 0.8507703332139018, "grad_norm": 4.29343843460083, "learning_rate": 1.6588149426050031e-06, "loss": 0.8063, "step": 14247 }, { "epoch": 0.8508300489669175, "grad_norm": 4.660404205322266, "learning_rate": 1.658151416627961e-06, "loss": 0.838, "step": 14248 }, { "epoch": 0.8508897647199332, "grad_norm": 1.5328174829483032, "learning_rate": 1.6574878906509193e-06, "loss": 0.831, "step": 14249 }, { "epoch": 0.8509494804729487, "grad_norm": 1.9141141176223755, "learning_rate": 1.6568243646738772e-06, "loss": 0.8172, "step": 14250 }, { "epoch": 0.8510091962259644, "grad_norm": 2.2700657844543457, "learning_rate": 1.656160838696835e-06, "loss": 0.8223, "step": 14251 }, { "epoch": 0.85106891197898, "grad_norm": 2.7979681491851807, "learning_rate": 1.6554973127197932e-06, "loss": 0.8049, "step": 14252 }, { "epoch": 0.8511286277319957, "grad_norm": 1.7350671291351318, "learning_rate": 1.654833786742751e-06, "loss": 0.7858, "step": 14253 }, { "epoch": 0.8511883434850114, "grad_norm": 3.3152823448181152, "learning_rate": 1.6541702607657092e-06, "loss": 0.8285, "step": 14254 }, { "epoch": 0.851248059238027, "grad_norm": 2.0987327098846436, "learning_rate": 1.653506734788667e-06, "loss": 0.7961, "step": 14255 }, { "epoch": 0.8513077749910426, "grad_norm": 2.6994168758392334, "learning_rate": 1.652843208811625e-06, "loss": 0.787, "step": 14256 }, { "epoch": 0.8513674907440583, "grad_norm": 1.8801528215408325, "learning_rate": 1.6521796828345833e-06, "loss": 0.8004, "step": 14257 }, { "epoch": 0.8514272064970739, "grad_norm": 6.6272101402282715, "learning_rate": 1.6515161568575412e-06, "loss": 0.8377, "step": 14258 }, { "epoch": 0.8514869222500896, "grad_norm": 2.458827257156372, "learning_rate": 1.6508526308804993e-06, "loss": 0.8451, "step": 14259 }, { "epoch": 0.8515466380031053, "grad_norm": 1.741098165512085, "learning_rate": 1.6501891049034572e-06, "loss": 0.8304, "step": 14260 }, { "epoch": 0.8516063537561208, "grad_norm": 1.7948545217514038, "learning_rate": 1.649525578926415e-06, "loss": 0.8374, "step": 14261 }, { "epoch": 0.8516660695091365, "grad_norm": 7.731895923614502, "learning_rate": 1.6488620529493732e-06, "loss": 0.8486, "step": 14262 }, { "epoch": 0.8517257852621521, "grad_norm": 1.6904629468917847, "learning_rate": 1.648198526972331e-06, "loss": 0.8113, "step": 14263 }, { "epoch": 0.8517855010151678, "grad_norm": 2.0859248638153076, "learning_rate": 1.6475350009952891e-06, "loss": 0.7875, "step": 14264 }, { "epoch": 0.8518452167681835, "grad_norm": 1.9071344137191772, "learning_rate": 1.646871475018247e-06, "loss": 0.8058, "step": 14265 }, { "epoch": 0.8519049325211991, "grad_norm": 2.6406707763671875, "learning_rate": 1.646207949041205e-06, "loss": 0.7844, "step": 14266 }, { "epoch": 0.8519646482742147, "grad_norm": 1.8736013174057007, "learning_rate": 1.6455444230641632e-06, "loss": 0.8106, "step": 14267 }, { "epoch": 0.8520243640272304, "grad_norm": 3.383326768875122, "learning_rate": 1.6448808970871211e-06, "loss": 0.8378, "step": 14268 }, { "epoch": 0.852084079780246, "grad_norm": 1.8443349599838257, "learning_rate": 1.6442173711100792e-06, "loss": 0.7942, "step": 14269 }, { "epoch": 0.8521437955332617, "grad_norm": 2.5001180171966553, "learning_rate": 1.6435538451330371e-06, "loss": 0.8092, "step": 14270 }, { "epoch": 0.8522035112862774, "grad_norm": 1.9114561080932617, "learning_rate": 1.642890319155995e-06, "loss": 0.7929, "step": 14271 }, { "epoch": 0.8522632270392929, "grad_norm": 2.2248551845550537, "learning_rate": 1.642226793178953e-06, "loss": 0.8081, "step": 14272 }, { "epoch": 0.8523229427923086, "grad_norm": 2.3591599464416504, "learning_rate": 1.641563267201911e-06, "loss": 0.8048, "step": 14273 }, { "epoch": 0.8523826585453242, "grad_norm": 2.615402936935425, "learning_rate": 1.6408997412248693e-06, "loss": 0.8159, "step": 14274 }, { "epoch": 0.8524423742983399, "grad_norm": 1.9666916131973267, "learning_rate": 1.6402362152478272e-06, "loss": 0.8503, "step": 14275 }, { "epoch": 0.8525020900513556, "grad_norm": 2.018085241317749, "learning_rate": 1.639572689270785e-06, "loss": 0.797, "step": 14276 }, { "epoch": 0.8525618058043712, "grad_norm": 2.1921942234039307, "learning_rate": 1.6389091632937432e-06, "loss": 0.8349, "step": 14277 }, { "epoch": 0.8526215215573868, "grad_norm": 2.006443738937378, "learning_rate": 1.638245637316701e-06, "loss": 0.7681, "step": 14278 }, { "epoch": 0.8526812373104025, "grad_norm": 1.82640540599823, "learning_rate": 1.6375821113396592e-06, "loss": 0.8503, "step": 14279 }, { "epoch": 0.8527409530634181, "grad_norm": 2.6485490798950195, "learning_rate": 1.636918585362617e-06, "loss": 0.816, "step": 14280 }, { "epoch": 0.8528006688164338, "grad_norm": 2.579855442047119, "learning_rate": 1.636255059385575e-06, "loss": 0.8396, "step": 14281 }, { "epoch": 0.8528603845694495, "grad_norm": 2.7904839515686035, "learning_rate": 1.6355915334085333e-06, "loss": 0.7918, "step": 14282 }, { "epoch": 0.852920100322465, "grad_norm": 1.511965036392212, "learning_rate": 1.6349280074314911e-06, "loss": 0.8124, "step": 14283 }, { "epoch": 0.8529798160754807, "grad_norm": 1.6562916040420532, "learning_rate": 1.634264481454449e-06, "loss": 0.8332, "step": 14284 }, { "epoch": 0.8530395318284963, "grad_norm": 2.052673578262329, "learning_rate": 1.6336009554774071e-06, "loss": 0.8249, "step": 14285 }, { "epoch": 0.853099247581512, "grad_norm": 2.4684627056121826, "learning_rate": 1.632937429500365e-06, "loss": 0.8368, "step": 14286 }, { "epoch": 0.8531589633345277, "grad_norm": 2.3959598541259766, "learning_rate": 1.6322739035233231e-06, "loss": 0.8312, "step": 14287 }, { "epoch": 0.8532186790875433, "grad_norm": 2.2655627727508545, "learning_rate": 1.631610377546281e-06, "loss": 0.7854, "step": 14288 }, { "epoch": 0.8532783948405589, "grad_norm": 3.0574333667755127, "learning_rate": 1.630946851569239e-06, "loss": 0.8142, "step": 14289 }, { "epoch": 0.8533381105935746, "grad_norm": 2.617605686187744, "learning_rate": 1.630283325592197e-06, "loss": 0.8143, "step": 14290 }, { "epoch": 0.8533978263465902, "grad_norm": 2.2221567630767822, "learning_rate": 1.6296197996151549e-06, "loss": 0.8139, "step": 14291 }, { "epoch": 0.8534575420996059, "grad_norm": 1.8746901750564575, "learning_rate": 1.6289562736381132e-06, "loss": 0.8389, "step": 14292 }, { "epoch": 0.8535172578526216, "grad_norm": 2.742280960083008, "learning_rate": 1.628292747661071e-06, "loss": 0.807, "step": 14293 }, { "epoch": 0.8535769736056371, "grad_norm": 2.0016698837280273, "learning_rate": 1.627629221684029e-06, "loss": 0.817, "step": 14294 }, { "epoch": 0.8536366893586528, "grad_norm": 2.1058382987976074, "learning_rate": 1.626965695706987e-06, "loss": 0.8163, "step": 14295 }, { "epoch": 0.8536964051116684, "grad_norm": 2.3248753547668457, "learning_rate": 1.626302169729945e-06, "loss": 0.8104, "step": 14296 }, { "epoch": 0.8537561208646841, "grad_norm": 2.5367326736450195, "learning_rate": 1.625638643752903e-06, "loss": 0.8219, "step": 14297 }, { "epoch": 0.8538158366176998, "grad_norm": 2.8648643493652344, "learning_rate": 1.624975117775861e-06, "loss": 0.8489, "step": 14298 }, { "epoch": 0.8538755523707154, "grad_norm": 1.9555861949920654, "learning_rate": 1.6243115917988188e-06, "loss": 0.7947, "step": 14299 }, { "epoch": 0.853935268123731, "grad_norm": 2.039116382598877, "learning_rate": 1.6236480658217772e-06, "loss": 0.8259, "step": 14300 }, { "epoch": 0.8539949838767467, "grad_norm": 2.1252903938293457, "learning_rate": 1.622984539844735e-06, "loss": 0.8318, "step": 14301 }, { "epoch": 0.8540546996297623, "grad_norm": 1.9077098369598389, "learning_rate": 1.6223210138676931e-06, "loss": 0.8001, "step": 14302 }, { "epoch": 0.854114415382778, "grad_norm": 1.8060358762741089, "learning_rate": 1.621657487890651e-06, "loss": 0.803, "step": 14303 }, { "epoch": 0.8541741311357937, "grad_norm": 1.6809051036834717, "learning_rate": 1.620993961913609e-06, "loss": 0.8121, "step": 14304 }, { "epoch": 0.8542338468888093, "grad_norm": 3.0445444583892822, "learning_rate": 1.620330435936567e-06, "loss": 0.8508, "step": 14305 }, { "epoch": 0.8542935626418249, "grad_norm": 1.4798736572265625, "learning_rate": 1.619666909959525e-06, "loss": 0.8086, "step": 14306 }, { "epoch": 0.8543532783948405, "grad_norm": 4.4481987953186035, "learning_rate": 1.6190033839824832e-06, "loss": 0.8311, "step": 14307 }, { "epoch": 0.8544129941478562, "grad_norm": 2.154679298400879, "learning_rate": 1.6183398580054411e-06, "loss": 0.809, "step": 14308 }, { "epoch": 0.8544727099008719, "grad_norm": 2.4089925289154053, "learning_rate": 1.617676332028399e-06, "loss": 0.7743, "step": 14309 }, { "epoch": 0.8545324256538875, "grad_norm": 2.476624011993408, "learning_rate": 1.617012806051357e-06, "loss": 0.8291, "step": 14310 }, { "epoch": 0.8545921414069031, "grad_norm": 4.72876501083374, "learning_rate": 1.616349280074315e-06, "loss": 0.8685, "step": 14311 }, { "epoch": 0.8546518571599188, "grad_norm": 1.778693437576294, "learning_rate": 1.615685754097273e-06, "loss": 0.789, "step": 14312 }, { "epoch": 0.8547115729129344, "grad_norm": 2.7425453662872314, "learning_rate": 1.615022228120231e-06, "loss": 0.8256, "step": 14313 }, { "epoch": 0.8547712886659501, "grad_norm": 2.193969964981079, "learning_rate": 1.6143587021431889e-06, "loss": 0.8317, "step": 14314 }, { "epoch": 0.8548310044189658, "grad_norm": 1.8885709047317505, "learning_rate": 1.613695176166147e-06, "loss": 0.8117, "step": 14315 }, { "epoch": 0.8548907201719814, "grad_norm": 2.515185832977295, "learning_rate": 1.6130316501891049e-06, "loss": 0.8233, "step": 14316 }, { "epoch": 0.854950435924997, "grad_norm": 1.725853443145752, "learning_rate": 1.6123681242120632e-06, "loss": 0.8285, "step": 14317 }, { "epoch": 0.8550101516780126, "grad_norm": 2.2093865871429443, "learning_rate": 1.611704598235021e-06, "loss": 0.8288, "step": 14318 }, { "epoch": 0.8550698674310283, "grad_norm": 1.601460337638855, "learning_rate": 1.611041072257979e-06, "loss": 0.8047, "step": 14319 }, { "epoch": 0.855129583184044, "grad_norm": 2.1158101558685303, "learning_rate": 1.610377546280937e-06, "loss": 0.8434, "step": 14320 }, { "epoch": 0.8551892989370596, "grad_norm": 2.165462017059326, "learning_rate": 1.609714020303895e-06, "loss": 0.8115, "step": 14321 }, { "epoch": 0.8552490146900752, "grad_norm": 2.8927738666534424, "learning_rate": 1.609050494326853e-06, "loss": 0.7944, "step": 14322 }, { "epoch": 0.8553087304430909, "grad_norm": 1.8101056814193726, "learning_rate": 1.608386968349811e-06, "loss": 0.7932, "step": 14323 }, { "epoch": 0.8553684461961065, "grad_norm": 2.4206013679504395, "learning_rate": 1.6077234423727688e-06, "loss": 0.8208, "step": 14324 }, { "epoch": 0.8554281619491222, "grad_norm": 2.371243953704834, "learning_rate": 1.6070599163957271e-06, "loss": 0.8325, "step": 14325 }, { "epoch": 0.8554878777021379, "grad_norm": 1.8967525959014893, "learning_rate": 1.606396390418685e-06, "loss": 0.8021, "step": 14326 }, { "epoch": 0.8555475934551535, "grad_norm": 1.8785771131515503, "learning_rate": 1.6057328644416431e-06, "loss": 0.7689, "step": 14327 }, { "epoch": 0.8556073092081691, "grad_norm": 1.4915851354599, "learning_rate": 1.605069338464601e-06, "loss": 0.8155, "step": 14328 }, { "epoch": 0.8556670249611847, "grad_norm": 1.5799005031585693, "learning_rate": 1.604405812487559e-06, "loss": 0.8347, "step": 14329 }, { "epoch": 0.8557267407142004, "grad_norm": 2.863396167755127, "learning_rate": 1.603742286510517e-06, "loss": 0.8294, "step": 14330 }, { "epoch": 0.8557864564672161, "grad_norm": 10.058581352233887, "learning_rate": 1.6030787605334749e-06, "loss": 0.8147, "step": 14331 }, { "epoch": 0.8558461722202317, "grad_norm": 3.620687246322632, "learning_rate": 1.6024152345564332e-06, "loss": 0.7787, "step": 14332 }, { "epoch": 0.8559058879732473, "grad_norm": 2.320910692214966, "learning_rate": 1.601751708579391e-06, "loss": 0.815, "step": 14333 }, { "epoch": 0.855965603726263, "grad_norm": 3.2546322345733643, "learning_rate": 1.601088182602349e-06, "loss": 0.7914, "step": 14334 }, { "epoch": 0.8560253194792786, "grad_norm": 2.0076684951782227, "learning_rate": 1.600424656625307e-06, "loss": 0.7801, "step": 14335 }, { "epoch": 0.8560850352322943, "grad_norm": 1.633592963218689, "learning_rate": 1.599761130648265e-06, "loss": 0.8028, "step": 14336 }, { "epoch": 0.85614475098531, "grad_norm": 2.14152193069458, "learning_rate": 1.599097604671223e-06, "loss": 0.8169, "step": 14337 }, { "epoch": 0.8562044667383256, "grad_norm": 3.1427102088928223, "learning_rate": 1.598434078694181e-06, "loss": 0.8402, "step": 14338 }, { "epoch": 0.8562641824913412, "grad_norm": 2.413553476333618, "learning_rate": 1.5977705527171388e-06, "loss": 0.8213, "step": 14339 }, { "epoch": 0.8563238982443568, "grad_norm": 3.90002703666687, "learning_rate": 1.597107026740097e-06, "loss": 0.8109, "step": 14340 }, { "epoch": 0.8563836139973725, "grad_norm": 2.046579599380493, "learning_rate": 1.5964435007630548e-06, "loss": 0.8175, "step": 14341 }, { "epoch": 0.8564433297503882, "grad_norm": 2.4461939334869385, "learning_rate": 1.5957799747860131e-06, "loss": 0.7862, "step": 14342 }, { "epoch": 0.8565030455034038, "grad_norm": 2.436217784881592, "learning_rate": 1.595116448808971e-06, "loss": 0.865, "step": 14343 }, { "epoch": 0.8565627612564194, "grad_norm": 11.650754928588867, "learning_rate": 1.594452922831929e-06, "loss": 0.7943, "step": 14344 }, { "epoch": 0.8566224770094351, "grad_norm": 2.5232298374176025, "learning_rate": 1.593789396854887e-06, "loss": 0.8515, "step": 14345 }, { "epoch": 0.8566821927624507, "grad_norm": 2.409048318862915, "learning_rate": 1.593125870877845e-06, "loss": 0.826, "step": 14346 }, { "epoch": 0.8567419085154664, "grad_norm": 4.2747931480407715, "learning_rate": 1.592462344900803e-06, "loss": 0.8191, "step": 14347 }, { "epoch": 0.856801624268482, "grad_norm": 3.3551461696624756, "learning_rate": 1.591798818923761e-06, "loss": 0.7914, "step": 14348 }, { "epoch": 0.8568613400214977, "grad_norm": 2.0726640224456787, "learning_rate": 1.5911352929467188e-06, "loss": 0.7855, "step": 14349 }, { "epoch": 0.8569210557745133, "grad_norm": 13.028108596801758, "learning_rate": 1.590471766969677e-06, "loss": 0.8024, "step": 14350 }, { "epoch": 0.8569807715275289, "grad_norm": 2.411891460418701, "learning_rate": 1.589808240992635e-06, "loss": 0.8112, "step": 14351 }, { "epoch": 0.8570404872805446, "grad_norm": 2.099613904953003, "learning_rate": 1.589144715015593e-06, "loss": 0.8263, "step": 14352 }, { "epoch": 0.8571002030335603, "grad_norm": 2.5685250759124756, "learning_rate": 1.588481189038551e-06, "loss": 0.806, "step": 14353 }, { "epoch": 0.8571599187865759, "grad_norm": 2.5747079849243164, "learning_rate": 1.5878176630615089e-06, "loss": 0.8183, "step": 14354 }, { "epoch": 0.8572196345395915, "grad_norm": 1.942439317703247, "learning_rate": 1.587154137084467e-06, "loss": 0.8394, "step": 14355 }, { "epoch": 0.8572793502926072, "grad_norm": 2.019962787628174, "learning_rate": 1.5864906111074249e-06, "loss": 0.8316, "step": 14356 }, { "epoch": 0.8573390660456228, "grad_norm": 1.6695139408111572, "learning_rate": 1.5858270851303832e-06, "loss": 0.8009, "step": 14357 }, { "epoch": 0.8573987817986385, "grad_norm": 1.9812231063842773, "learning_rate": 1.585163559153341e-06, "loss": 0.8048, "step": 14358 }, { "epoch": 0.8574584975516542, "grad_norm": 1.9773799180984497, "learning_rate": 1.584500033176299e-06, "loss": 0.8142, "step": 14359 }, { "epoch": 0.8575182133046698, "grad_norm": 1.8403912782669067, "learning_rate": 1.583836507199257e-06, "loss": 0.8164, "step": 14360 }, { "epoch": 0.8575779290576854, "grad_norm": 3.698293447494507, "learning_rate": 1.583172981222215e-06, "loss": 0.8204, "step": 14361 }, { "epoch": 0.857637644810701, "grad_norm": 2.690863609313965, "learning_rate": 1.582509455245173e-06, "loss": 0.8098, "step": 14362 }, { "epoch": 0.8576973605637167, "grad_norm": 2.3441195487976074, "learning_rate": 1.581845929268131e-06, "loss": 0.8083, "step": 14363 }, { "epoch": 0.8577570763167324, "grad_norm": 2.4218358993530273, "learning_rate": 1.5811824032910888e-06, "loss": 0.8383, "step": 14364 }, { "epoch": 0.857816792069748, "grad_norm": 2.7243268489837646, "learning_rate": 1.5805188773140471e-06, "loss": 0.8223, "step": 14365 }, { "epoch": 0.8578765078227636, "grad_norm": 2.962339162826538, "learning_rate": 1.5798553513370048e-06, "loss": 0.8305, "step": 14366 }, { "epoch": 0.8579362235757793, "grad_norm": 3.129098892211914, "learning_rate": 1.5791918253599631e-06, "loss": 0.7829, "step": 14367 }, { "epoch": 0.8579959393287949, "grad_norm": 2.8627195358276367, "learning_rate": 1.578528299382921e-06, "loss": 0.8111, "step": 14368 }, { "epoch": 0.8580556550818106, "grad_norm": 1.9009695053100586, "learning_rate": 1.5778647734058789e-06, "loss": 0.8204, "step": 14369 }, { "epoch": 0.8581153708348263, "grad_norm": 2.0117197036743164, "learning_rate": 1.577201247428837e-06, "loss": 0.8106, "step": 14370 }, { "epoch": 0.8581750865878419, "grad_norm": 3.1634044647216797, "learning_rate": 1.5765377214517949e-06, "loss": 0.7727, "step": 14371 }, { "epoch": 0.8582348023408575, "grad_norm": 1.7619708776474, "learning_rate": 1.575874195474753e-06, "loss": 0.8, "step": 14372 }, { "epoch": 0.8582945180938731, "grad_norm": 1.8846596479415894, "learning_rate": 1.5752106694977109e-06, "loss": 0.8381, "step": 14373 }, { "epoch": 0.8583542338468888, "grad_norm": 2.8696656227111816, "learning_rate": 1.5745471435206688e-06, "loss": 0.798, "step": 14374 }, { "epoch": 0.8584139495999045, "grad_norm": 1.9977792501449585, "learning_rate": 1.573883617543627e-06, "loss": 0.7793, "step": 14375 }, { "epoch": 0.8584736653529201, "grad_norm": 2.3089704513549805, "learning_rate": 1.573220091566585e-06, "loss": 0.8324, "step": 14376 }, { "epoch": 0.8585333811059358, "grad_norm": 1.9606077671051025, "learning_rate": 1.572556565589543e-06, "loss": 0.8198, "step": 14377 }, { "epoch": 0.8585930968589514, "grad_norm": 2.443296194076538, "learning_rate": 1.571893039612501e-06, "loss": 0.8022, "step": 14378 }, { "epoch": 0.858652812611967, "grad_norm": 1.9553887844085693, "learning_rate": 1.5712295136354588e-06, "loss": 0.832, "step": 14379 }, { "epoch": 0.8587125283649827, "grad_norm": 1.7394068241119385, "learning_rate": 1.570565987658417e-06, "loss": 0.828, "step": 14380 }, { "epoch": 0.8587722441179984, "grad_norm": 5.07254695892334, "learning_rate": 1.5699024616813748e-06, "loss": 0.8234, "step": 14381 }, { "epoch": 0.858831959871014, "grad_norm": 2.210531234741211, "learning_rate": 1.5692389357043331e-06, "loss": 0.813, "step": 14382 }, { "epoch": 0.8588916756240296, "grad_norm": 1.9843806028366089, "learning_rate": 1.568575409727291e-06, "loss": 0.8046, "step": 14383 }, { "epoch": 0.8589513913770452, "grad_norm": 3.757883071899414, "learning_rate": 1.567911883750249e-06, "loss": 0.83, "step": 14384 }, { "epoch": 0.8590111071300609, "grad_norm": 1.887048363685608, "learning_rate": 1.567248357773207e-06, "loss": 0.8072, "step": 14385 }, { "epoch": 0.8590708228830766, "grad_norm": 1.6514713764190674, "learning_rate": 1.566584831796165e-06, "loss": 0.8194, "step": 14386 }, { "epoch": 0.8591305386360922, "grad_norm": 1.6585986614227295, "learning_rate": 1.565921305819123e-06, "loss": 0.8012, "step": 14387 }, { "epoch": 0.8591902543891079, "grad_norm": 2.349686861038208, "learning_rate": 1.5652577798420809e-06, "loss": 0.833, "step": 14388 }, { "epoch": 0.8592499701421235, "grad_norm": 1.6124593019485474, "learning_rate": 1.5645942538650388e-06, "loss": 0.81, "step": 14389 }, { "epoch": 0.8593096858951391, "grad_norm": 2.0172319412231445, "learning_rate": 1.563930727887997e-06, "loss": 0.8236, "step": 14390 }, { "epoch": 0.8593694016481548, "grad_norm": 2.137650728225708, "learning_rate": 1.563267201910955e-06, "loss": 0.8114, "step": 14391 }, { "epoch": 0.8594291174011705, "grad_norm": 2.1297762393951416, "learning_rate": 1.562603675933913e-06, "loss": 0.7967, "step": 14392 }, { "epoch": 0.8594888331541861, "grad_norm": 2.419172525405884, "learning_rate": 1.561940149956871e-06, "loss": 0.8243, "step": 14393 }, { "epoch": 0.8595485489072017, "grad_norm": 2.550117015838623, "learning_rate": 1.5612766239798289e-06, "loss": 0.828, "step": 14394 }, { "epoch": 0.8596082646602173, "grad_norm": 2.419370174407959, "learning_rate": 1.560613098002787e-06, "loss": 0.8333, "step": 14395 }, { "epoch": 0.859667980413233, "grad_norm": 8.180871963500977, "learning_rate": 1.5599495720257449e-06, "loss": 0.8561, "step": 14396 }, { "epoch": 0.8597276961662487, "grad_norm": 1.8377505540847778, "learning_rate": 1.559286046048703e-06, "loss": 0.8095, "step": 14397 }, { "epoch": 0.8597874119192643, "grad_norm": 2.0479021072387695, "learning_rate": 1.5586225200716608e-06, "loss": 0.8228, "step": 14398 }, { "epoch": 0.85984712767228, "grad_norm": 2.7040109634399414, "learning_rate": 1.5579589940946187e-06, "loss": 0.8359, "step": 14399 }, { "epoch": 0.8599068434252956, "grad_norm": 3.218484878540039, "learning_rate": 1.557295468117577e-06, "loss": 0.7878, "step": 14400 }, { "epoch": 0.8599068434252956, "eval_text_loss": 0.8871539831161499, "eval_text_runtime": 15.1667, "eval_text_samples_per_second": 263.735, "eval_text_steps_per_second": 0.527, "step": 14400 }, { "epoch": 0.8599068434252956, "eval_image_loss": 0.5875741243362427, "eval_image_runtime": 4.9857, "eval_image_samples_per_second": 802.3, "eval_image_steps_per_second": 1.605, "step": 14400 }, { "epoch": 0.8599068434252956, "eval_video_loss": 1.0148231983184814, "eval_video_runtime": 77.1472, "eval_video_samples_per_second": 51.849, "eval_video_steps_per_second": 0.104, "step": 14400 } ], "logging_steps": 1, "max_steps": 16746, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.9559377169850093e+20, "train_batch_size": 1, "trial_name": null, "trial_params": null }