{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 7177, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00013934369121438027, "grad_norm": 64.16678619384766, "learning_rate": 0.0, "loss": 0.736572265625, "step": 1 }, { "epoch": 0.00027868738242876054, "grad_norm": 78.11810302734375, "learning_rate": 1.953125e-08, "loss": 0.74578857421875, "step": 2 }, { "epoch": 0.0004180310736431408, "grad_norm": 75.42152404785156, "learning_rate": 3.90625e-08, "loss": 0.74273681640625, "step": 3 }, { "epoch": 0.0005573747648575211, "grad_norm": 75.51959228515625, "learning_rate": 5.859375000000001e-08, "loss": 0.74188232421875, "step": 4 }, { "epoch": 0.0006967184560719013, "grad_norm": 81.06597900390625, "learning_rate": 7.8125e-08, "loss": 0.74884033203125, "step": 5 }, { "epoch": 0.0008360621472862816, "grad_norm": 73.4911880493164, "learning_rate": 9.765625e-08, "loss": 0.74163818359375, "step": 6 }, { "epoch": 0.0009754058385006619, "grad_norm": 81.29713439941406, "learning_rate": 1.1718750000000002e-07, "loss": 0.74615478515625, "step": 7 }, { "epoch": 0.0011147495297150422, "grad_norm": 82.55047607421875, "learning_rate": 1.3671875000000001e-07, "loss": 0.7489013671875, "step": 8 }, { "epoch": 0.0012540932209294225, "grad_norm": 76.46623992919922, "learning_rate": 1.5625e-07, "loss": 0.74310302734375, "step": 9 }, { "epoch": 0.0013934369121438026, "grad_norm": 78.74652099609375, "learning_rate": 1.7578125e-07, "loss": 0.7469482421875, "step": 10 }, { "epoch": 0.001532780603358183, "grad_norm": 79.44668579101562, "learning_rate": 1.953125e-07, "loss": 0.7471923828125, "step": 11 }, { "epoch": 0.0016721242945725633, "grad_norm": 76.36875915527344, "learning_rate": 2.1484375e-07, "loss": 0.7430419921875, "step": 12 }, { "epoch": 0.0018114679857869436, "grad_norm": 75.70982360839844, "learning_rate": 2.3437500000000003e-07, "loss": 0.7415771484375, "step": 13 }, { "epoch": 0.0019508116770013237, "grad_norm": 81.48867797851562, "learning_rate": 2.5390625000000003e-07, "loss": 0.74609375, "step": 14 }, { "epoch": 0.0020901553682157042, "grad_norm": 75.59644317626953, "learning_rate": 2.7343750000000003e-07, "loss": 0.7418212890625, "step": 15 }, { "epoch": 0.0022294990594300844, "grad_norm": 78.09611511230469, "learning_rate": 2.9296875000000003e-07, "loss": 0.743408203125, "step": 16 }, { "epoch": 0.0023688427506444645, "grad_norm": 75.6493148803711, "learning_rate": 3.125e-07, "loss": 0.7432861328125, "step": 17 }, { "epoch": 0.002508186441858845, "grad_norm": 79.82585144042969, "learning_rate": 3.3203125e-07, "loss": 0.74517822265625, "step": 18 }, { "epoch": 0.002647530133073225, "grad_norm": 80.86809539794922, "learning_rate": 3.515625e-07, "loss": 0.74664306640625, "step": 19 }, { "epoch": 0.0027868738242876052, "grad_norm": 70.23112487792969, "learning_rate": 3.7109375e-07, "loss": 0.73846435546875, "step": 20 }, { "epoch": 0.0029262175155019858, "grad_norm": 76.74502563476562, "learning_rate": 3.90625e-07, "loss": 0.740234375, "step": 21 }, { "epoch": 0.003065561206716366, "grad_norm": 76.78258514404297, "learning_rate": 4.1015625e-07, "loss": 0.73760986328125, "step": 22 }, { "epoch": 0.003204904897930746, "grad_norm": 78.1328125, "learning_rate": 4.296875e-07, "loss": 0.7362060546875, "step": 23 }, { "epoch": 0.0033442485891451265, "grad_norm": 75.04594421386719, "learning_rate": 4.4921875e-07, "loss": 0.73394775390625, "step": 24 }, { "epoch": 0.0034835922803595066, "grad_norm": 71.99366760253906, "learning_rate": 4.6875000000000006e-07, "loss": 0.73101806640625, "step": 25 }, { "epoch": 0.003622935971573887, "grad_norm": 74.66387939453125, "learning_rate": 4.8828125e-07, "loss": 0.73187255859375, "step": 26 }, { "epoch": 0.0037622796627882673, "grad_norm": 75.00493621826172, "learning_rate": 5.078125000000001e-07, "loss": 0.7301025390625, "step": 27 }, { "epoch": 0.0039016233540026474, "grad_norm": 80.12986755371094, "learning_rate": 5.2734375e-07, "loss": 0.73455810546875, "step": 28 }, { "epoch": 0.0040409670452170275, "grad_norm": 73.67736053466797, "learning_rate": 5.468750000000001e-07, "loss": 0.7291259765625, "step": 29 }, { "epoch": 0.0041803107364314085, "grad_norm": 76.56310272216797, "learning_rate": 5.6640625e-07, "loss": 0.7193603515625, "step": 30 }, { "epoch": 0.004319654427645789, "grad_norm": 74.40933227539062, "learning_rate": 5.859375000000001e-07, "loss": 0.71722412109375, "step": 31 }, { "epoch": 0.004458998118860169, "grad_norm": 80.15144348144531, "learning_rate": 6.0546875e-07, "loss": 0.72491455078125, "step": 32 }, { "epoch": 0.004598341810074549, "grad_norm": 76.9186782836914, "learning_rate": 6.25e-07, "loss": 0.70068359375, "step": 33 }, { "epoch": 0.004737685501288929, "grad_norm": 68.6341323852539, "learning_rate": 6.445312500000001e-07, "loss": 0.6964111328125, "step": 34 }, { "epoch": 0.004877029192503309, "grad_norm": 73.10980224609375, "learning_rate": 6.640625e-07, "loss": 0.6944580078125, "step": 35 }, { "epoch": 0.00501637288371769, "grad_norm": 73.49235534667969, "learning_rate": 6.835937500000001e-07, "loss": 0.69305419921875, "step": 36 }, { "epoch": 0.00515571657493207, "grad_norm": 70.26813507080078, "learning_rate": 7.03125e-07, "loss": 0.6907958984375, "step": 37 }, { "epoch": 0.00529506026614645, "grad_norm": 69.66151428222656, "learning_rate": 7.226562500000001e-07, "loss": 0.6881103515625, "step": 38 }, { "epoch": 0.00543440395736083, "grad_norm": 73.57600402832031, "learning_rate": 7.421875e-07, "loss": 0.686767578125, "step": 39 }, { "epoch": 0.0055737476485752105, "grad_norm": 75.51813507080078, "learning_rate": 7.617187500000001e-07, "loss": 0.68670654296875, "step": 40 }, { "epoch": 0.005713091339789591, "grad_norm": 67.97862243652344, "learning_rate": 7.8125e-07, "loss": 0.676025390625, "step": 41 }, { "epoch": 0.0058524350310039715, "grad_norm": 66.8155288696289, "learning_rate": 8.007812500000001e-07, "loss": 0.65948486328125, "step": 42 }, { "epoch": 0.005991778722218352, "grad_norm": 61.018043518066406, "learning_rate": 8.203125e-07, "loss": 0.66082763671875, "step": 43 }, { "epoch": 0.006131122413432732, "grad_norm": 67.31378936767578, "learning_rate": 8.398437500000001e-07, "loss": 0.65576171875, "step": 44 }, { "epoch": 0.006270466104647112, "grad_norm": 55.338356018066406, "learning_rate": 8.59375e-07, "loss": 0.65521240234375, "step": 45 }, { "epoch": 0.006409809795861492, "grad_norm": 63.91973114013672, "learning_rate": 8.789062500000001e-07, "loss": 0.61065673828125, "step": 46 }, { "epoch": 0.006549153487075873, "grad_norm": 49.84105682373047, "learning_rate": 8.984375e-07, "loss": 0.608154296875, "step": 47 }, { "epoch": 0.006688497178290253, "grad_norm": 52.61445236206055, "learning_rate": 9.179687500000001e-07, "loss": 0.59478759765625, "step": 48 }, { "epoch": 0.006827840869504633, "grad_norm": 59.19639205932617, "learning_rate": 9.375000000000001e-07, "loss": 0.59100341796875, "step": 49 }, { "epoch": 0.006967184560719013, "grad_norm": 50.10200119018555, "learning_rate": 9.570312500000002e-07, "loss": 0.59136962890625, "step": 50 }, { "epoch": 0.007106528251933393, "grad_norm": 52.9692497253418, "learning_rate": 9.765625e-07, "loss": 0.59173583984375, "step": 51 }, { "epoch": 0.007245871943147774, "grad_norm": 51.447059631347656, "learning_rate": 9.9609375e-07, "loss": 0.58148193359375, "step": 52 }, { "epoch": 0.0073852156343621545, "grad_norm": 53.07025909423828, "learning_rate": 1.0156250000000001e-06, "loss": 0.5748291015625, "step": 53 }, { "epoch": 0.007524559325576535, "grad_norm": 47.2376708984375, "learning_rate": 1.0351562500000002e-06, "loss": 0.58465576171875, "step": 54 }, { "epoch": 0.007663903016790915, "grad_norm": 50.11313247680664, "learning_rate": 1.0546875e-06, "loss": 0.5751953125, "step": 55 }, { "epoch": 0.007803246708005295, "grad_norm": 43.212467193603516, "learning_rate": 1.07421875e-06, "loss": 0.57861328125, "step": 56 }, { "epoch": 0.007942590399219676, "grad_norm": 42.519813537597656, "learning_rate": 1.0937500000000001e-06, "loss": 0.56329345703125, "step": 57 }, { "epoch": 0.008081934090434055, "grad_norm": 48.66292190551758, "learning_rate": 1.1132812500000002e-06, "loss": 0.538909912109375, "step": 58 }, { "epoch": 0.008221277781648436, "grad_norm": 47.776859283447266, "learning_rate": 1.1328125e-06, "loss": 0.528289794921875, "step": 59 }, { "epoch": 0.008360621472862817, "grad_norm": 43.20143508911133, "learning_rate": 1.15234375e-06, "loss": 0.517822265625, "step": 60 }, { "epoch": 0.008499965164077196, "grad_norm": 37.38132095336914, "learning_rate": 1.1718750000000001e-06, "loss": 0.512451171875, "step": 61 }, { "epoch": 0.008639308855291577, "grad_norm": 34.61582946777344, "learning_rate": 1.1914062500000002e-06, "loss": 0.494781494140625, "step": 62 }, { "epoch": 0.008778652546505956, "grad_norm": 27.765260696411133, "learning_rate": 1.2109375e-06, "loss": 0.502655029296875, "step": 63 }, { "epoch": 0.008917996237720337, "grad_norm": 30.568239212036133, "learning_rate": 1.23046875e-06, "loss": 0.46356201171875, "step": 64 }, { "epoch": 0.009057339928934717, "grad_norm": 29.76983642578125, "learning_rate": 1.25e-06, "loss": 0.4561767578125, "step": 65 }, { "epoch": 0.009196683620149098, "grad_norm": 31.269006729125977, "learning_rate": 1.2695312500000002e-06, "loss": 0.432830810546875, "step": 66 }, { "epoch": 0.009336027311363479, "grad_norm": 34.95226287841797, "learning_rate": 1.2890625000000002e-06, "loss": 0.414764404296875, "step": 67 }, { "epoch": 0.009475371002577858, "grad_norm": 34.382999420166016, "learning_rate": 1.30859375e-06, "loss": 0.40020751953125, "step": 68 }, { "epoch": 0.009614714693792239, "grad_norm": 23.99138832092285, "learning_rate": 1.328125e-06, "loss": 0.45294189453125, "step": 69 }, { "epoch": 0.009754058385006618, "grad_norm": 30.9902400970459, "learning_rate": 1.3476562500000001e-06, "loss": 0.40045166015625, "step": 70 }, { "epoch": 0.009893402076220999, "grad_norm": 21.87000274658203, "learning_rate": 1.3671875000000002e-06, "loss": 0.443603515625, "step": 71 }, { "epoch": 0.01003274576743538, "grad_norm": 30.492660522460938, "learning_rate": 1.38671875e-06, "loss": 0.38690185546875, "step": 72 }, { "epoch": 0.01017208945864976, "grad_norm": 16.470136642456055, "learning_rate": 1.40625e-06, "loss": 0.46771240234375, "step": 73 }, { "epoch": 0.01031143314986414, "grad_norm": 26.022357940673828, "learning_rate": 1.4257812500000001e-06, "loss": 0.406982421875, "step": 74 }, { "epoch": 0.01045077684107852, "grad_norm": 30.566171646118164, "learning_rate": 1.4453125000000002e-06, "loss": 0.3624267578125, "step": 75 }, { "epoch": 0.0105901205322929, "grad_norm": 15.924283981323242, "learning_rate": 1.46484375e-06, "loss": 0.44195556640625, "step": 76 }, { "epoch": 0.010729464223507281, "grad_norm": 21.775548934936523, "learning_rate": 1.484375e-06, "loss": 0.39288330078125, "step": 77 }, { "epoch": 0.01086880791472166, "grad_norm": 17.986019134521484, "learning_rate": 1.5039062500000001e-06, "loss": 0.41082763671875, "step": 78 }, { "epoch": 0.011008151605936042, "grad_norm": 18.08913230895996, "learning_rate": 1.5234375000000002e-06, "loss": 0.396697998046875, "step": 79 }, { "epoch": 0.011147495297150421, "grad_norm": 16.654502868652344, "learning_rate": 1.54296875e-06, "loss": 0.394073486328125, "step": 80 }, { "epoch": 0.011286838988364802, "grad_norm": 8.258893966674805, "learning_rate": 1.5625e-06, "loss": 0.451019287109375, "step": 81 }, { "epoch": 0.011426182679579183, "grad_norm": 10.671232223510742, "learning_rate": 1.5820312500000001e-06, "loss": 0.42132568359375, "step": 82 }, { "epoch": 0.011565526370793562, "grad_norm": 4.965851306915283, "learning_rate": 1.6015625000000002e-06, "loss": 0.460601806640625, "step": 83 }, { "epoch": 0.011704870062007943, "grad_norm": 10.879104614257812, "learning_rate": 1.6210937500000002e-06, "loss": 0.400970458984375, "step": 84 }, { "epoch": 0.011844213753222322, "grad_norm": 12.677193641662598, "learning_rate": 1.640625e-06, "loss": 0.379241943359375, "step": 85 }, { "epoch": 0.011983557444436703, "grad_norm": 12.79642105102539, "learning_rate": 1.6601562500000001e-06, "loss": 0.3718719482421875, "step": 86 }, { "epoch": 0.012122901135651083, "grad_norm": 14.9633150100708, "learning_rate": 1.6796875000000002e-06, "loss": 0.3425140380859375, "step": 87 }, { "epoch": 0.012262244826865464, "grad_norm": 8.175371170043945, "learning_rate": 1.6992187500000002e-06, "loss": 0.393157958984375, "step": 88 }, { "epoch": 0.012401588518079844, "grad_norm": 10.147003173828125, "learning_rate": 1.71875e-06, "loss": 0.3652801513671875, "step": 89 }, { "epoch": 0.012540932209294224, "grad_norm": 10.65830135345459, "learning_rate": 1.7382812500000001e-06, "loss": 0.3467559814453125, "step": 90 }, { "epoch": 0.012680275900508605, "grad_norm": 7.1768269538879395, "learning_rate": 1.7578125000000002e-06, "loss": 0.372772216796875, "step": 91 }, { "epoch": 0.012819619591722984, "grad_norm": 10.363563537597656, "learning_rate": 1.7773437500000002e-06, "loss": 0.3337249755859375, "step": 92 }, { "epoch": 0.012958963282937365, "grad_norm": 4.504032135009766, "learning_rate": 1.796875e-06, "loss": 0.4416046142578125, "step": 93 }, { "epoch": 0.013098306974151746, "grad_norm": 9.561735153198242, "learning_rate": 1.81640625e-06, "loss": 0.334625244140625, "step": 94 }, { "epoch": 0.013237650665366125, "grad_norm": 9.19607162475586, "learning_rate": 1.8359375000000002e-06, "loss": 0.344573974609375, "step": 95 }, { "epoch": 0.013376994356580506, "grad_norm": 12.47248363494873, "learning_rate": 1.8554687500000002e-06, "loss": 0.3126373291015625, "step": 96 }, { "epoch": 0.013516338047794885, "grad_norm": 7.128753662109375, "learning_rate": 1.8750000000000003e-06, "loss": 0.357757568359375, "step": 97 }, { "epoch": 0.013655681739009266, "grad_norm": 6.948103427886963, "learning_rate": 1.89453125e-06, "loss": 0.359710693359375, "step": 98 }, { "epoch": 0.013795025430223647, "grad_norm": 3.027979612350464, "learning_rate": 1.9140625000000004e-06, "loss": 0.4005889892578125, "step": 99 }, { "epoch": 0.013934369121438027, "grad_norm": 4.2701311111450195, "learning_rate": 1.93359375e-06, "loss": 0.3657684326171875, "step": 100 }, { "epoch": 0.014073712812652408, "grad_norm": 8.378669738769531, "learning_rate": 1.953125e-06, "loss": 0.3080596923828125, "step": 101 }, { "epoch": 0.014213056503866787, "grad_norm": 4.524616718292236, "learning_rate": 1.97265625e-06, "loss": 0.4355621337890625, "step": 102 }, { "epoch": 0.014352400195081168, "grad_norm": 6.453117847442627, "learning_rate": 1.9921875e-06, "loss": 0.3604736328125, "step": 103 }, { "epoch": 0.014491743886295549, "grad_norm": 4.1606526374816895, "learning_rate": 2.01171875e-06, "loss": 0.3887481689453125, "step": 104 }, { "epoch": 0.014631087577509928, "grad_norm": 7.4613447189331055, "learning_rate": 2.0312500000000002e-06, "loss": 0.334747314453125, "step": 105 }, { "epoch": 0.014770431268724309, "grad_norm": 5.260101795196533, "learning_rate": 2.0507812500000003e-06, "loss": 0.36346435546875, "step": 106 }, { "epoch": 0.014909774959938688, "grad_norm": 6.694543361663818, "learning_rate": 2.0703125000000003e-06, "loss": 0.3333587646484375, "step": 107 }, { "epoch": 0.01504911865115307, "grad_norm": 8.572212219238281, "learning_rate": 2.08984375e-06, "loss": 0.2905426025390625, "step": 108 }, { "epoch": 0.01518846234236745, "grad_norm": 5.092799663543701, "learning_rate": 2.109375e-06, "loss": 0.3564300537109375, "step": 109 }, { "epoch": 0.01532780603358183, "grad_norm": 8.18907642364502, "learning_rate": 2.12890625e-06, "loss": 0.3307952880859375, "step": 110 }, { "epoch": 0.01546714972479621, "grad_norm": 8.09101390838623, "learning_rate": 2.1484375e-06, "loss": 0.310699462890625, "step": 111 }, { "epoch": 0.01560649341601059, "grad_norm": 9.025193214416504, "learning_rate": 2.16796875e-06, "loss": 0.3403778076171875, "step": 112 }, { "epoch": 0.01574583710722497, "grad_norm": 8.151870727539062, "learning_rate": 2.1875000000000002e-06, "loss": 0.3805084228515625, "step": 113 }, { "epoch": 0.01588518079843935, "grad_norm": 18.23855972290039, "learning_rate": 2.2070312500000003e-06, "loss": 0.3521575927734375, "step": 114 }, { "epoch": 0.016024524489653733, "grad_norm": 23.496559143066406, "learning_rate": 2.2265625000000003e-06, "loss": 0.296905517578125, "step": 115 }, { "epoch": 0.01616386818086811, "grad_norm": 10.481046676635742, "learning_rate": 2.2460937500000004e-06, "loss": 0.372283935546875, "step": 116 }, { "epoch": 0.01630321187208249, "grad_norm": 13.092636108398438, "learning_rate": 2.265625e-06, "loss": 0.2867279052734375, "step": 117 }, { "epoch": 0.016442555563296872, "grad_norm": 13.39275074005127, "learning_rate": 2.28515625e-06, "loss": 0.2902984619140625, "step": 118 }, { "epoch": 0.016581899254511253, "grad_norm": 6.9973039627075195, "learning_rate": 2.3046875e-06, "loss": 0.335601806640625, "step": 119 }, { "epoch": 0.016721242945725634, "grad_norm": 13.756980895996094, "learning_rate": 2.32421875e-06, "loss": 0.35699462890625, "step": 120 }, { "epoch": 0.01686058663694001, "grad_norm": 9.71623706817627, "learning_rate": 2.3437500000000002e-06, "loss": 0.3831787109375, "step": 121 }, { "epoch": 0.016999930328154392, "grad_norm": 8.340652465820312, "learning_rate": 2.3632812500000003e-06, "loss": 0.29046630859375, "step": 122 }, { "epoch": 0.017139274019368773, "grad_norm": 8.058320999145508, "learning_rate": 2.3828125000000003e-06, "loss": 0.2821502685546875, "step": 123 }, { "epoch": 0.017278617710583154, "grad_norm": 12.088471412658691, "learning_rate": 2.4023437500000004e-06, "loss": 0.3235321044921875, "step": 124 }, { "epoch": 0.017417961401797532, "grad_norm": 11.611265182495117, "learning_rate": 2.421875e-06, "loss": 0.2783660888671875, "step": 125 }, { "epoch": 0.017557305093011913, "grad_norm": 14.269041061401367, "learning_rate": 2.44140625e-06, "loss": 0.2833251953125, "step": 126 }, { "epoch": 0.017696648784226294, "grad_norm": 10.783553123474121, "learning_rate": 2.4609375e-06, "loss": 0.3194427490234375, "step": 127 }, { "epoch": 0.017835992475440675, "grad_norm": 8.093642234802246, "learning_rate": 2.48046875e-06, "loss": 0.30011749267578125, "step": 128 }, { "epoch": 0.017975336166655056, "grad_norm": 7.978979110717773, "learning_rate": 2.5e-06, "loss": 0.272369384765625, "step": 129 }, { "epoch": 0.018114679857869433, "grad_norm": 8.40949535369873, "learning_rate": 2.5195312500000003e-06, "loss": 0.24237823486328125, "step": 130 }, { "epoch": 0.018254023549083814, "grad_norm": 14.062163352966309, "learning_rate": 2.5390625000000003e-06, "loss": 0.3158721923828125, "step": 131 }, { "epoch": 0.018393367240298195, "grad_norm": 13.730401039123535, "learning_rate": 2.5585937500000004e-06, "loss": 0.35125732421875, "step": 132 }, { "epoch": 0.018532710931512576, "grad_norm": 7.665028095245361, "learning_rate": 2.5781250000000004e-06, "loss": 0.2335662841796875, "step": 133 }, { "epoch": 0.018672054622726957, "grad_norm": 25.655967712402344, "learning_rate": 2.59765625e-06, "loss": 0.274383544921875, "step": 134 }, { "epoch": 0.018811398313941335, "grad_norm": 11.628829956054688, "learning_rate": 2.6171875e-06, "loss": 0.24365997314453125, "step": 135 }, { "epoch": 0.018950742005155716, "grad_norm": 13.183489799499512, "learning_rate": 2.63671875e-06, "loss": 0.34625244140625, "step": 136 }, { "epoch": 0.019090085696370097, "grad_norm": 15.993921279907227, "learning_rate": 2.65625e-06, "loss": 0.28668212890625, "step": 137 }, { "epoch": 0.019229429387584478, "grad_norm": 5.366482257843018, "learning_rate": 2.6757812500000002e-06, "loss": 0.3024444580078125, "step": 138 }, { "epoch": 0.01936877307879886, "grad_norm": 9.3362455368042, "learning_rate": 2.6953125000000003e-06, "loss": 0.31127166748046875, "step": 139 }, { "epoch": 0.019508116770013236, "grad_norm": 6.8927083015441895, "learning_rate": 2.7148437500000003e-06, "loss": 0.2441253662109375, "step": 140 }, { "epoch": 0.019647460461227617, "grad_norm": 8.679638862609863, "learning_rate": 2.7343750000000004e-06, "loss": 0.3441619873046875, "step": 141 }, { "epoch": 0.019786804152441998, "grad_norm": 10.963214874267578, "learning_rate": 2.75390625e-06, "loss": 0.26200103759765625, "step": 142 }, { "epoch": 0.01992614784365638, "grad_norm": 7.737168312072754, "learning_rate": 2.7734375e-06, "loss": 0.33831024169921875, "step": 143 }, { "epoch": 0.02006549153487076, "grad_norm": 5.857471466064453, "learning_rate": 2.79296875e-06, "loss": 0.27858734130859375, "step": 144 }, { "epoch": 0.020204835226085138, "grad_norm": 7.794816493988037, "learning_rate": 2.8125e-06, "loss": 0.26551055908203125, "step": 145 }, { "epoch": 0.02034417891729952, "grad_norm": 6.134165287017822, "learning_rate": 2.8320312500000002e-06, "loss": 0.24561309814453125, "step": 146 }, { "epoch": 0.0204835226085139, "grad_norm": 5.158183574676514, "learning_rate": 2.8515625000000003e-06, "loss": 0.2899017333984375, "step": 147 }, { "epoch": 0.02062286629972828, "grad_norm": 10.853693008422852, "learning_rate": 2.8710937500000003e-06, "loss": 0.25759124755859375, "step": 148 }, { "epoch": 0.02076220999094266, "grad_norm": 9.474838256835938, "learning_rate": 2.8906250000000004e-06, "loss": 0.23094940185546875, "step": 149 }, { "epoch": 0.02090155368215704, "grad_norm": 5.673110008239746, "learning_rate": 2.9101562500000004e-06, "loss": 0.24353790283203125, "step": 150 }, { "epoch": 0.02104089737337142, "grad_norm": 7.019323825836182, "learning_rate": 2.9296875e-06, "loss": 0.29332733154296875, "step": 151 }, { "epoch": 0.0211802410645858, "grad_norm": 14.10360336303711, "learning_rate": 2.94921875e-06, "loss": 0.31320953369140625, "step": 152 }, { "epoch": 0.021319584755800182, "grad_norm": 5.911868095397949, "learning_rate": 2.96875e-06, "loss": 0.19628143310546875, "step": 153 }, { "epoch": 0.021458928447014563, "grad_norm": 9.704218864440918, "learning_rate": 2.9882812500000002e-06, "loss": 0.25865936279296875, "step": 154 }, { "epoch": 0.02159827213822894, "grad_norm": 5.03364896774292, "learning_rate": 3.0078125000000003e-06, "loss": 0.25307464599609375, "step": 155 }, { "epoch": 0.02173761582944332, "grad_norm": 8.831242561340332, "learning_rate": 3.0273437500000003e-06, "loss": 0.250823974609375, "step": 156 }, { "epoch": 0.021876959520657702, "grad_norm": 5.938481330871582, "learning_rate": 3.0468750000000004e-06, "loss": 0.23351287841796875, "step": 157 }, { "epoch": 0.022016303211872083, "grad_norm": 18.44980812072754, "learning_rate": 3.0664062500000004e-06, "loss": 0.32921600341796875, "step": 158 }, { "epoch": 0.022155646903086464, "grad_norm": 9.713719367980957, "learning_rate": 3.0859375e-06, "loss": 0.2267608642578125, "step": 159 }, { "epoch": 0.022294990594300842, "grad_norm": 12.605870246887207, "learning_rate": 3.10546875e-06, "loss": 0.30535125732421875, "step": 160 }, { "epoch": 0.022434334285515223, "grad_norm": 6.077558994293213, "learning_rate": 3.125e-06, "loss": 0.311798095703125, "step": 161 }, { "epoch": 0.022573677976729604, "grad_norm": 8.05876350402832, "learning_rate": 3.14453125e-06, "loss": 0.20165252685546875, "step": 162 }, { "epoch": 0.022713021667943985, "grad_norm": 13.68863582611084, "learning_rate": 3.1640625000000003e-06, "loss": 0.2602081298828125, "step": 163 }, { "epoch": 0.022852365359158366, "grad_norm": 7.653500556945801, "learning_rate": 3.1835937500000003e-06, "loss": 0.194732666015625, "step": 164 }, { "epoch": 0.022991709050372743, "grad_norm": 26.284509658813477, "learning_rate": 3.2031250000000004e-06, "loss": 0.24428558349609375, "step": 165 }, { "epoch": 0.023131052741587124, "grad_norm": 19.634876251220703, "learning_rate": 3.2226562500000004e-06, "loss": 0.251983642578125, "step": 166 }, { "epoch": 0.023270396432801505, "grad_norm": 10.470183372497559, "learning_rate": 3.2421875000000005e-06, "loss": 0.2534637451171875, "step": 167 }, { "epoch": 0.023409740124015886, "grad_norm": 11.401005744934082, "learning_rate": 3.26171875e-06, "loss": 0.182373046875, "step": 168 }, { "epoch": 0.023549083815230267, "grad_norm": 10.243048667907715, "learning_rate": 3.28125e-06, "loss": 0.198883056640625, "step": 169 }, { "epoch": 0.023688427506444645, "grad_norm": 17.51605987548828, "learning_rate": 3.30078125e-06, "loss": 0.18087005615234375, "step": 170 }, { "epoch": 0.023827771197659026, "grad_norm": 33.123748779296875, "learning_rate": 3.3203125000000002e-06, "loss": 0.30068206787109375, "step": 171 }, { "epoch": 0.023967114888873407, "grad_norm": 8.253579139709473, "learning_rate": 3.3398437500000003e-06, "loss": 0.18779754638671875, "step": 172 }, { "epoch": 0.024106458580087788, "grad_norm": 22.1197452545166, "learning_rate": 3.3593750000000003e-06, "loss": 0.22255706787109375, "step": 173 }, { "epoch": 0.024245802271302165, "grad_norm": 28.968589782714844, "learning_rate": 3.3789062500000004e-06, "loss": 0.247344970703125, "step": 174 }, { "epoch": 0.024385145962516546, "grad_norm": 22.001848220825195, "learning_rate": 3.3984375000000004e-06, "loss": 0.27017974853515625, "step": 175 }, { "epoch": 0.024524489653730927, "grad_norm": 14.729316711425781, "learning_rate": 3.41796875e-06, "loss": 0.219970703125, "step": 176 }, { "epoch": 0.024663833344945308, "grad_norm": 15.690092086791992, "learning_rate": 3.4375e-06, "loss": 0.18291473388671875, "step": 177 }, { "epoch": 0.02480317703615969, "grad_norm": 8.319366455078125, "learning_rate": 3.45703125e-06, "loss": 0.186279296875, "step": 178 }, { "epoch": 0.024942520727374067, "grad_norm": 9.988335609436035, "learning_rate": 3.4765625000000002e-06, "loss": 0.18799591064453125, "step": 179 }, { "epoch": 0.025081864418588447, "grad_norm": 13.121956825256348, "learning_rate": 3.4960937500000003e-06, "loss": 0.15827178955078125, "step": 180 }, { "epoch": 0.02522120810980283, "grad_norm": 7.678782939910889, "learning_rate": 3.5156250000000003e-06, "loss": 0.2038116455078125, "step": 181 }, { "epoch": 0.02536055180101721, "grad_norm": 8.080988883972168, "learning_rate": 3.5351562500000004e-06, "loss": 0.23712158203125, "step": 182 }, { "epoch": 0.02549989549223159, "grad_norm": 9.981840133666992, "learning_rate": 3.5546875000000004e-06, "loss": 0.28643035888671875, "step": 183 }, { "epoch": 0.025639239183445968, "grad_norm": 14.38892650604248, "learning_rate": 3.5742187500000005e-06, "loss": 0.24761199951171875, "step": 184 }, { "epoch": 0.02577858287466035, "grad_norm": 17.52321434020996, "learning_rate": 3.59375e-06, "loss": 0.202972412109375, "step": 185 }, { "epoch": 0.02591792656587473, "grad_norm": 27.765369415283203, "learning_rate": 3.61328125e-06, "loss": 0.2608795166015625, "step": 186 }, { "epoch": 0.02605727025708911, "grad_norm": 10.026653289794922, "learning_rate": 3.6328125e-06, "loss": 0.212738037109375, "step": 187 }, { "epoch": 0.026196613948303492, "grad_norm": 26.451148986816406, "learning_rate": 3.6523437500000003e-06, "loss": 0.243194580078125, "step": 188 }, { "epoch": 0.02633595763951787, "grad_norm": 23.387102127075195, "learning_rate": 3.6718750000000003e-06, "loss": 0.20406341552734375, "step": 189 }, { "epoch": 0.02647530133073225, "grad_norm": 18.26962661743164, "learning_rate": 3.6914062500000004e-06, "loss": 0.2520904541015625, "step": 190 }, { "epoch": 0.02661464502194663, "grad_norm": 19.624685287475586, "learning_rate": 3.7109375000000004e-06, "loss": 0.179046630859375, "step": 191 }, { "epoch": 0.026753988713161012, "grad_norm": 27.19849395751953, "learning_rate": 3.7304687500000005e-06, "loss": 0.2687950134277344, "step": 192 }, { "epoch": 0.026893332404375393, "grad_norm": 17.608795166015625, "learning_rate": 3.7500000000000005e-06, "loss": 0.29642486572265625, "step": 193 }, { "epoch": 0.02703267609558977, "grad_norm": 10.298958778381348, "learning_rate": 3.76953125e-06, "loss": 0.19585418701171875, "step": 194 }, { "epoch": 0.02717201978680415, "grad_norm": 20.677471160888672, "learning_rate": 3.7890625e-06, "loss": 0.24173736572265625, "step": 195 }, { "epoch": 0.027311363478018533, "grad_norm": 7.10650634765625, "learning_rate": 3.8085937500000002e-06, "loss": 0.17553329467773438, "step": 196 }, { "epoch": 0.027450707169232914, "grad_norm": 7.355569362640381, "learning_rate": 3.828125000000001e-06, "loss": 0.239044189453125, "step": 197 }, { "epoch": 0.027590050860447295, "grad_norm": 19.91388702392578, "learning_rate": 3.84765625e-06, "loss": 0.20315933227539062, "step": 198 }, { "epoch": 0.027729394551661672, "grad_norm": 21.650339126586914, "learning_rate": 3.8671875e-06, "loss": 0.2072601318359375, "step": 199 }, { "epoch": 0.027868738242876053, "grad_norm": 8.592239379882812, "learning_rate": 3.88671875e-06, "loss": 0.224853515625, "step": 200 }, { "epoch": 0.028008081934090434, "grad_norm": 8.059208869934082, "learning_rate": 3.90625e-06, "loss": 0.17461395263671875, "step": 201 }, { "epoch": 0.028147425625304815, "grad_norm": 7.282668113708496, "learning_rate": 3.92578125e-06, "loss": 0.1397705078125, "step": 202 }, { "epoch": 0.028286769316519196, "grad_norm": 4.5309576988220215, "learning_rate": 3.9453125e-06, "loss": 0.13838958740234375, "step": 203 }, { "epoch": 0.028426113007733574, "grad_norm": 8.087711334228516, "learning_rate": 3.96484375e-06, "loss": 0.1696319580078125, "step": 204 }, { "epoch": 0.028565456698947955, "grad_norm": 13.639384269714355, "learning_rate": 3.984375e-06, "loss": 0.17914581298828125, "step": 205 }, { "epoch": 0.028704800390162336, "grad_norm": 8.188125610351562, "learning_rate": 4.00390625e-06, "loss": 0.14945220947265625, "step": 206 }, { "epoch": 0.028844144081376717, "grad_norm": 26.527408599853516, "learning_rate": 4.0234375e-06, "loss": 0.205780029296875, "step": 207 }, { "epoch": 0.028983487772591097, "grad_norm": 23.56951332092285, "learning_rate": 4.0429687500000004e-06, "loss": 0.19709014892578125, "step": 208 }, { "epoch": 0.029122831463805475, "grad_norm": 10.572108268737793, "learning_rate": 4.0625000000000005e-06, "loss": 0.13140106201171875, "step": 209 }, { "epoch": 0.029262175155019856, "grad_norm": 47.23286437988281, "learning_rate": 4.0820312500000005e-06, "loss": 0.24827957153320312, "step": 210 }, { "epoch": 0.029401518846234237, "grad_norm": 50.28873825073242, "learning_rate": 4.101562500000001e-06, "loss": 0.1993255615234375, "step": 211 }, { "epoch": 0.029540862537448618, "grad_norm": 30.949167251586914, "learning_rate": 4.121093750000001e-06, "loss": 0.2003936767578125, "step": 212 }, { "epoch": 0.029680206228663, "grad_norm": 12.913866996765137, "learning_rate": 4.140625000000001e-06, "loss": 0.2065582275390625, "step": 213 }, { "epoch": 0.029819549919877376, "grad_norm": 26.34454345703125, "learning_rate": 4.160156250000001e-06, "loss": 0.232940673828125, "step": 214 }, { "epoch": 0.029958893611091757, "grad_norm": 36.74723434448242, "learning_rate": 4.1796875e-06, "loss": 0.2563018798828125, "step": 215 }, { "epoch": 0.03009823730230614, "grad_norm": 8.40907096862793, "learning_rate": 4.19921875e-06, "loss": 0.23477935791015625, "step": 216 }, { "epoch": 0.03023758099352052, "grad_norm": 35.041080474853516, "learning_rate": 4.21875e-06, "loss": 0.19935989379882812, "step": 217 }, { "epoch": 0.0303769246847349, "grad_norm": 58.098052978515625, "learning_rate": 4.23828125e-06, "loss": 0.252288818359375, "step": 218 }, { "epoch": 0.030516268375949278, "grad_norm": 21.75027847290039, "learning_rate": 4.2578125e-06, "loss": 0.14652252197265625, "step": 219 }, { "epoch": 0.03065561206716366, "grad_norm": 7.649435520172119, "learning_rate": 4.27734375e-06, "loss": 0.16462326049804688, "step": 220 }, { "epoch": 0.03079495575837804, "grad_norm": 27.686798095703125, "learning_rate": 4.296875e-06, "loss": 0.21265602111816406, "step": 221 }, { "epoch": 0.03093429944959242, "grad_norm": 55.40580368041992, "learning_rate": 4.31640625e-06, "loss": 0.31345367431640625, "step": 222 }, { "epoch": 0.031073643140806798, "grad_norm": 38.11203384399414, "learning_rate": 4.3359375e-06, "loss": 0.2816200256347656, "step": 223 }, { "epoch": 0.03121298683202118, "grad_norm": 15.952048301696777, "learning_rate": 4.35546875e-06, "loss": 0.18428421020507812, "step": 224 }, { "epoch": 0.031352330523235564, "grad_norm": 9.59803295135498, "learning_rate": 4.3750000000000005e-06, "loss": 0.20664215087890625, "step": 225 }, { "epoch": 0.03149167421444994, "grad_norm": 30.150196075439453, "learning_rate": 4.3945312500000005e-06, "loss": 0.21961212158203125, "step": 226 }, { "epoch": 0.03163101790566432, "grad_norm": 30.41047477722168, "learning_rate": 4.4140625000000006e-06, "loss": 0.190093994140625, "step": 227 }, { "epoch": 0.0317703615968787, "grad_norm": 19.84634017944336, "learning_rate": 4.433593750000001e-06, "loss": 0.21884918212890625, "step": 228 }, { "epoch": 0.03190970528809308, "grad_norm": 14.282132148742676, "learning_rate": 4.453125000000001e-06, "loss": 0.2345123291015625, "step": 229 }, { "epoch": 0.032049048979307465, "grad_norm": 15.013458251953125, "learning_rate": 4.472656250000001e-06, "loss": 0.23382186889648438, "step": 230 }, { "epoch": 0.03218839267052184, "grad_norm": 10.294239044189453, "learning_rate": 4.492187500000001e-06, "loss": 0.18857574462890625, "step": 231 }, { "epoch": 0.03232773636173622, "grad_norm": 8.76961612701416, "learning_rate": 4.51171875e-06, "loss": 0.22595977783203125, "step": 232 }, { "epoch": 0.032467080052950605, "grad_norm": 22.80904769897461, "learning_rate": 4.53125e-06, "loss": 0.17462539672851562, "step": 233 }, { "epoch": 0.03260642374416498, "grad_norm": 53.55217742919922, "learning_rate": 4.55078125e-06, "loss": 0.28200531005859375, "step": 234 }, { "epoch": 0.032745767435379367, "grad_norm": 12.586470603942871, "learning_rate": 4.5703125e-06, "loss": 0.19762420654296875, "step": 235 }, { "epoch": 0.032885111126593744, "grad_norm": 6.7917799949646, "learning_rate": 4.58984375e-06, "loss": 0.16175079345703125, "step": 236 }, { "epoch": 0.03302445481780812, "grad_norm": 8.80307388305664, "learning_rate": 4.609375e-06, "loss": 0.15021514892578125, "step": 237 }, { "epoch": 0.033163798509022506, "grad_norm": 7.3317437171936035, "learning_rate": 4.62890625e-06, "loss": 0.15477371215820312, "step": 238 }, { "epoch": 0.033303142200236883, "grad_norm": 3.542001724243164, "learning_rate": 4.6484375e-06, "loss": 0.12714385986328125, "step": 239 }, { "epoch": 0.03344248589145127, "grad_norm": 7.790466785430908, "learning_rate": 4.66796875e-06, "loss": 0.19305801391601562, "step": 240 }, { "epoch": 0.033581829582665645, "grad_norm": 10.1496000289917, "learning_rate": 4.6875000000000004e-06, "loss": 0.1514129638671875, "step": 241 }, { "epoch": 0.03372117327388002, "grad_norm": 7.17873477935791, "learning_rate": 4.7070312500000005e-06, "loss": 0.133575439453125, "step": 242 }, { "epoch": 0.03386051696509441, "grad_norm": 13.9468994140625, "learning_rate": 4.7265625000000005e-06, "loss": 0.2058868408203125, "step": 243 }, { "epoch": 0.033999860656308785, "grad_norm": 9.799782752990723, "learning_rate": 4.746093750000001e-06, "loss": 0.20214080810546875, "step": 244 }, { "epoch": 0.03413920434752317, "grad_norm": 21.76659393310547, "learning_rate": 4.765625000000001e-06, "loss": 0.19138717651367188, "step": 245 }, { "epoch": 0.03427854803873755, "grad_norm": 4.233003616333008, "learning_rate": 4.785156250000001e-06, "loss": 0.10499954223632812, "step": 246 }, { "epoch": 0.034417891729951924, "grad_norm": 6.674178123474121, "learning_rate": 4.804687500000001e-06, "loss": 0.15364837646484375, "step": 247 }, { "epoch": 0.03455723542116631, "grad_norm": 20.620044708251953, "learning_rate": 4.824218750000001e-06, "loss": 0.16588592529296875, "step": 248 }, { "epoch": 0.034696579112380686, "grad_norm": 12.134761810302734, "learning_rate": 4.84375e-06, "loss": 0.20935440063476562, "step": 249 }, { "epoch": 0.034835922803595064, "grad_norm": 12.495858192443848, "learning_rate": 4.86328125e-06, "loss": 0.14084243774414062, "step": 250 }, { "epoch": 0.03497526649480945, "grad_norm": 14.199263572692871, "learning_rate": 4.8828125e-06, "loss": 0.17259979248046875, "step": 251 }, { "epoch": 0.035114610186023826, "grad_norm": 15.897130966186523, "learning_rate": 4.90234375e-06, "loss": 0.24346160888671875, "step": 252 }, { "epoch": 0.03525395387723821, "grad_norm": 8.740765571594238, "learning_rate": 4.921875e-06, "loss": 0.19855117797851562, "step": 253 }, { "epoch": 0.03539329756845259, "grad_norm": 6.639811038970947, "learning_rate": 4.94140625e-06, "loss": 0.15350723266601562, "step": 254 }, { "epoch": 0.035532641259666965, "grad_norm": 19.83184814453125, "learning_rate": 4.9609375e-06, "loss": 0.1979217529296875, "step": 255 }, { "epoch": 0.03567198495088135, "grad_norm": 14.044180870056152, "learning_rate": 4.98046875e-06, "loss": 0.19338607788085938, "step": 256 }, { "epoch": 0.03581132864209573, "grad_norm": 53.72457504272461, "learning_rate": 5e-06, "loss": 0.2259063720703125, "step": 257 }, { "epoch": 0.03595067233331011, "grad_norm": 53.63716506958008, "learning_rate": 5.0195312500000005e-06, "loss": 0.20551300048828125, "step": 258 }, { "epoch": 0.03609001602452449, "grad_norm": 20.92974853515625, "learning_rate": 5.0390625000000005e-06, "loss": 0.196868896484375, "step": 259 }, { "epoch": 0.03622935971573887, "grad_norm": 11.921292304992676, "learning_rate": 5.0585937500000006e-06, "loss": 0.1698780059814453, "step": 260 }, { "epoch": 0.03636870340695325, "grad_norm": 25.159324645996094, "learning_rate": 5.078125000000001e-06, "loss": 0.18630218505859375, "step": 261 }, { "epoch": 0.03650804709816763, "grad_norm": 25.97695541381836, "learning_rate": 5.097656250000001e-06, "loss": 0.2048797607421875, "step": 262 }, { "epoch": 0.03664739078938201, "grad_norm": 11.72561264038086, "learning_rate": 5.117187500000001e-06, "loss": 0.19612884521484375, "step": 263 }, { "epoch": 0.03678673448059639, "grad_norm": 19.11922264099121, "learning_rate": 5.136718750000001e-06, "loss": 0.20626068115234375, "step": 264 }, { "epoch": 0.03692607817181077, "grad_norm": 38.69328308105469, "learning_rate": 5.156250000000001e-06, "loss": 0.23847198486328125, "step": 265 }, { "epoch": 0.03706542186302515, "grad_norm": 19.797775268554688, "learning_rate": 5.17578125e-06, "loss": 0.15404891967773438, "step": 266 }, { "epoch": 0.03720476555423953, "grad_norm": 5.233186721801758, "learning_rate": 5.1953125e-06, "loss": 0.15129852294921875, "step": 267 }, { "epoch": 0.037344109245453914, "grad_norm": 21.956586837768555, "learning_rate": 5.21484375e-06, "loss": 0.19769287109375, "step": 268 }, { "epoch": 0.03748345293666829, "grad_norm": 14.212251663208008, "learning_rate": 5.234375e-06, "loss": 0.159942626953125, "step": 269 }, { "epoch": 0.03762279662788267, "grad_norm": 9.091423034667969, "learning_rate": 5.25390625e-06, "loss": 0.16857147216796875, "step": 270 }, { "epoch": 0.037762140319097054, "grad_norm": 23.42886734008789, "learning_rate": 5.2734375e-06, "loss": 0.15615081787109375, "step": 271 }, { "epoch": 0.03790148401031143, "grad_norm": 27.10823631286621, "learning_rate": 5.29296875e-06, "loss": 0.17751312255859375, "step": 272 }, { "epoch": 0.038040827701525816, "grad_norm": 15.833884239196777, "learning_rate": 5.3125e-06, "loss": 0.13651657104492188, "step": 273 }, { "epoch": 0.03818017139274019, "grad_norm": 5.514795780181885, "learning_rate": 5.3320312500000004e-06, "loss": 0.15313339233398438, "step": 274 }, { "epoch": 0.03831951508395457, "grad_norm": 14.085675239562988, "learning_rate": 5.3515625000000005e-06, "loss": 0.15729141235351562, "step": 275 }, { "epoch": 0.038458858775168955, "grad_norm": 3.1260826587677, "learning_rate": 5.3710937500000005e-06, "loss": 0.11077499389648438, "step": 276 }, { "epoch": 0.03859820246638333, "grad_norm": 6.795664310455322, "learning_rate": 5.390625000000001e-06, "loss": 0.16930389404296875, "step": 277 }, { "epoch": 0.03873754615759772, "grad_norm": 21.760046005249023, "learning_rate": 5.410156250000001e-06, "loss": 0.16713333129882812, "step": 278 }, { "epoch": 0.038876889848812095, "grad_norm": 23.410438537597656, "learning_rate": 5.429687500000001e-06, "loss": 0.1700286865234375, "step": 279 }, { "epoch": 0.03901623354002647, "grad_norm": 8.62121295928955, "learning_rate": 5.449218750000001e-06, "loss": 0.13473892211914062, "step": 280 }, { "epoch": 0.03915557723124086, "grad_norm": 14.364507675170898, "learning_rate": 5.468750000000001e-06, "loss": 0.17862319946289062, "step": 281 }, { "epoch": 0.039294920922455234, "grad_norm": 10.962474822998047, "learning_rate": 5.488281250000001e-06, "loss": 0.1347827911376953, "step": 282 }, { "epoch": 0.03943426461366962, "grad_norm": 11.488435745239258, "learning_rate": 5.5078125e-06, "loss": 0.16532135009765625, "step": 283 }, { "epoch": 0.039573608304883996, "grad_norm": 16.36956024169922, "learning_rate": 5.52734375e-06, "loss": 0.21630477905273438, "step": 284 }, { "epoch": 0.039712951996098374, "grad_norm": 13.905577659606934, "learning_rate": 5.546875e-06, "loss": 0.15787315368652344, "step": 285 }, { "epoch": 0.03985229568731276, "grad_norm": 3.8835666179656982, "learning_rate": 5.56640625e-06, "loss": 0.14754104614257812, "step": 286 }, { "epoch": 0.039991639378527136, "grad_norm": 3.531787395477295, "learning_rate": 5.5859375e-06, "loss": 0.1446380615234375, "step": 287 }, { "epoch": 0.04013098306974152, "grad_norm": 3.16595458984375, "learning_rate": 5.60546875e-06, "loss": 0.12882232666015625, "step": 288 }, { "epoch": 0.0402703267609559, "grad_norm": 5.779922962188721, "learning_rate": 5.625e-06, "loss": 0.1298198699951172, "step": 289 }, { "epoch": 0.040409670452170275, "grad_norm": 6.994025230407715, "learning_rate": 5.64453125e-06, "loss": 0.16271209716796875, "step": 290 }, { "epoch": 0.04054901414338466, "grad_norm": 5.753183364868164, "learning_rate": 5.6640625000000005e-06, "loss": 0.1735687255859375, "step": 291 }, { "epoch": 0.04068835783459904, "grad_norm": 6.453915596008301, "learning_rate": 5.6835937500000005e-06, "loss": 0.17578506469726562, "step": 292 }, { "epoch": 0.04082770152581342, "grad_norm": 23.33189582824707, "learning_rate": 5.7031250000000006e-06, "loss": 0.20135879516601562, "step": 293 }, { "epoch": 0.0409670452170278, "grad_norm": 8.047999382019043, "learning_rate": 5.722656250000001e-06, "loss": 0.15416717529296875, "step": 294 }, { "epoch": 0.04110638890824218, "grad_norm": 15.640710830688477, "learning_rate": 5.742187500000001e-06, "loss": 0.17469215393066406, "step": 295 }, { "epoch": 0.04124573259945656, "grad_norm": 20.259214401245117, "learning_rate": 5.761718750000001e-06, "loss": 0.18639183044433594, "step": 296 }, { "epoch": 0.04138507629067094, "grad_norm": 18.27798843383789, "learning_rate": 5.781250000000001e-06, "loss": 0.1436920166015625, "step": 297 }, { "epoch": 0.04152441998188532, "grad_norm": 6.387821674346924, "learning_rate": 5.800781250000001e-06, "loss": 0.11783218383789062, "step": 298 }, { "epoch": 0.0416637636730997, "grad_norm": 4.6905035972595215, "learning_rate": 5.820312500000001e-06, "loss": 0.11679267883300781, "step": 299 }, { "epoch": 0.04180310736431408, "grad_norm": 16.780841827392578, "learning_rate": 5.83984375e-06, "loss": 0.18936920166015625, "step": 300 }, { "epoch": 0.04194245105552846, "grad_norm": 12.191939353942871, "learning_rate": 5.859375e-06, "loss": 0.199371337890625, "step": 301 }, { "epoch": 0.04208179474674284, "grad_norm": 17.601823806762695, "learning_rate": 5.87890625e-06, "loss": 0.1762847900390625, "step": 302 }, { "epoch": 0.042221138437957224, "grad_norm": 16.153066635131836, "learning_rate": 5.8984375e-06, "loss": 0.14146804809570312, "step": 303 }, { "epoch": 0.0423604821291716, "grad_norm": 7.551718711853027, "learning_rate": 5.91796875e-06, "loss": 0.1886749267578125, "step": 304 }, { "epoch": 0.04249982582038598, "grad_norm": 13.354232788085938, "learning_rate": 5.9375e-06, "loss": 0.1639404296875, "step": 305 }, { "epoch": 0.042639169511600364, "grad_norm": 18.16570472717285, "learning_rate": 5.95703125e-06, "loss": 0.19491195678710938, "step": 306 }, { "epoch": 0.04277851320281474, "grad_norm": 5.649717807769775, "learning_rate": 5.9765625000000004e-06, "loss": 0.17539596557617188, "step": 307 }, { "epoch": 0.042917856894029126, "grad_norm": 11.974588394165039, "learning_rate": 5.9960937500000005e-06, "loss": 0.1301727294921875, "step": 308 }, { "epoch": 0.0430572005852435, "grad_norm": 6.784336090087891, "learning_rate": 6.0156250000000005e-06, "loss": 0.08415985107421875, "step": 309 }, { "epoch": 0.04319654427645788, "grad_norm": 7.089005947113037, "learning_rate": 6.035156250000001e-06, "loss": 0.15939712524414062, "step": 310 }, { "epoch": 0.043335887967672265, "grad_norm": 11.734260559082031, "learning_rate": 6.054687500000001e-06, "loss": 0.1483306884765625, "step": 311 }, { "epoch": 0.04347523165888664, "grad_norm": 12.053906440734863, "learning_rate": 6.074218750000001e-06, "loss": 0.18137741088867188, "step": 312 }, { "epoch": 0.04361457535010103, "grad_norm": 4.816310882568359, "learning_rate": 6.093750000000001e-06, "loss": 0.12856674194335938, "step": 313 }, { "epoch": 0.043753919041315405, "grad_norm": 13.388944625854492, "learning_rate": 6.113281250000001e-06, "loss": 0.14677047729492188, "step": 314 }, { "epoch": 0.04389326273252978, "grad_norm": 3.81319522857666, "learning_rate": 6.132812500000001e-06, "loss": 0.12343597412109375, "step": 315 }, { "epoch": 0.04403260642374417, "grad_norm": 3.7697436809539795, "learning_rate": 6.152343750000001e-06, "loss": 0.135894775390625, "step": 316 }, { "epoch": 0.044171950114958544, "grad_norm": 9.140280723571777, "learning_rate": 6.171875e-06, "loss": 0.1689128875732422, "step": 317 }, { "epoch": 0.04431129380617293, "grad_norm": 8.38207721710205, "learning_rate": 6.19140625e-06, "loss": 0.13138771057128906, "step": 318 }, { "epoch": 0.044450637497387306, "grad_norm": 4.0850019454956055, "learning_rate": 6.2109375e-06, "loss": 0.13132858276367188, "step": 319 }, { "epoch": 0.044589981188601684, "grad_norm": 4.770200252532959, "learning_rate": 6.23046875e-06, "loss": 0.1194915771484375, "step": 320 }, { "epoch": 0.04472932487981607, "grad_norm": 3.041670322418213, "learning_rate": 6.25e-06, "loss": 0.11346626281738281, "step": 321 }, { "epoch": 0.044868668571030446, "grad_norm": 5.085007667541504, "learning_rate": 6.26953125e-06, "loss": 0.1211700439453125, "step": 322 }, { "epoch": 0.04500801226224483, "grad_norm": 17.25979232788086, "learning_rate": 6.2890625e-06, "loss": 0.14879989624023438, "step": 323 }, { "epoch": 0.04514735595345921, "grad_norm": 6.613240718841553, "learning_rate": 6.3085937500000005e-06, "loss": 0.11127853393554688, "step": 324 }, { "epoch": 0.045286699644673585, "grad_norm": 17.149194717407227, "learning_rate": 6.3281250000000005e-06, "loss": 0.1532115936279297, "step": 325 }, { "epoch": 0.04542604333588797, "grad_norm": 22.35675048828125, "learning_rate": 6.3476562500000006e-06, "loss": 0.17509078979492188, "step": 326 }, { "epoch": 0.04556538702710235, "grad_norm": 7.268908500671387, "learning_rate": 6.367187500000001e-06, "loss": 0.1220703125, "step": 327 }, { "epoch": 0.04570473071831673, "grad_norm": 7.406549453735352, "learning_rate": 6.386718750000001e-06, "loss": 0.1507720947265625, "step": 328 }, { "epoch": 0.04584407440953111, "grad_norm": 5.767739772796631, "learning_rate": 6.406250000000001e-06, "loss": 0.14178466796875, "step": 329 }, { "epoch": 0.045983418100745486, "grad_norm": 10.598320007324219, "learning_rate": 6.425781250000001e-06, "loss": 0.16968154907226562, "step": 330 }, { "epoch": 0.04612276179195987, "grad_norm": 15.640664100646973, "learning_rate": 6.445312500000001e-06, "loss": 0.1546478271484375, "step": 331 }, { "epoch": 0.04626210548317425, "grad_norm": 8.036147117614746, "learning_rate": 6.464843750000001e-06, "loss": 0.1279449462890625, "step": 332 }, { "epoch": 0.04640144917438863, "grad_norm": 4.867136478424072, "learning_rate": 6.484375000000001e-06, "loss": 0.11184310913085938, "step": 333 }, { "epoch": 0.04654079286560301, "grad_norm": 5.279112339019775, "learning_rate": 6.50390625e-06, "loss": 0.1315441131591797, "step": 334 }, { "epoch": 0.04668013655681739, "grad_norm": 20.22799301147461, "learning_rate": 6.5234375e-06, "loss": 0.15189743041992188, "step": 335 }, { "epoch": 0.04681948024803177, "grad_norm": 13.200963973999023, "learning_rate": 6.54296875e-06, "loss": 0.1396026611328125, "step": 336 }, { "epoch": 0.04695882393924615, "grad_norm": 5.872977256774902, "learning_rate": 6.5625e-06, "loss": 0.1094512939453125, "step": 337 }, { "epoch": 0.047098167630460534, "grad_norm": 9.333824157714844, "learning_rate": 6.58203125e-06, "loss": 0.12036895751953125, "step": 338 }, { "epoch": 0.04723751132167491, "grad_norm": 7.6798272132873535, "learning_rate": 6.6015625e-06, "loss": 0.11874961853027344, "step": 339 }, { "epoch": 0.04737685501288929, "grad_norm": 10.81562614440918, "learning_rate": 6.6210937500000004e-06, "loss": 0.12563323974609375, "step": 340 }, { "epoch": 0.047516198704103674, "grad_norm": 3.767956256866455, "learning_rate": 6.6406250000000005e-06, "loss": 0.1338348388671875, "step": 341 }, { "epoch": 0.04765554239531805, "grad_norm": 9.248411178588867, "learning_rate": 6.6601562500000005e-06, "loss": 0.1604156494140625, "step": 342 }, { "epoch": 0.047794886086532436, "grad_norm": 4.164698123931885, "learning_rate": 6.679687500000001e-06, "loss": 0.125762939453125, "step": 343 }, { "epoch": 0.04793422977774681, "grad_norm": 5.212998867034912, "learning_rate": 6.699218750000001e-06, "loss": 0.1324310302734375, "step": 344 }, { "epoch": 0.04807357346896119, "grad_norm": 12.63713550567627, "learning_rate": 6.718750000000001e-06, "loss": 0.11566734313964844, "step": 345 }, { "epoch": 0.048212917160175575, "grad_norm": 11.38140869140625, "learning_rate": 6.738281250000001e-06, "loss": 0.10749053955078125, "step": 346 }, { "epoch": 0.04835226085138995, "grad_norm": 5.455295562744141, "learning_rate": 6.757812500000001e-06, "loss": 0.17278671264648438, "step": 347 }, { "epoch": 0.04849160454260433, "grad_norm": 4.361754417419434, "learning_rate": 6.777343750000001e-06, "loss": 0.12764358520507812, "step": 348 }, { "epoch": 0.048630948233818715, "grad_norm": 7.216391563415527, "learning_rate": 6.796875000000001e-06, "loss": 0.16329383850097656, "step": 349 }, { "epoch": 0.04877029192503309, "grad_norm": 9.436443328857422, "learning_rate": 6.816406250000001e-06, "loss": 0.11617279052734375, "step": 350 }, { "epoch": 0.04890963561624748, "grad_norm": 3.2413413524627686, "learning_rate": 6.8359375e-06, "loss": 0.11961746215820312, "step": 351 }, { "epoch": 0.049048979307461854, "grad_norm": 7.151019096374512, "learning_rate": 6.85546875e-06, "loss": 0.10349655151367188, "step": 352 }, { "epoch": 0.04918832299867623, "grad_norm": 3.1572275161743164, "learning_rate": 6.875e-06, "loss": 0.09917259216308594, "step": 353 }, { "epoch": 0.049327666689890616, "grad_norm": 27.449222564697266, "learning_rate": 6.89453125e-06, "loss": 0.1640758514404297, "step": 354 }, { "epoch": 0.049467010381104994, "grad_norm": 31.594717025756836, "learning_rate": 6.9140625e-06, "loss": 0.1423816680908203, "step": 355 }, { "epoch": 0.04960635407231938, "grad_norm": 16.414283752441406, "learning_rate": 6.93359375e-06, "loss": 0.173248291015625, "step": 356 }, { "epoch": 0.049745697763533755, "grad_norm": 17.32240104675293, "learning_rate": 6.9531250000000004e-06, "loss": 0.17392349243164062, "step": 357 }, { "epoch": 0.04988504145474813, "grad_norm": 11.572318077087402, "learning_rate": 6.9726562500000005e-06, "loss": 0.1385784149169922, "step": 358 }, { "epoch": 0.05002438514596252, "grad_norm": 13.5459566116333, "learning_rate": 6.9921875000000006e-06, "loss": 0.12563705444335938, "step": 359 }, { "epoch": 0.050163728837176895, "grad_norm": 6.018731594085693, "learning_rate": 7.011718750000001e-06, "loss": 0.15261077880859375, "step": 360 }, { "epoch": 0.05030307252839128, "grad_norm": 13.522477149963379, "learning_rate": 7.031250000000001e-06, "loss": 0.1319255828857422, "step": 361 }, { "epoch": 0.05044241621960566, "grad_norm": 17.376407623291016, "learning_rate": 7.050781250000001e-06, "loss": 0.14013099670410156, "step": 362 }, { "epoch": 0.050581759910820034, "grad_norm": 26.152128219604492, "learning_rate": 7.070312500000001e-06, "loss": 0.1705322265625, "step": 363 }, { "epoch": 0.05072110360203442, "grad_norm": 23.52955436706543, "learning_rate": 7.089843750000001e-06, "loss": 0.15259552001953125, "step": 364 }, { "epoch": 0.050860447293248796, "grad_norm": 4.0682854652404785, "learning_rate": 7.109375000000001e-06, "loss": 0.10903549194335938, "step": 365 }, { "epoch": 0.05099979098446318, "grad_norm": 21.361454010009766, "learning_rate": 7.128906250000001e-06, "loss": 0.14567184448242188, "step": 366 }, { "epoch": 0.05113913467567756, "grad_norm": 21.228822708129883, "learning_rate": 7.148437500000001e-06, "loss": 0.1343364715576172, "step": 367 }, { "epoch": 0.051278478366891936, "grad_norm": 3.8869946002960205, "learning_rate": 7.16796875e-06, "loss": 0.1420764923095703, "step": 368 }, { "epoch": 0.05141782205810632, "grad_norm": 23.418996810913086, "learning_rate": 7.1875e-06, "loss": 0.18169403076171875, "step": 369 }, { "epoch": 0.0515571657493207, "grad_norm": 24.128589630126953, "learning_rate": 7.20703125e-06, "loss": 0.1653594970703125, "step": 370 }, { "epoch": 0.05169650944053508, "grad_norm": 9.271363258361816, "learning_rate": 7.2265625e-06, "loss": 0.16015625, "step": 371 }, { "epoch": 0.05183585313174946, "grad_norm": 11.875570297241211, "learning_rate": 7.24609375e-06, "loss": 0.10261917114257812, "step": 372 }, { "epoch": 0.05197519682296384, "grad_norm": 50.75835418701172, "learning_rate": 7.265625e-06, "loss": 0.18976211547851562, "step": 373 }, { "epoch": 0.05211454051417822, "grad_norm": 35.40769577026367, "learning_rate": 7.2851562500000005e-06, "loss": 0.14932632446289062, "step": 374 }, { "epoch": 0.0522538842053926, "grad_norm": 8.691764831542969, "learning_rate": 7.3046875000000005e-06, "loss": 0.13496780395507812, "step": 375 }, { "epoch": 0.052393227896606984, "grad_norm": 5.712652683258057, "learning_rate": 7.3242187500000006e-06, "loss": 0.16598892211914062, "step": 376 }, { "epoch": 0.05253257158782136, "grad_norm": 3.2809722423553467, "learning_rate": 7.343750000000001e-06, "loss": 0.12497520446777344, "step": 377 }, { "epoch": 0.05267191527903574, "grad_norm": 2.9415817260742188, "learning_rate": 7.363281250000001e-06, "loss": 0.13199615478515625, "step": 378 }, { "epoch": 0.05281125897025012, "grad_norm": 17.679826736450195, "learning_rate": 7.382812500000001e-06, "loss": 0.1691875457763672, "step": 379 }, { "epoch": 0.0529506026614645, "grad_norm": 4.21730899810791, "learning_rate": 7.402343750000001e-06, "loss": 0.13083839416503906, "step": 380 }, { "epoch": 0.053089946352678885, "grad_norm": 2.387911319732666, "learning_rate": 7.421875000000001e-06, "loss": 0.11188888549804688, "step": 381 }, { "epoch": 0.05322929004389326, "grad_norm": 4.009186744689941, "learning_rate": 7.441406250000001e-06, "loss": 0.13056182861328125, "step": 382 }, { "epoch": 0.05336863373510764, "grad_norm": 3.947474479675293, "learning_rate": 7.460937500000001e-06, "loss": 0.11488914489746094, "step": 383 }, { "epoch": 0.053507977426322025, "grad_norm": 4.933940887451172, "learning_rate": 7.480468750000001e-06, "loss": 0.11656951904296875, "step": 384 }, { "epoch": 0.0536473211175364, "grad_norm": 4.959610462188721, "learning_rate": 7.500000000000001e-06, "loss": 0.11410331726074219, "step": 385 }, { "epoch": 0.053786664808750786, "grad_norm": 6.914300441741943, "learning_rate": 7.51953125e-06, "loss": 0.15717506408691406, "step": 386 }, { "epoch": 0.053926008499965164, "grad_norm": 9.718318939208984, "learning_rate": 7.5390625e-06, "loss": 0.11223030090332031, "step": 387 }, { "epoch": 0.05406535219117954, "grad_norm": 22.949628829956055, "learning_rate": 7.55859375e-06, "loss": 0.17186355590820312, "step": 388 }, { "epoch": 0.054204695882393926, "grad_norm": 14.418889999389648, "learning_rate": 7.578125e-06, "loss": 0.12104034423828125, "step": 389 }, { "epoch": 0.0543440395736083, "grad_norm": 9.907075881958008, "learning_rate": 7.5976562500000004e-06, "loss": 0.1267871856689453, "step": 390 }, { "epoch": 0.05448338326482269, "grad_norm": 2.3994088172912598, "learning_rate": 7.6171875000000005e-06, "loss": 0.1160125732421875, "step": 391 }, { "epoch": 0.054622726956037065, "grad_norm": 7.6671576499938965, "learning_rate": 7.63671875e-06, "loss": 0.1330108642578125, "step": 392 }, { "epoch": 0.05476207064725144, "grad_norm": 3.7342159748077393, "learning_rate": 7.656250000000001e-06, "loss": 0.13245582580566406, "step": 393 }, { "epoch": 0.05490141433846583, "grad_norm": 16.18381118774414, "learning_rate": 7.67578125e-06, "loss": 0.1389636993408203, "step": 394 }, { "epoch": 0.055040758029680205, "grad_norm": 18.350284576416016, "learning_rate": 7.6953125e-06, "loss": 0.1558856964111328, "step": 395 }, { "epoch": 0.05518010172089459, "grad_norm": 2.443239450454712, "learning_rate": 7.71484375e-06, "loss": 0.10216903686523438, "step": 396 }, { "epoch": 0.05531944541210897, "grad_norm": 9.202642440795898, "learning_rate": 7.734375e-06, "loss": 0.1598377227783203, "step": 397 }, { "epoch": 0.055458789103323344, "grad_norm": 19.799238204956055, "learning_rate": 7.753906250000001e-06, "loss": 0.15776443481445312, "step": 398 }, { "epoch": 0.05559813279453773, "grad_norm": 6.996795654296875, "learning_rate": 7.7734375e-06, "loss": 0.11672782897949219, "step": 399 }, { "epoch": 0.055737476485752106, "grad_norm": 5.478125095367432, "learning_rate": 7.792968750000001e-06, "loss": 0.09966850280761719, "step": 400 }, { "epoch": 0.05587682017696649, "grad_norm": 15.466421127319336, "learning_rate": 7.8125e-06, "loss": 0.16741180419921875, "step": 401 }, { "epoch": 0.05601616386818087, "grad_norm": 2.855491876602173, "learning_rate": 7.832031250000001e-06, "loss": 0.09989356994628906, "step": 402 }, { "epoch": 0.056155507559395246, "grad_norm": 8.92283821105957, "learning_rate": 7.8515625e-06, "loss": 0.1524372100830078, "step": 403 }, { "epoch": 0.05629485125060963, "grad_norm": 6.378612995147705, "learning_rate": 7.871093750000001e-06, "loss": 0.10200881958007812, "step": 404 }, { "epoch": 0.05643419494182401, "grad_norm": 6.979927062988281, "learning_rate": 7.890625e-06, "loss": 0.1280813217163086, "step": 405 }, { "epoch": 0.05657353863303839, "grad_norm": 7.913893222808838, "learning_rate": 7.910156250000001e-06, "loss": 0.11832427978515625, "step": 406 }, { "epoch": 0.05671288232425277, "grad_norm": 4.105276584625244, "learning_rate": 7.9296875e-06, "loss": 0.13958358764648438, "step": 407 }, { "epoch": 0.05685222601546715, "grad_norm": 3.9391839504241943, "learning_rate": 7.949218750000001e-06, "loss": 0.11365509033203125, "step": 408 }, { "epoch": 0.05699156970668153, "grad_norm": 2.3323938846588135, "learning_rate": 7.96875e-06, "loss": 0.09966468811035156, "step": 409 }, { "epoch": 0.05713091339789591, "grad_norm": 3.498328924179077, "learning_rate": 7.988281250000001e-06, "loss": 0.10223388671875, "step": 410 }, { "epoch": 0.057270257089110294, "grad_norm": 3.3963608741760254, "learning_rate": 8.0078125e-06, "loss": 0.11821746826171875, "step": 411 }, { "epoch": 0.05740960078032467, "grad_norm": 9.489019393920898, "learning_rate": 8.02734375e-06, "loss": 0.12239837646484375, "step": 412 }, { "epoch": 0.05754894447153905, "grad_norm": 5.398226737976074, "learning_rate": 8.046875e-06, "loss": 0.14681625366210938, "step": 413 }, { "epoch": 0.05768828816275343, "grad_norm": 10.474808692932129, "learning_rate": 8.06640625e-06, "loss": 0.12370681762695312, "step": 414 }, { "epoch": 0.05782763185396781, "grad_norm": 10.341968536376953, "learning_rate": 8.085937500000001e-06, "loss": 0.13318634033203125, "step": 415 }, { "epoch": 0.057966975545182195, "grad_norm": 2.4845309257507324, "learning_rate": 8.10546875e-06, "loss": 0.11198043823242188, "step": 416 }, { "epoch": 0.05810631923639657, "grad_norm": 7.077255725860596, "learning_rate": 8.125000000000001e-06, "loss": 0.11313056945800781, "step": 417 }, { "epoch": 0.05824566292761095, "grad_norm": 5.854893684387207, "learning_rate": 8.14453125e-06, "loss": 0.12729644775390625, "step": 418 }, { "epoch": 0.058385006618825334, "grad_norm": 7.183177471160889, "learning_rate": 8.164062500000001e-06, "loss": 0.11773490905761719, "step": 419 }, { "epoch": 0.05852435031003971, "grad_norm": 10.94343376159668, "learning_rate": 8.18359375e-06, "loss": 0.11059188842773438, "step": 420 }, { "epoch": 0.058663694001254096, "grad_norm": 3.26096510887146, "learning_rate": 8.203125000000001e-06, "loss": 0.11528396606445312, "step": 421 }, { "epoch": 0.058803037692468474, "grad_norm": 6.356678485870361, "learning_rate": 8.22265625e-06, "loss": 0.09600639343261719, "step": 422 }, { "epoch": 0.05894238138368285, "grad_norm": 9.848954200744629, "learning_rate": 8.242187500000001e-06, "loss": 0.135040283203125, "step": 423 }, { "epoch": 0.059081725074897236, "grad_norm": 3.618823766708374, "learning_rate": 8.26171875e-06, "loss": 0.13683700561523438, "step": 424 }, { "epoch": 0.05922106876611161, "grad_norm": 7.617012977600098, "learning_rate": 8.281250000000001e-06, "loss": 0.16410446166992188, "step": 425 }, { "epoch": 0.059360412457326, "grad_norm": 5.738245964050293, "learning_rate": 8.30078125e-06, "loss": 0.18029022216796875, "step": 426 }, { "epoch": 0.059499756148540375, "grad_norm": 5.942124843597412, "learning_rate": 8.320312500000001e-06, "loss": 0.12903404235839844, "step": 427 }, { "epoch": 0.05963909983975475, "grad_norm": 7.8353705406188965, "learning_rate": 8.33984375e-06, "loss": 0.1434783935546875, "step": 428 }, { "epoch": 0.05977844353096914, "grad_norm": 6.4422197341918945, "learning_rate": 8.359375e-06, "loss": 0.13741302490234375, "step": 429 }, { "epoch": 0.059917787222183515, "grad_norm": 3.711513042449951, "learning_rate": 8.37890625e-06, "loss": 0.08208560943603516, "step": 430 }, { "epoch": 0.0600571309133979, "grad_norm": 3.132326364517212, "learning_rate": 8.3984375e-06, "loss": 0.10668373107910156, "step": 431 }, { "epoch": 0.06019647460461228, "grad_norm": 2.7884442806243896, "learning_rate": 8.417968750000001e-06, "loss": 0.109100341796875, "step": 432 }, { "epoch": 0.060335818295826654, "grad_norm": 12.568721771240234, "learning_rate": 8.4375e-06, "loss": 0.10981941223144531, "step": 433 }, { "epoch": 0.06047516198704104, "grad_norm": 18.144149780273438, "learning_rate": 8.457031250000001e-06, "loss": 0.15161514282226562, "step": 434 }, { "epoch": 0.060614505678255416, "grad_norm": 7.4828715324401855, "learning_rate": 8.4765625e-06, "loss": 0.10838890075683594, "step": 435 }, { "epoch": 0.0607538493694698, "grad_norm": 2.618628740310669, "learning_rate": 8.496093750000001e-06, "loss": 0.10389328002929688, "step": 436 }, { "epoch": 0.06089319306068418, "grad_norm": 7.747107028961182, "learning_rate": 8.515625e-06, "loss": 0.11337089538574219, "step": 437 }, { "epoch": 0.061032536751898556, "grad_norm": 4.672323226928711, "learning_rate": 8.535156250000001e-06, "loss": 0.10003852844238281, "step": 438 }, { "epoch": 0.06117188044311294, "grad_norm": 7.688747406005859, "learning_rate": 8.5546875e-06, "loss": 0.10384178161621094, "step": 439 }, { "epoch": 0.06131122413432732, "grad_norm": 11.883987426757812, "learning_rate": 8.574218750000001e-06, "loss": 0.14785003662109375, "step": 440 }, { "epoch": 0.0614505678255417, "grad_norm": 5.825545787811279, "learning_rate": 8.59375e-06, "loss": 0.11618995666503906, "step": 441 }, { "epoch": 0.06158991151675608, "grad_norm": 5.729133129119873, "learning_rate": 8.613281250000001e-06, "loss": 0.1250743865966797, "step": 442 }, { "epoch": 0.06172925520797046, "grad_norm": 8.057960510253906, "learning_rate": 8.6328125e-06, "loss": 0.14338302612304688, "step": 443 }, { "epoch": 0.06186859889918484, "grad_norm": 4.9827423095703125, "learning_rate": 8.652343750000002e-06, "loss": 0.14757728576660156, "step": 444 }, { "epoch": 0.06200794259039922, "grad_norm": 4.74862813949585, "learning_rate": 8.671875e-06, "loss": 0.1381378173828125, "step": 445 }, { "epoch": 0.062147286281613597, "grad_norm": 5.475962162017822, "learning_rate": 8.69140625e-06, "loss": 0.1077880859375, "step": 446 }, { "epoch": 0.06228662997282798, "grad_norm": 2.953033208847046, "learning_rate": 8.7109375e-06, "loss": 0.13126182556152344, "step": 447 }, { "epoch": 0.06242597366404236, "grad_norm": 3.404369354248047, "learning_rate": 8.73046875e-06, "loss": 0.10086441040039062, "step": 448 }, { "epoch": 0.06256531735525674, "grad_norm": 2.8300018310546875, "learning_rate": 8.750000000000001e-06, "loss": 0.1105499267578125, "step": 449 }, { "epoch": 0.06270466104647113, "grad_norm": 11.484071731567383, "learning_rate": 8.76953125e-06, "loss": 0.1699676513671875, "step": 450 }, { "epoch": 0.0628440047376855, "grad_norm": 6.368845462799072, "learning_rate": 8.789062500000001e-06, "loss": 0.1310868263244629, "step": 451 }, { "epoch": 0.06298334842889988, "grad_norm": 16.4228458404541, "learning_rate": 8.80859375e-06, "loss": 0.1685318946838379, "step": 452 }, { "epoch": 0.06312269212011426, "grad_norm": 3.3729896545410156, "learning_rate": 8.828125000000001e-06, "loss": 0.08585071563720703, "step": 453 }, { "epoch": 0.06326203581132864, "grad_norm": 4.431811332702637, "learning_rate": 8.84765625e-06, "loss": 0.09554195404052734, "step": 454 }, { "epoch": 0.06340137950254303, "grad_norm": 8.768561363220215, "learning_rate": 8.867187500000001e-06, "loss": 0.102935791015625, "step": 455 }, { "epoch": 0.0635407231937574, "grad_norm": 4.584352493286133, "learning_rate": 8.88671875e-06, "loss": 0.09372138977050781, "step": 456 }, { "epoch": 0.06368006688497178, "grad_norm": 2.4284677505493164, "learning_rate": 8.906250000000001e-06, "loss": 0.109283447265625, "step": 457 }, { "epoch": 0.06381941057618616, "grad_norm": 2.7724809646606445, "learning_rate": 8.92578125e-06, "loss": 0.1288280487060547, "step": 458 }, { "epoch": 0.06395875426740054, "grad_norm": 3.040001153945923, "learning_rate": 8.945312500000001e-06, "loss": 0.11046981811523438, "step": 459 }, { "epoch": 0.06409809795861493, "grad_norm": 9.727893829345703, "learning_rate": 8.96484375e-06, "loss": 0.1340198516845703, "step": 460 }, { "epoch": 0.06423744164982931, "grad_norm": 5.08907413482666, "learning_rate": 8.984375000000002e-06, "loss": 0.13259553909301758, "step": 461 }, { "epoch": 0.06437678534104369, "grad_norm": 4.432329177856445, "learning_rate": 9.00390625e-06, "loss": 0.0964508056640625, "step": 462 }, { "epoch": 0.06451612903225806, "grad_norm": 3.4104249477386475, "learning_rate": 9.0234375e-06, "loss": 0.08536720275878906, "step": 463 }, { "epoch": 0.06465547272347244, "grad_norm": 2.6989359855651855, "learning_rate": 9.042968750000001e-06, "loss": 0.09273147583007812, "step": 464 }, { "epoch": 0.06479481641468683, "grad_norm": 2.4118692874908447, "learning_rate": 9.0625e-06, "loss": 0.08761024475097656, "step": 465 }, { "epoch": 0.06493416010590121, "grad_norm": 3.3979265689849854, "learning_rate": 9.082031250000001e-06, "loss": 0.12953758239746094, "step": 466 }, { "epoch": 0.06507350379711559, "grad_norm": 4.73321008682251, "learning_rate": 9.1015625e-06, "loss": 0.13414764404296875, "step": 467 }, { "epoch": 0.06521284748832996, "grad_norm": 5.537256717681885, "learning_rate": 9.121093750000001e-06, "loss": 0.10346603393554688, "step": 468 }, { "epoch": 0.06535219117954434, "grad_norm": 3.179276943206787, "learning_rate": 9.140625e-06, "loss": 0.11625099182128906, "step": 469 }, { "epoch": 0.06549153487075873, "grad_norm": 7.527850151062012, "learning_rate": 9.160156250000001e-06, "loss": 0.10420703887939453, "step": 470 }, { "epoch": 0.06563087856197311, "grad_norm": 4.370924472808838, "learning_rate": 9.1796875e-06, "loss": 0.11959266662597656, "step": 471 }, { "epoch": 0.06577022225318749, "grad_norm": 4.625570297241211, "learning_rate": 9.199218750000001e-06, "loss": 0.13494873046875, "step": 472 }, { "epoch": 0.06590956594440187, "grad_norm": 9.09106159210205, "learning_rate": 9.21875e-06, "loss": 0.14458465576171875, "step": 473 }, { "epoch": 0.06604890963561624, "grad_norm": 2.7147252559661865, "learning_rate": 9.238281250000001e-06, "loss": 0.10429191589355469, "step": 474 }, { "epoch": 0.06618825332683063, "grad_norm": 12.15719985961914, "learning_rate": 9.2578125e-06, "loss": 0.12290763854980469, "step": 475 }, { "epoch": 0.06632759701804501, "grad_norm": 8.992039680480957, "learning_rate": 9.277343750000001e-06, "loss": 0.11193466186523438, "step": 476 }, { "epoch": 0.06646694070925939, "grad_norm": 9.103899955749512, "learning_rate": 9.296875e-06, "loss": 0.1355609893798828, "step": 477 }, { "epoch": 0.06660628440047377, "grad_norm": 9.120779991149902, "learning_rate": 9.316406250000002e-06, "loss": 0.09985828399658203, "step": 478 }, { "epoch": 0.06674562809168814, "grad_norm": 12.220949172973633, "learning_rate": 9.3359375e-06, "loss": 0.13946533203125, "step": 479 }, { "epoch": 0.06688497178290254, "grad_norm": 5.57617712020874, "learning_rate": 9.35546875e-06, "loss": 0.13969135284423828, "step": 480 }, { "epoch": 0.06702431547411691, "grad_norm": 7.006617069244385, "learning_rate": 9.375000000000001e-06, "loss": 0.12479400634765625, "step": 481 }, { "epoch": 0.06716365916533129, "grad_norm": 4.939764499664307, "learning_rate": 9.39453125e-06, "loss": 0.11557531356811523, "step": 482 }, { "epoch": 0.06730300285654567, "grad_norm": 4.817184925079346, "learning_rate": 9.414062500000001e-06, "loss": 0.136688232421875, "step": 483 }, { "epoch": 0.06744234654776005, "grad_norm": 9.128363609313965, "learning_rate": 9.43359375e-06, "loss": 0.11755561828613281, "step": 484 }, { "epoch": 0.06758169023897444, "grad_norm": 7.698535442352295, "learning_rate": 9.453125000000001e-06, "loss": 0.1313800811767578, "step": 485 }, { "epoch": 0.06772103393018881, "grad_norm": 3.8278017044067383, "learning_rate": 9.47265625e-06, "loss": 0.1163787841796875, "step": 486 }, { "epoch": 0.06786037762140319, "grad_norm": 3.5997111797332764, "learning_rate": 9.492187500000001e-06, "loss": 0.09950828552246094, "step": 487 }, { "epoch": 0.06799972131261757, "grad_norm": 3.734302043914795, "learning_rate": 9.51171875e-06, "loss": 0.10017776489257812, "step": 488 }, { "epoch": 0.06813906500383195, "grad_norm": 1.2702679634094238, "learning_rate": 9.531250000000001e-06, "loss": 0.09468269348144531, "step": 489 }, { "epoch": 0.06827840869504634, "grad_norm": 2.581217050552368, "learning_rate": 9.55078125e-06, "loss": 0.10832786560058594, "step": 490 }, { "epoch": 0.06841775238626072, "grad_norm": 2.5726301670074463, "learning_rate": 9.570312500000001e-06, "loss": 0.11065101623535156, "step": 491 }, { "epoch": 0.0685570960774751, "grad_norm": 1.9561649560928345, "learning_rate": 9.58984375e-06, "loss": 0.11000823974609375, "step": 492 }, { "epoch": 0.06869643976868947, "grad_norm": 2.3419435024261475, "learning_rate": 9.609375000000001e-06, "loss": 0.10045814514160156, "step": 493 }, { "epoch": 0.06883578345990385, "grad_norm": 10.367630958557129, "learning_rate": 9.62890625e-06, "loss": 0.15293502807617188, "step": 494 }, { "epoch": 0.06897512715111823, "grad_norm": 3.146028518676758, "learning_rate": 9.648437500000002e-06, "loss": 0.0829472541809082, "step": 495 }, { "epoch": 0.06911447084233262, "grad_norm": 2.3311386108398438, "learning_rate": 9.66796875e-06, "loss": 0.08582878112792969, "step": 496 }, { "epoch": 0.069253814533547, "grad_norm": 1.314138650894165, "learning_rate": 9.6875e-06, "loss": 0.0747365951538086, "step": 497 }, { "epoch": 0.06939315822476137, "grad_norm": 4.106570720672607, "learning_rate": 9.707031250000001e-06, "loss": 0.10406684875488281, "step": 498 }, { "epoch": 0.06953250191597575, "grad_norm": 3.477018356323242, "learning_rate": 9.7265625e-06, "loss": 0.12084388732910156, "step": 499 }, { "epoch": 0.06967184560719013, "grad_norm": 3.087303876876831, "learning_rate": 9.746093750000001e-06, "loss": 0.1273670196533203, "step": 500 }, { "epoch": 0.06981118929840452, "grad_norm": 4.667466163635254, "learning_rate": 9.765625e-06, "loss": 0.11314201354980469, "step": 501 }, { "epoch": 0.0699505329896189, "grad_norm": 2.9917664527893066, "learning_rate": 9.785156250000001e-06, "loss": 0.1376190185546875, "step": 502 }, { "epoch": 0.07008987668083327, "grad_norm": 2.6732401847839355, "learning_rate": 9.8046875e-06, "loss": 0.10599899291992188, "step": 503 }, { "epoch": 0.07022922037204765, "grad_norm": 1.9942916631698608, "learning_rate": 9.824218750000001e-06, "loss": 0.10249042510986328, "step": 504 }, { "epoch": 0.07036856406326203, "grad_norm": 11.294593811035156, "learning_rate": 9.84375e-06, "loss": 0.13546180725097656, "step": 505 }, { "epoch": 0.07050790775447642, "grad_norm": 12.13893985748291, "learning_rate": 9.863281250000001e-06, "loss": 0.12302589416503906, "step": 506 }, { "epoch": 0.0706472514456908, "grad_norm": 4.009533405303955, "learning_rate": 9.8828125e-06, "loss": 0.10701370239257812, "step": 507 }, { "epoch": 0.07078659513690518, "grad_norm": 8.31468391418457, "learning_rate": 9.902343750000001e-06, "loss": 0.12748336791992188, "step": 508 }, { "epoch": 0.07092593882811955, "grad_norm": 6.132339000701904, "learning_rate": 9.921875e-06, "loss": 0.08867597579956055, "step": 509 }, { "epoch": 0.07106528251933393, "grad_norm": 5.8179121017456055, "learning_rate": 9.941406250000002e-06, "loss": 0.10871124267578125, "step": 510 }, { "epoch": 0.07120462621054832, "grad_norm": 12.321744918823242, "learning_rate": 9.9609375e-06, "loss": 0.13195037841796875, "step": 511 }, { "epoch": 0.0713439699017627, "grad_norm": 9.185007095336914, "learning_rate": 9.980468750000002e-06, "loss": 0.12718582153320312, "step": 512 }, { "epoch": 0.07148331359297708, "grad_norm": 3.411240577697754, "learning_rate": 1e-05, "loss": 0.11948204040527344, "step": 513 }, { "epoch": 0.07162265728419145, "grad_norm": 3.1739957332611084, "learning_rate": 9.999999444557077e-06, "loss": 0.09686279296875, "step": 514 }, { "epoch": 0.07176200097540583, "grad_norm": 3.137723207473755, "learning_rate": 9.999997778228428e-06, "loss": 0.1110382080078125, "step": 515 }, { "epoch": 0.07190134466662022, "grad_norm": 7.657570838928223, "learning_rate": 9.999995001014424e-06, "loss": 0.12385272979736328, "step": 516 }, { "epoch": 0.0720406883578346, "grad_norm": 7.167331695556641, "learning_rate": 9.999991112915685e-06, "loss": 0.12115287780761719, "step": 517 }, { "epoch": 0.07218003204904898, "grad_norm": 10.727917671203613, "learning_rate": 9.999986113933071e-06, "loss": 0.16209983825683594, "step": 518 }, { "epoch": 0.07231937574026336, "grad_norm": 2.867664337158203, "learning_rate": 9.999980004067694e-06, "loss": 0.13766860961914062, "step": 519 }, { "epoch": 0.07245871943147773, "grad_norm": 9.698603630065918, "learning_rate": 9.99997278332091e-06, "loss": 0.12203598022460938, "step": 520 }, { "epoch": 0.07259806312269212, "grad_norm": 10.597928047180176, "learning_rate": 9.999964451694328e-06, "loss": 0.1389312744140625, "step": 521 }, { "epoch": 0.0727374068139065, "grad_norm": 4.540619850158691, "learning_rate": 9.999955009189795e-06, "loss": 0.10507583618164062, "step": 522 }, { "epoch": 0.07287675050512088, "grad_norm": 4.16182279586792, "learning_rate": 9.999944455809408e-06, "loss": 0.14247703552246094, "step": 523 }, { "epoch": 0.07301609419633526, "grad_norm": 2.4326012134552, "learning_rate": 9.999932791555516e-06, "loss": 0.10679054260253906, "step": 524 }, { "epoch": 0.07315543788754963, "grad_norm": 3.483944892883301, "learning_rate": 9.999920016430706e-06, "loss": 0.1350250244140625, "step": 525 }, { "epoch": 0.07329478157876403, "grad_norm": 3.145601987838745, "learning_rate": 9.99990613043782e-06, "loss": 0.14048385620117188, "step": 526 }, { "epoch": 0.0734341252699784, "grad_norm": 2.7989754676818848, "learning_rate": 9.999891133579941e-06, "loss": 0.10354042053222656, "step": 527 }, { "epoch": 0.07357346896119278, "grad_norm": 1.6842589378356934, "learning_rate": 9.999875025860401e-06, "loss": 0.0895843505859375, "step": 528 }, { "epoch": 0.07371281265240716, "grad_norm": 8.061688423156738, "learning_rate": 9.99985780728278e-06, "loss": 0.16471481323242188, "step": 529 }, { "epoch": 0.07385215634362154, "grad_norm": 6.634884357452393, "learning_rate": 9.999839477850903e-06, "loss": 0.119293212890625, "step": 530 }, { "epoch": 0.07399150003483593, "grad_norm": 2.4588654041290283, "learning_rate": 9.999820037568844e-06, "loss": 0.10683345794677734, "step": 531 }, { "epoch": 0.0741308437260503, "grad_norm": 4.102080345153809, "learning_rate": 9.999799486440917e-06, "loss": 0.12251853942871094, "step": 532 }, { "epoch": 0.07427018741726468, "grad_norm": 2.6940746307373047, "learning_rate": 9.999777824471694e-06, "loss": 0.09251594543457031, "step": 533 }, { "epoch": 0.07440953110847906, "grad_norm": 4.40526008605957, "learning_rate": 9.999755051665985e-06, "loss": 0.094085693359375, "step": 534 }, { "epoch": 0.07454887479969344, "grad_norm": 6.812039852142334, "learning_rate": 9.99973116802885e-06, "loss": 0.10603523254394531, "step": 535 }, { "epoch": 0.07468821849090783, "grad_norm": 1.7236942052841187, "learning_rate": 9.999706173565594e-06, "loss": 0.09881210327148438, "step": 536 }, { "epoch": 0.0748275621821222, "grad_norm": 5.690102577209473, "learning_rate": 9.999680068281773e-06, "loss": 0.11844062805175781, "step": 537 }, { "epoch": 0.07496690587333658, "grad_norm": 8.261590003967285, "learning_rate": 9.999652852183184e-06, "loss": 0.13910293579101562, "step": 538 }, { "epoch": 0.07510624956455096, "grad_norm": 4.19952917098999, "learning_rate": 9.999624525275875e-06, "loss": 0.12147140502929688, "step": 539 }, { "epoch": 0.07524559325576534, "grad_norm": 8.107975959777832, "learning_rate": 9.99959508756614e-06, "loss": 0.12774276733398438, "step": 540 }, { "epoch": 0.07538493694697973, "grad_norm": 3.248178482055664, "learning_rate": 9.99956453906052e-06, "loss": 0.09564590454101562, "step": 541 }, { "epoch": 0.07552428063819411, "grad_norm": 4.747007369995117, "learning_rate": 9.999532879765801e-06, "loss": 0.12437248229980469, "step": 542 }, { "epoch": 0.07566362432940849, "grad_norm": 1.875176191329956, "learning_rate": 9.999500109689018e-06, "loss": 0.11395454406738281, "step": 543 }, { "epoch": 0.07580296802062286, "grad_norm": 4.812331199645996, "learning_rate": 9.999466228837452e-06, "loss": 0.09535789489746094, "step": 544 }, { "epoch": 0.07594231171183724, "grad_norm": 8.641239166259766, "learning_rate": 9.999431237218629e-06, "loss": 0.12883758544921875, "step": 545 }, { "epoch": 0.07608165540305163, "grad_norm": 9.561633110046387, "learning_rate": 9.999395134840323e-06, "loss": 0.14813232421875, "step": 546 }, { "epoch": 0.07622099909426601, "grad_norm": 7.926206588745117, "learning_rate": 9.999357921710557e-06, "loss": 0.09956932067871094, "step": 547 }, { "epoch": 0.07636034278548039, "grad_norm": 9.948467254638672, "learning_rate": 9.999319597837599e-06, "loss": 0.13651466369628906, "step": 548 }, { "epoch": 0.07649968647669476, "grad_norm": 11.142829895019531, "learning_rate": 9.99928016322996e-06, "loss": 0.11629104614257812, "step": 549 }, { "epoch": 0.07663903016790914, "grad_norm": 3.0710973739624023, "learning_rate": 9.999239617896406e-06, "loss": 0.1387195587158203, "step": 550 }, { "epoch": 0.07677837385912353, "grad_norm": 10.35086441040039, "learning_rate": 9.999197961845943e-06, "loss": 0.12053298950195312, "step": 551 }, { "epoch": 0.07691771755033791, "grad_norm": 6.159016132354736, "learning_rate": 9.999155195087826e-06, "loss": 0.11011886596679688, "step": 552 }, { "epoch": 0.07705706124155229, "grad_norm": 4.572666168212891, "learning_rate": 9.999111317631559e-06, "loss": 0.12804317474365234, "step": 553 }, { "epoch": 0.07719640493276667, "grad_norm": 4.164255619049072, "learning_rate": 9.999066329486888e-06, "loss": 0.13587379455566406, "step": 554 }, { "epoch": 0.07733574862398104, "grad_norm": 2.6403815746307373, "learning_rate": 9.999020230663809e-06, "loss": 0.0854635238647461, "step": 555 }, { "epoch": 0.07747509231519543, "grad_norm": 1.9019668102264404, "learning_rate": 9.998973021172564e-06, "loss": 0.10347175598144531, "step": 556 }, { "epoch": 0.07761443600640981, "grad_norm": 1.545079231262207, "learning_rate": 9.998924701023645e-06, "loss": 0.12252235412597656, "step": 557 }, { "epoch": 0.07775377969762419, "grad_norm": 2.355318307876587, "learning_rate": 9.998875270227781e-06, "loss": 0.11280632019042969, "step": 558 }, { "epoch": 0.07789312338883857, "grad_norm": 1.7653857469558716, "learning_rate": 9.99882472879596e-06, "loss": 0.10586166381835938, "step": 559 }, { "epoch": 0.07803246708005294, "grad_norm": 5.28569221496582, "learning_rate": 9.998773076739409e-06, "loss": 0.13096046447753906, "step": 560 }, { "epoch": 0.07817181077126734, "grad_norm": 2.731588363647461, "learning_rate": 9.998720314069606e-06, "loss": 0.0926656723022461, "step": 561 }, { "epoch": 0.07831115446248171, "grad_norm": 7.169831275939941, "learning_rate": 9.99866644079827e-06, "loss": 0.140594482421875, "step": 562 }, { "epoch": 0.07845049815369609, "grad_norm": 6.222179889678955, "learning_rate": 9.998611456937373e-06, "loss": 0.12844276428222656, "step": 563 }, { "epoch": 0.07858984184491047, "grad_norm": 3.1984126567840576, "learning_rate": 9.99855536249913e-06, "loss": 0.15152931213378906, "step": 564 }, { "epoch": 0.07872918553612485, "grad_norm": 2.4286036491394043, "learning_rate": 9.998498157496004e-06, "loss": 0.11175346374511719, "step": 565 }, { "epoch": 0.07886852922733924, "grad_norm": 1.62360680103302, "learning_rate": 9.998439841940706e-06, "loss": 0.09375762939453125, "step": 566 }, { "epoch": 0.07900787291855361, "grad_norm": 3.631646156311035, "learning_rate": 9.998380415846191e-06, "loss": 0.10641288757324219, "step": 567 }, { "epoch": 0.07914721660976799, "grad_norm": 8.570578575134277, "learning_rate": 9.998319879225662e-06, "loss": 0.16476821899414062, "step": 568 }, { "epoch": 0.07928656030098237, "grad_norm": 3.012768268585205, "learning_rate": 9.998258232092571e-06, "loss": 0.13264083862304688, "step": 569 }, { "epoch": 0.07942590399219675, "grad_norm": 6.910480499267578, "learning_rate": 9.998195474460613e-06, "loss": 0.11327743530273438, "step": 570 }, { "epoch": 0.07956524768341114, "grad_norm": 3.627100944519043, "learning_rate": 9.998131606343729e-06, "loss": 0.1319427490234375, "step": 571 }, { "epoch": 0.07970459137462552, "grad_norm": 3.1204333305358887, "learning_rate": 9.998066627756113e-06, "loss": 0.1310405731201172, "step": 572 }, { "epoch": 0.0798439350658399, "grad_norm": 4.16229248046875, "learning_rate": 9.9980005387122e-06, "loss": 0.11428260803222656, "step": 573 }, { "epoch": 0.07998327875705427, "grad_norm": 2.1048569679260254, "learning_rate": 9.997933339226675e-06, "loss": 0.12359237670898438, "step": 574 }, { "epoch": 0.08012262244826865, "grad_norm": 2.1653833389282227, "learning_rate": 9.997865029314464e-06, "loss": 0.13133621215820312, "step": 575 }, { "epoch": 0.08026196613948304, "grad_norm": 4.95909309387207, "learning_rate": 9.997795608990749e-06, "loss": 0.15745162963867188, "step": 576 }, { "epoch": 0.08040130983069742, "grad_norm": 1.1377111673355103, "learning_rate": 9.99772507827095e-06, "loss": 0.10693740844726562, "step": 577 }, { "epoch": 0.0805406535219118, "grad_norm": 2.5671260356903076, "learning_rate": 9.997653437170739e-06, "loss": 0.12531089782714844, "step": 578 }, { "epoch": 0.08067999721312617, "grad_norm": 1.343429446220398, "learning_rate": 9.997580685706032e-06, "loss": 0.09321022033691406, "step": 579 }, { "epoch": 0.08081934090434055, "grad_norm": 2.8364076614379883, "learning_rate": 9.997506823892993e-06, "loss": 0.1119847297668457, "step": 580 }, { "epoch": 0.08095868459555494, "grad_norm": 3.4614803791046143, "learning_rate": 9.997431851748034e-06, "loss": 0.11912918090820312, "step": 581 }, { "epoch": 0.08109802828676932, "grad_norm": 1.4463012218475342, "learning_rate": 9.99735576928781e-06, "loss": 0.10640239715576172, "step": 582 }, { "epoch": 0.0812373719779837, "grad_norm": 8.015786170959473, "learning_rate": 9.997278576529228e-06, "loss": 0.1288299560546875, "step": 583 }, { "epoch": 0.08137671566919807, "grad_norm": 1.561755657196045, "learning_rate": 9.997200273489434e-06, "loss": 0.09158515930175781, "step": 584 }, { "epoch": 0.08151605936041245, "grad_norm": 1.3187988996505737, "learning_rate": 9.997120860185827e-06, "loss": 0.093475341796875, "step": 585 }, { "epoch": 0.08165540305162684, "grad_norm": 1.204382061958313, "learning_rate": 9.997040336636052e-06, "loss": 0.0930023193359375, "step": 586 }, { "epoch": 0.08179474674284122, "grad_norm": 4.9692606925964355, "learning_rate": 9.996958702857997e-06, "loss": 0.12473678588867188, "step": 587 }, { "epoch": 0.0819340904340556, "grad_norm": 8.026773452758789, "learning_rate": 9.996875958869803e-06, "loss": 0.10525703430175781, "step": 588 }, { "epoch": 0.08207343412526998, "grad_norm": 2.8801398277282715, "learning_rate": 9.996792104689849e-06, "loss": 0.10811710357666016, "step": 589 }, { "epoch": 0.08221277781648435, "grad_norm": 10.80790901184082, "learning_rate": 9.99670714033677e-06, "loss": 0.12412071228027344, "step": 590 }, { "epoch": 0.08235212150769874, "grad_norm": 9.846858978271484, "learning_rate": 9.996621065829442e-06, "loss": 0.1140604019165039, "step": 591 }, { "epoch": 0.08249146519891312, "grad_norm": 5.818487644195557, "learning_rate": 9.996533881186986e-06, "loss": 0.11863327026367188, "step": 592 }, { "epoch": 0.0826308088901275, "grad_norm": 3.6853702068328857, "learning_rate": 9.996445586428776e-06, "loss": 0.10928726196289062, "step": 593 }, { "epoch": 0.08277015258134188, "grad_norm": 3.8903262615203857, "learning_rate": 9.996356181574425e-06, "loss": 0.09885406494140625, "step": 594 }, { "epoch": 0.08290949627255625, "grad_norm": 5.756770133972168, "learning_rate": 9.9962656666438e-06, "loss": 0.11503028869628906, "step": 595 }, { "epoch": 0.08304883996377065, "grad_norm": 4.048373699188232, "learning_rate": 9.996174041657012e-06, "loss": 0.11046600341796875, "step": 596 }, { "epoch": 0.08318818365498502, "grad_norm": 3.9111390113830566, "learning_rate": 9.996081306634416e-06, "loss": 0.10268974304199219, "step": 597 }, { "epoch": 0.0833275273461994, "grad_norm": 1.7418559789657593, "learning_rate": 9.995987461596617e-06, "loss": 0.0951070785522461, "step": 598 }, { "epoch": 0.08346687103741378, "grad_norm": 2.7358577251434326, "learning_rate": 9.995892506564461e-06, "loss": 0.09775066375732422, "step": 599 }, { "epoch": 0.08360621472862816, "grad_norm": 3.7450761795043945, "learning_rate": 9.995796441559052e-06, "loss": 0.11677360534667969, "step": 600 }, { "epoch": 0.08374555841984255, "grad_norm": 1.4735296964645386, "learning_rate": 9.995699266601728e-06, "loss": 0.095245361328125, "step": 601 }, { "epoch": 0.08388490211105692, "grad_norm": 2.860142230987549, "learning_rate": 9.995600981714082e-06, "loss": 0.11009407043457031, "step": 602 }, { "epoch": 0.0840242458022713, "grad_norm": 1.966826319694519, "learning_rate": 9.995501586917949e-06, "loss": 0.12100410461425781, "step": 603 }, { "epoch": 0.08416358949348568, "grad_norm": 1.3374533653259277, "learning_rate": 9.99540108223541e-06, "loss": 0.09165716171264648, "step": 604 }, { "epoch": 0.08430293318470006, "grad_norm": 2.150784969329834, "learning_rate": 9.9952994676888e-06, "loss": 0.08594703674316406, "step": 605 }, { "epoch": 0.08444227687591445, "grad_norm": 4.801421165466309, "learning_rate": 9.995196743300693e-06, "loss": 0.11618232727050781, "step": 606 }, { "epoch": 0.08458162056712883, "grad_norm": 2.792433738708496, "learning_rate": 9.995092909093911e-06, "loss": 0.09934234619140625, "step": 607 }, { "epoch": 0.0847209642583432, "grad_norm": 2.372406482696533, "learning_rate": 9.994987965091525e-06, "loss": 0.10601806640625, "step": 608 }, { "epoch": 0.08486030794955758, "grad_norm": 3.385664939880371, "learning_rate": 9.994881911316849e-06, "loss": 0.11513710021972656, "step": 609 }, { "epoch": 0.08499965164077196, "grad_norm": 2.1993255615234375, "learning_rate": 9.99477474779345e-06, "loss": 0.09685730934143066, "step": 610 }, { "epoch": 0.08513899533198635, "grad_norm": 1.972917079925537, "learning_rate": 9.994666474545133e-06, "loss": 0.10904693603515625, "step": 611 }, { "epoch": 0.08527833902320073, "grad_norm": 4.740463733673096, "learning_rate": 9.994557091595956e-06, "loss": 0.11053085327148438, "step": 612 }, { "epoch": 0.0854176827144151, "grad_norm": 5.6237688064575195, "learning_rate": 9.99444659897022e-06, "loss": 0.09964370727539062, "step": 613 }, { "epoch": 0.08555702640562948, "grad_norm": 2.9608898162841797, "learning_rate": 9.994334996692476e-06, "loss": 0.1201629638671875, "step": 614 }, { "epoch": 0.08569637009684386, "grad_norm": 1.8704092502593994, "learning_rate": 9.994222284787519e-06, "loss": 0.11091041564941406, "step": 615 }, { "epoch": 0.08583571378805825, "grad_norm": 3.276474952697754, "learning_rate": 9.99410846328039e-06, "loss": 0.1142578125, "step": 616 }, { "epoch": 0.08597505747927263, "grad_norm": 0.974380373954773, "learning_rate": 9.993993532196376e-06, "loss": 0.10235786437988281, "step": 617 }, { "epoch": 0.086114401170487, "grad_norm": 1.9812670946121216, "learning_rate": 9.993877491561015e-06, "loss": 0.10399627685546875, "step": 618 }, { "epoch": 0.08625374486170138, "grad_norm": 1.8413561582565308, "learning_rate": 9.99376034140009e-06, "loss": 0.09668922424316406, "step": 619 }, { "epoch": 0.08639308855291576, "grad_norm": 1.6309592723846436, "learning_rate": 9.993642081739623e-06, "loss": 0.09733963012695312, "step": 620 }, { "epoch": 0.08653243224413015, "grad_norm": 2.4667487144470215, "learning_rate": 9.993522712605895e-06, "loss": 0.09309768676757812, "step": 621 }, { "epoch": 0.08667177593534453, "grad_norm": 3.9772067070007324, "learning_rate": 9.993402234025422e-06, "loss": 0.11234283447265625, "step": 622 }, { "epoch": 0.08681111962655891, "grad_norm": 1.7372419834136963, "learning_rate": 9.993280646024975e-06, "loss": 0.12358999252319336, "step": 623 }, { "epoch": 0.08695046331777329, "grad_norm": 1.9285026788711548, "learning_rate": 9.993157948631566e-06, "loss": 0.10853958129882812, "step": 624 }, { "epoch": 0.08708980700898766, "grad_norm": 2.037374258041382, "learning_rate": 9.993034141872459e-06, "loss": 0.11864662170410156, "step": 625 }, { "epoch": 0.08722915070020205, "grad_norm": 2.2781243324279785, "learning_rate": 9.992909225775157e-06, "loss": 0.11007881164550781, "step": 626 }, { "epoch": 0.08736849439141643, "grad_norm": 1.525865077972412, "learning_rate": 9.992783200367414e-06, "loss": 0.11215782165527344, "step": 627 }, { "epoch": 0.08750783808263081, "grad_norm": 2.858285427093506, "learning_rate": 9.992656065677234e-06, "loss": 0.10587692260742188, "step": 628 }, { "epoch": 0.08764718177384519, "grad_norm": 1.7687267065048218, "learning_rate": 9.992527821732858e-06, "loss": 0.07219886779785156, "step": 629 }, { "epoch": 0.08778652546505956, "grad_norm": 2.7056398391723633, "learning_rate": 9.992398468562782e-06, "loss": 0.0839996337890625, "step": 630 }, { "epoch": 0.08792586915627396, "grad_norm": 5.781732559204102, "learning_rate": 9.992268006195744e-06, "loss": 0.1220703125, "step": 631 }, { "epoch": 0.08806521284748833, "grad_norm": 3.182830333709717, "learning_rate": 9.992136434660733e-06, "loss": 0.11283683776855469, "step": 632 }, { "epoch": 0.08820455653870271, "grad_norm": 3.909979820251465, "learning_rate": 9.992003753986976e-06, "loss": 0.10323905944824219, "step": 633 }, { "epoch": 0.08834390022991709, "grad_norm": 3.2606606483459473, "learning_rate": 9.991869964203955e-06, "loss": 0.10074996948242188, "step": 634 }, { "epoch": 0.08848324392113147, "grad_norm": 2.4856340885162354, "learning_rate": 9.991735065341394e-06, "loss": 0.100433349609375, "step": 635 }, { "epoch": 0.08862258761234586, "grad_norm": 4.138876438140869, "learning_rate": 9.991599057429266e-06, "loss": 0.09566497802734375, "step": 636 }, { "epoch": 0.08876193130356023, "grad_norm": 5.18108606338501, "learning_rate": 9.991461940497786e-06, "loss": 0.14550209045410156, "step": 637 }, { "epoch": 0.08890127499477461, "grad_norm": 1.1466851234436035, "learning_rate": 9.991323714577421e-06, "loss": 0.09231185913085938, "step": 638 }, { "epoch": 0.08904061868598899, "grad_norm": 2.3660190105438232, "learning_rate": 9.99118437969888e-06, "loss": 0.1123504638671875, "step": 639 }, { "epoch": 0.08917996237720337, "grad_norm": 3.9972572326660156, "learning_rate": 9.99104393589312e-06, "loss": 0.12134647369384766, "step": 640 }, { "epoch": 0.08931930606841776, "grad_norm": 1.2318658828735352, "learning_rate": 9.990902383191346e-06, "loss": 0.12644577026367188, "step": 641 }, { "epoch": 0.08945864975963214, "grad_norm": 1.424584150314331, "learning_rate": 9.990759721625005e-06, "loss": 0.0949850082397461, "step": 642 }, { "epoch": 0.08959799345084651, "grad_norm": 1.835664987564087, "learning_rate": 9.990615951225797e-06, "loss": 0.08685111999511719, "step": 643 }, { "epoch": 0.08973733714206089, "grad_norm": 1.743386149406433, "learning_rate": 9.99047107202566e-06, "loss": 0.12126350402832031, "step": 644 }, { "epoch": 0.08987668083327527, "grad_norm": 1.2302591800689697, "learning_rate": 9.990325084056787e-06, "loss": 0.09798812866210938, "step": 645 }, { "epoch": 0.09001602452448966, "grad_norm": 2.834160566329956, "learning_rate": 9.99017798735161e-06, "loss": 0.09411811828613281, "step": 646 }, { "epoch": 0.09015536821570404, "grad_norm": 2.784489870071411, "learning_rate": 9.990029781942814e-06, "loss": 0.0896453857421875, "step": 647 }, { "epoch": 0.09029471190691842, "grad_norm": 2.386698007583618, "learning_rate": 9.989880467863323e-06, "loss": 0.11586761474609375, "step": 648 }, { "epoch": 0.09043405559813279, "grad_norm": 3.3977205753326416, "learning_rate": 9.989730045146313e-06, "loss": 0.10122108459472656, "step": 649 }, { "epoch": 0.09057339928934717, "grad_norm": 2.7924249172210693, "learning_rate": 9.989578513825205e-06, "loss": 0.15230178833007812, "step": 650 }, { "epoch": 0.09071274298056156, "grad_norm": 4.573820114135742, "learning_rate": 9.989425873933666e-06, "loss": 0.11007881164550781, "step": 651 }, { "epoch": 0.09085208667177594, "grad_norm": 1.7082947492599487, "learning_rate": 9.989272125505606e-06, "loss": 0.09856033325195312, "step": 652 }, { "epoch": 0.09099143036299032, "grad_norm": 3.2926292419433594, "learning_rate": 9.98911726857519e-06, "loss": 0.10404014587402344, "step": 653 }, { "epoch": 0.0911307740542047, "grad_norm": 3.5778794288635254, "learning_rate": 9.988961303176818e-06, "loss": 0.11270618438720703, "step": 654 }, { "epoch": 0.09127011774541907, "grad_norm": 3.617037296295166, "learning_rate": 9.988804229345146e-06, "loss": 0.10513687133789062, "step": 655 }, { "epoch": 0.09140946143663346, "grad_norm": 1.9419587850570679, "learning_rate": 9.98864604711507e-06, "loss": 0.0999603271484375, "step": 656 }, { "epoch": 0.09154880512784784, "grad_norm": 3.0145645141601562, "learning_rate": 9.988486756521733e-06, "loss": 0.12877273559570312, "step": 657 }, { "epoch": 0.09168814881906222, "grad_norm": 2.787506580352783, "learning_rate": 9.98832635760053e-06, "loss": 0.09153270721435547, "step": 658 }, { "epoch": 0.0918274925102766, "grad_norm": 1.6131328344345093, "learning_rate": 9.988164850387095e-06, "loss": 0.10790634155273438, "step": 659 }, { "epoch": 0.09196683620149097, "grad_norm": 2.528409004211426, "learning_rate": 9.988002234917312e-06, "loss": 0.1375885009765625, "step": 660 }, { "epoch": 0.09210617989270536, "grad_norm": 1.5224435329437256, "learning_rate": 9.987838511227311e-06, "loss": 0.10797882080078125, "step": 661 }, { "epoch": 0.09224552358391974, "grad_norm": 1.3194559812545776, "learning_rate": 9.987673679353467e-06, "loss": 0.09380912780761719, "step": 662 }, { "epoch": 0.09238486727513412, "grad_norm": 2.14178204536438, "learning_rate": 9.987507739332401e-06, "loss": 0.11218452453613281, "step": 663 }, { "epoch": 0.0925242109663485, "grad_norm": 2.736180305480957, "learning_rate": 9.987340691200984e-06, "loss": 0.09060096740722656, "step": 664 }, { "epoch": 0.09266355465756287, "grad_norm": 2.032010316848755, "learning_rate": 9.987172534996326e-06, "loss": 0.10441017150878906, "step": 665 }, { "epoch": 0.09280289834877727, "grad_norm": 1.749715805053711, "learning_rate": 9.98700327075579e-06, "loss": 0.07816934585571289, "step": 666 }, { "epoch": 0.09294224203999164, "grad_norm": 3.8271918296813965, "learning_rate": 9.986832898516985e-06, "loss": 0.11358833312988281, "step": 667 }, { "epoch": 0.09308158573120602, "grad_norm": 1.632064700126648, "learning_rate": 9.986661418317759e-06, "loss": 0.12437820434570312, "step": 668 }, { "epoch": 0.0932209294224204, "grad_norm": 1.5567821264266968, "learning_rate": 9.986488830196215e-06, "loss": 0.10905742645263672, "step": 669 }, { "epoch": 0.09336027311363478, "grad_norm": 0.8736646771430969, "learning_rate": 9.986315134190694e-06, "loss": 0.08147811889648438, "step": 670 }, { "epoch": 0.09349961680484917, "grad_norm": 2.1038918495178223, "learning_rate": 9.98614033033979e-06, "loss": 0.11187314987182617, "step": 671 }, { "epoch": 0.09363896049606354, "grad_norm": 3.353769540786743, "learning_rate": 9.985964418682342e-06, "loss": 0.12425041198730469, "step": 672 }, { "epoch": 0.09377830418727792, "grad_norm": 2.5576508045196533, "learning_rate": 9.985787399257431e-06, "loss": 0.09802627563476562, "step": 673 }, { "epoch": 0.0939176478784923, "grad_norm": 3.034010171890259, "learning_rate": 9.985609272104387e-06, "loss": 0.12883377075195312, "step": 674 }, { "epoch": 0.09405699156970668, "grad_norm": 3.713082790374756, "learning_rate": 9.985430037262787e-06, "loss": 0.12043190002441406, "step": 675 }, { "epoch": 0.09419633526092107, "grad_norm": 7.526726245880127, "learning_rate": 9.98524969477245e-06, "loss": 0.1190176010131836, "step": 676 }, { "epoch": 0.09433567895213545, "grad_norm": 4.221823215484619, "learning_rate": 9.985068244673449e-06, "loss": 0.0942535400390625, "step": 677 }, { "epoch": 0.09447502264334982, "grad_norm": 4.698761940002441, "learning_rate": 9.984885687006093e-06, "loss": 0.14894485473632812, "step": 678 }, { "epoch": 0.0946143663345642, "grad_norm": 2.4052653312683105, "learning_rate": 9.984702021810944e-06, "loss": 0.11540603637695312, "step": 679 }, { "epoch": 0.09475371002577858, "grad_norm": 3.045351505279541, "learning_rate": 9.98451724912881e-06, "loss": 0.10387229919433594, "step": 680 }, { "epoch": 0.09489305371699297, "grad_norm": 2.5853540897369385, "learning_rate": 9.984331369000739e-06, "loss": 0.1091461181640625, "step": 681 }, { "epoch": 0.09503239740820735, "grad_norm": 3.2854673862457275, "learning_rate": 9.984144381468035e-06, "loss": 0.09879302978515625, "step": 682 }, { "epoch": 0.09517174109942172, "grad_norm": 1.6698999404907227, "learning_rate": 9.983956286572238e-06, "loss": 0.11076831817626953, "step": 683 }, { "epoch": 0.0953110847906361, "grad_norm": 2.4952433109283447, "learning_rate": 9.983767084355141e-06, "loss": 0.1062016487121582, "step": 684 }, { "epoch": 0.09545042848185048, "grad_norm": 3.789139986038208, "learning_rate": 9.983576774858776e-06, "loss": 0.08889198303222656, "step": 685 }, { "epoch": 0.09558977217306487, "grad_norm": 2.5171399116516113, "learning_rate": 9.983385358125432e-06, "loss": 0.09064865112304688, "step": 686 }, { "epoch": 0.09572911586427925, "grad_norm": 0.9583251476287842, "learning_rate": 9.983192834197633e-06, "loss": 0.10217857360839844, "step": 687 }, { "epoch": 0.09586845955549363, "grad_norm": 1.4222511053085327, "learning_rate": 9.982999203118153e-06, "loss": 0.083465576171875, "step": 688 }, { "epoch": 0.096007803246708, "grad_norm": 1.5851069688796997, "learning_rate": 9.982804464930016e-06, "loss": 0.11659431457519531, "step": 689 }, { "epoch": 0.09614714693792238, "grad_norm": 1.347166657447815, "learning_rate": 9.982608619676485e-06, "loss": 0.1036834716796875, "step": 690 }, { "epoch": 0.09628649062913676, "grad_norm": 6.2561936378479, "learning_rate": 9.982411667401076e-06, "loss": 0.12367725372314453, "step": 691 }, { "epoch": 0.09642583432035115, "grad_norm": 3.036735773086548, "learning_rate": 9.982213608147541e-06, "loss": 0.1369953155517578, "step": 692 }, { "epoch": 0.09656517801156553, "grad_norm": 2.8223204612731934, "learning_rate": 9.982014441959891e-06, "loss": 0.1033172607421875, "step": 693 }, { "epoch": 0.0967045217027799, "grad_norm": 2.432889461517334, "learning_rate": 9.98181416888237e-06, "loss": 0.0842742919921875, "step": 694 }, { "epoch": 0.09684386539399428, "grad_norm": 3.9409234523773193, "learning_rate": 9.981612788959481e-06, "loss": 0.11580276489257812, "step": 695 }, { "epoch": 0.09698320908520866, "grad_norm": 1.9701169729232788, "learning_rate": 9.981410302235962e-06, "loss": 0.10985374450683594, "step": 696 }, { "epoch": 0.09712255277642305, "grad_norm": 1.7083005905151367, "learning_rate": 9.9812067087568e-06, "loss": 0.11327266693115234, "step": 697 }, { "epoch": 0.09726189646763743, "grad_norm": 1.87345290184021, "learning_rate": 9.98100200856723e-06, "loss": 0.09745407104492188, "step": 698 }, { "epoch": 0.0974012401588518, "grad_norm": 4.0686187744140625, "learning_rate": 9.980796201712734e-06, "loss": 0.10778617858886719, "step": 699 }, { "epoch": 0.09754058385006618, "grad_norm": 3.212695360183716, "learning_rate": 9.980589288239034e-06, "loss": 0.0975961685180664, "step": 700 }, { "epoch": 0.09767992754128056, "grad_norm": 1.3126630783081055, "learning_rate": 9.980381268192103e-06, "loss": 0.10184669494628906, "step": 701 }, { "epoch": 0.09781927123249495, "grad_norm": 5.132455825805664, "learning_rate": 9.980172141618159e-06, "loss": 0.13627243041992188, "step": 702 }, { "epoch": 0.09795861492370933, "grad_norm": 2.80426287651062, "learning_rate": 9.979961908563663e-06, "loss": 0.08942222595214844, "step": 703 }, { "epoch": 0.09809795861492371, "grad_norm": 0.9429901242256165, "learning_rate": 9.979750569075325e-06, "loss": 0.0957937240600586, "step": 704 }, { "epoch": 0.09823730230613809, "grad_norm": 2.358445167541504, "learning_rate": 9.979538123200102e-06, "loss": 0.10777473449707031, "step": 705 }, { "epoch": 0.09837664599735246, "grad_norm": 1.3750903606414795, "learning_rate": 9.979324570985194e-06, "loss": 0.09464550018310547, "step": 706 }, { "epoch": 0.09851598968856685, "grad_norm": 1.6715707778930664, "learning_rate": 9.979109912478044e-06, "loss": 0.09626483917236328, "step": 707 }, { "epoch": 0.09865533337978123, "grad_norm": 2.680370569229126, "learning_rate": 9.978894147726346e-06, "loss": 0.14476966857910156, "step": 708 }, { "epoch": 0.09879467707099561, "grad_norm": 1.6363295316696167, "learning_rate": 9.97867727677804e-06, "loss": 0.08297920227050781, "step": 709 }, { "epoch": 0.09893402076220999, "grad_norm": 2.5092785358428955, "learning_rate": 9.978459299681306e-06, "loss": 0.12119674682617188, "step": 710 }, { "epoch": 0.09907336445342436, "grad_norm": 1.9921579360961914, "learning_rate": 9.978240216484579e-06, "loss": 0.0884237289428711, "step": 711 }, { "epoch": 0.09921270814463876, "grad_norm": 1.0170505046844482, "learning_rate": 9.978020027236529e-06, "loss": 0.09255695343017578, "step": 712 }, { "epoch": 0.09935205183585313, "grad_norm": 1.1628023386001587, "learning_rate": 9.977798731986079e-06, "loss": 0.08703231811523438, "step": 713 }, { "epoch": 0.09949139552706751, "grad_norm": 3.1536803245544434, "learning_rate": 9.977576330782397e-06, "loss": 0.11497306823730469, "step": 714 }, { "epoch": 0.09963073921828189, "grad_norm": 1.1799778938293457, "learning_rate": 9.977352823674893e-06, "loss": 0.08346271514892578, "step": 715 }, { "epoch": 0.09977008290949627, "grad_norm": 3.746068239212036, "learning_rate": 9.977128210713227e-06, "loss": 0.138885498046875, "step": 716 }, { "epoch": 0.09990942660071066, "grad_norm": 0.8772028684616089, "learning_rate": 9.976902491947303e-06, "loss": 0.11251258850097656, "step": 717 }, { "epoch": 0.10004877029192503, "grad_norm": 3.58586049079895, "learning_rate": 9.976675667427268e-06, "loss": 0.11344528198242188, "step": 718 }, { "epoch": 0.10018811398313941, "grad_norm": 4.82117223739624, "learning_rate": 9.976447737203521e-06, "loss": 0.14815902709960938, "step": 719 }, { "epoch": 0.10032745767435379, "grad_norm": 1.9808851480484009, "learning_rate": 9.976218701326701e-06, "loss": 0.09821796417236328, "step": 720 }, { "epoch": 0.10046680136556817, "grad_norm": 1.1042473316192627, "learning_rate": 9.975988559847693e-06, "loss": 0.10564041137695312, "step": 721 }, { "epoch": 0.10060614505678256, "grad_norm": 1.6728925704956055, "learning_rate": 9.975757312817634e-06, "loss": 0.08536911010742188, "step": 722 }, { "epoch": 0.10074548874799694, "grad_norm": 2.1889777183532715, "learning_rate": 9.975524960287895e-06, "loss": 0.08949661254882812, "step": 723 }, { "epoch": 0.10088483243921131, "grad_norm": 1.6933765411376953, "learning_rate": 9.975291502310105e-06, "loss": 0.08871936798095703, "step": 724 }, { "epoch": 0.10102417613042569, "grad_norm": 0.7890425324440002, "learning_rate": 9.975056938936129e-06, "loss": 0.08868026733398438, "step": 725 }, { "epoch": 0.10116351982164007, "grad_norm": 2.4679064750671387, "learning_rate": 9.974821270218086e-06, "loss": 0.14049148559570312, "step": 726 }, { "epoch": 0.10130286351285446, "grad_norm": 2.1149818897247314, "learning_rate": 9.974584496208334e-06, "loss": 0.08620834350585938, "step": 727 }, { "epoch": 0.10144220720406884, "grad_norm": 2.5277271270751953, "learning_rate": 9.974346616959476e-06, "loss": 0.10886001586914062, "step": 728 }, { "epoch": 0.10158155089528322, "grad_norm": 1.5283364057540894, "learning_rate": 9.974107632524368e-06, "loss": 0.08883094787597656, "step": 729 }, { "epoch": 0.10172089458649759, "grad_norm": 1.33802330493927, "learning_rate": 9.973867542956104e-06, "loss": 0.08467960357666016, "step": 730 }, { "epoch": 0.10186023827771197, "grad_norm": 1.874732494354248, "learning_rate": 9.973626348308027e-06, "loss": 0.10538673400878906, "step": 731 }, { "epoch": 0.10199958196892636, "grad_norm": 2.7216391563415527, "learning_rate": 9.973384048633728e-06, "loss": 0.10394477844238281, "step": 732 }, { "epoch": 0.10213892566014074, "grad_norm": 3.500472068786621, "learning_rate": 9.973140643987034e-06, "loss": 0.08011341094970703, "step": 733 }, { "epoch": 0.10227826935135512, "grad_norm": 1.8805209398269653, "learning_rate": 9.97289613442203e-06, "loss": 0.11932945251464844, "step": 734 }, { "epoch": 0.1024176130425695, "grad_norm": 2.1218197345733643, "learning_rate": 9.972650519993037e-06, "loss": 0.12036895751953125, "step": 735 }, { "epoch": 0.10255695673378387, "grad_norm": 3.5556702613830566, "learning_rate": 9.972403800754626e-06, "loss": 0.11461067199707031, "step": 736 }, { "epoch": 0.10269630042499826, "grad_norm": 1.4330123662948608, "learning_rate": 9.972155976761613e-06, "loss": 0.08630943298339844, "step": 737 }, { "epoch": 0.10283564411621264, "grad_norm": 1.1850618124008179, "learning_rate": 9.971907048069058e-06, "loss": 0.11071586608886719, "step": 738 }, { "epoch": 0.10297498780742702, "grad_norm": 1.752241611480713, "learning_rate": 9.971657014732268e-06, "loss": 0.10502815246582031, "step": 739 }, { "epoch": 0.1031143314986414, "grad_norm": 1.3441224098205566, "learning_rate": 9.971405876806792e-06, "loss": 0.09416770935058594, "step": 740 }, { "epoch": 0.10325367518985577, "grad_norm": 1.9720158576965332, "learning_rate": 9.971153634348431e-06, "loss": 0.09287643432617188, "step": 741 }, { "epoch": 0.10339301888107016, "grad_norm": 4.877232551574707, "learning_rate": 9.970900287413225e-06, "loss": 0.12223529815673828, "step": 742 }, { "epoch": 0.10353236257228454, "grad_norm": 3.0457241535186768, "learning_rate": 9.970645836057464e-06, "loss": 0.10464000701904297, "step": 743 }, { "epoch": 0.10367170626349892, "grad_norm": 2.4532203674316406, "learning_rate": 9.970390280337681e-06, "loss": 0.1247100830078125, "step": 744 }, { "epoch": 0.1038110499547133, "grad_norm": 1.3197269439697266, "learning_rate": 9.970133620310652e-06, "loss": 0.12008857727050781, "step": 745 }, { "epoch": 0.10395039364592767, "grad_norm": 2.100801944732666, "learning_rate": 9.969875856033402e-06, "loss": 0.1019124984741211, "step": 746 }, { "epoch": 0.10408973733714207, "grad_norm": 2.4693398475646973, "learning_rate": 9.969616987563202e-06, "loss": 0.10694122314453125, "step": 747 }, { "epoch": 0.10422908102835644, "grad_norm": 1.682647466659546, "learning_rate": 9.969357014957564e-06, "loss": 0.07581138610839844, "step": 748 }, { "epoch": 0.10436842471957082, "grad_norm": 1.4330832958221436, "learning_rate": 9.969095938274251e-06, "loss": 0.10183906555175781, "step": 749 }, { "epoch": 0.1045077684107852, "grad_norm": 1.4564610719680786, "learning_rate": 9.968833757571268e-06, "loss": 0.09746074676513672, "step": 750 }, { "epoch": 0.10464711210199958, "grad_norm": 3.6809611320495605, "learning_rate": 9.968570472906862e-06, "loss": 0.09102916717529297, "step": 751 }, { "epoch": 0.10478645579321397, "grad_norm": 2.2014899253845215, "learning_rate": 9.968306084339534e-06, "loss": 0.10172653198242188, "step": 752 }, { "epoch": 0.10492579948442834, "grad_norm": 1.0506147146224976, "learning_rate": 9.96804059192802e-06, "loss": 0.10642242431640625, "step": 753 }, { "epoch": 0.10506514317564272, "grad_norm": 1.2428604364395142, "learning_rate": 9.96777399573131e-06, "loss": 0.09770584106445312, "step": 754 }, { "epoch": 0.1052044868668571, "grad_norm": 1.1429522037506104, "learning_rate": 9.967506295808634e-06, "loss": 0.08932304382324219, "step": 755 }, { "epoch": 0.10534383055807148, "grad_norm": 2.466229200363159, "learning_rate": 9.96723749221947e-06, "loss": 0.119140625, "step": 756 }, { "epoch": 0.10548317424928587, "grad_norm": 2.0744099617004395, "learning_rate": 9.96696758502354e-06, "loss": 0.10016441345214844, "step": 757 }, { "epoch": 0.10562251794050025, "grad_norm": 3.722377300262451, "learning_rate": 9.966696574280808e-06, "loss": 0.11511516571044922, "step": 758 }, { "epoch": 0.10576186163171462, "grad_norm": 3.814262628555298, "learning_rate": 9.966424460051489e-06, "loss": 0.11670875549316406, "step": 759 }, { "epoch": 0.105901205322929, "grad_norm": 4.375004291534424, "learning_rate": 9.96615124239604e-06, "loss": 0.1206817626953125, "step": 760 }, { "epoch": 0.10604054901414338, "grad_norm": 1.4195188283920288, "learning_rate": 9.965876921375165e-06, "loss": 0.09574699401855469, "step": 761 }, { "epoch": 0.10617989270535777, "grad_norm": 0.9377275109291077, "learning_rate": 9.965601497049812e-06, "loss": 0.08892154693603516, "step": 762 }, { "epoch": 0.10631923639657215, "grad_norm": 1.2069249153137207, "learning_rate": 9.965324969481172e-06, "loss": 0.08740425109863281, "step": 763 }, { "epoch": 0.10645858008778653, "grad_norm": 2.529149055480957, "learning_rate": 9.965047338730685e-06, "loss": 0.12810516357421875, "step": 764 }, { "epoch": 0.1065979237790009, "grad_norm": 0.9171305894851685, "learning_rate": 9.964768604860033e-06, "loss": 0.08461761474609375, "step": 765 }, { "epoch": 0.10673726747021528, "grad_norm": 1.903211236000061, "learning_rate": 9.964488767931144e-06, "loss": 0.1058807373046875, "step": 766 }, { "epoch": 0.10687661116142967, "grad_norm": 2.842460870742798, "learning_rate": 9.964207828006191e-06, "loss": 0.11818504333496094, "step": 767 }, { "epoch": 0.10701595485264405, "grad_norm": 1.292407512664795, "learning_rate": 9.963925785147595e-06, "loss": 0.09690284729003906, "step": 768 }, { "epoch": 0.10715529854385843, "grad_norm": 1.2495291233062744, "learning_rate": 9.963642639418018e-06, "loss": 0.09445571899414062, "step": 769 }, { "epoch": 0.1072946422350728, "grad_norm": 1.3123372793197632, "learning_rate": 9.963358390880367e-06, "loss": 0.09674739837646484, "step": 770 }, { "epoch": 0.10743398592628718, "grad_norm": 2.2593939304351807, "learning_rate": 9.963073039597798e-06, "loss": 0.11729049682617188, "step": 771 }, { "epoch": 0.10757332961750157, "grad_norm": 1.4637072086334229, "learning_rate": 9.962786585633708e-06, "loss": 0.08953094482421875, "step": 772 }, { "epoch": 0.10771267330871595, "grad_norm": 2.144435405731201, "learning_rate": 9.962499029051742e-06, "loss": 0.09616947174072266, "step": 773 }, { "epoch": 0.10785201699993033, "grad_norm": 1.542245864868164, "learning_rate": 9.962210369915787e-06, "loss": 0.10188865661621094, "step": 774 }, { "epoch": 0.1079913606911447, "grad_norm": 0.9170899391174316, "learning_rate": 9.961920608289977e-06, "loss": 0.08798027038574219, "step": 775 }, { "epoch": 0.10813070438235908, "grad_norm": 1.108333706855774, "learning_rate": 9.96162974423869e-06, "loss": 0.09201431274414062, "step": 776 }, { "epoch": 0.10827004807357347, "grad_norm": 1.630070447921753, "learning_rate": 9.961337777826549e-06, "loss": 0.10082626342773438, "step": 777 }, { "epoch": 0.10840939176478785, "grad_norm": 4.0974836349487305, "learning_rate": 9.961044709118425e-06, "loss": 0.13223838806152344, "step": 778 }, { "epoch": 0.10854873545600223, "grad_norm": 2.010678291320801, "learning_rate": 9.960750538179428e-06, "loss": 0.10019874572753906, "step": 779 }, { "epoch": 0.1086880791472166, "grad_norm": 1.1926679611206055, "learning_rate": 9.960455265074918e-06, "loss": 0.07912254333496094, "step": 780 }, { "epoch": 0.10882742283843098, "grad_norm": 1.4911532402038574, "learning_rate": 9.960158889870495e-06, "loss": 0.09974288940429688, "step": 781 }, { "epoch": 0.10896676652964538, "grad_norm": 2.349036455154419, "learning_rate": 9.959861412632011e-06, "loss": 0.09983158111572266, "step": 782 }, { "epoch": 0.10910611022085975, "grad_norm": 1.5691572427749634, "learning_rate": 9.959562833425557e-06, "loss": 0.10206222534179688, "step": 783 }, { "epoch": 0.10924545391207413, "grad_norm": 2.0674378871917725, "learning_rate": 9.95926315231747e-06, "loss": 0.12126541137695312, "step": 784 }, { "epoch": 0.10938479760328851, "grad_norm": 1.599292278289795, "learning_rate": 9.958962369374333e-06, "loss": 0.12720870971679688, "step": 785 }, { "epoch": 0.10952414129450289, "grad_norm": 0.9647958278656006, "learning_rate": 9.95866048466297e-06, "loss": 0.10446929931640625, "step": 786 }, { "epoch": 0.10966348498571728, "grad_norm": 1.6028108596801758, "learning_rate": 9.958357498250457e-06, "loss": 0.07971572875976562, "step": 787 }, { "epoch": 0.10980282867693165, "grad_norm": 1.0687280893325806, "learning_rate": 9.95805341020411e-06, "loss": 0.11078453063964844, "step": 788 }, { "epoch": 0.10994217236814603, "grad_norm": 1.4505070447921753, "learning_rate": 9.957748220591487e-06, "loss": 0.09646034240722656, "step": 789 }, { "epoch": 0.11008151605936041, "grad_norm": 1.3765735626220703, "learning_rate": 9.9574419294804e-06, "loss": 0.12560081481933594, "step": 790 }, { "epoch": 0.11022085975057479, "grad_norm": 1.8675143718719482, "learning_rate": 9.957134536938894e-06, "loss": 0.11774158477783203, "step": 791 }, { "epoch": 0.11036020344178918, "grad_norm": 2.2310221195220947, "learning_rate": 9.956826043035268e-06, "loss": 0.12749767303466797, "step": 792 }, { "epoch": 0.11049954713300356, "grad_norm": 1.6723136901855469, "learning_rate": 9.956516447838063e-06, "loss": 0.11185264587402344, "step": 793 }, { "epoch": 0.11063889082421793, "grad_norm": 0.9014464020729065, "learning_rate": 9.95620575141606e-06, "loss": 0.08545875549316406, "step": 794 }, { "epoch": 0.11077823451543231, "grad_norm": 1.2213530540466309, "learning_rate": 9.955893953838293e-06, "loss": 0.09136009216308594, "step": 795 }, { "epoch": 0.11091757820664669, "grad_norm": 0.9696155786514282, "learning_rate": 9.955581055174034e-06, "loss": 0.10777854919433594, "step": 796 }, { "epoch": 0.11105692189786108, "grad_norm": 3.757479190826416, "learning_rate": 9.9552670554928e-06, "loss": 0.11300086975097656, "step": 797 }, { "epoch": 0.11119626558907546, "grad_norm": 2.420297384262085, "learning_rate": 9.954951954864361e-06, "loss": 0.1083221435546875, "step": 798 }, { "epoch": 0.11133560928028984, "grad_norm": 0.8774046301841736, "learning_rate": 9.954635753358718e-06, "loss": 0.09822559356689453, "step": 799 }, { "epoch": 0.11147495297150421, "grad_norm": 2.087707042694092, "learning_rate": 9.954318451046128e-06, "loss": 0.09160995483398438, "step": 800 }, { "epoch": 0.11161429666271859, "grad_norm": 3.6787891387939453, "learning_rate": 9.954000047997088e-06, "loss": 0.130340576171875, "step": 801 }, { "epoch": 0.11175364035393298, "grad_norm": 1.7617974281311035, "learning_rate": 9.953680544282338e-06, "loss": 0.09482574462890625, "step": 802 }, { "epoch": 0.11189298404514736, "grad_norm": 1.6886796951293945, "learning_rate": 9.953359939972866e-06, "loss": 0.0901803970336914, "step": 803 }, { "epoch": 0.11203232773636174, "grad_norm": 1.4043207168579102, "learning_rate": 9.953038235139902e-06, "loss": 0.09095191955566406, "step": 804 }, { "epoch": 0.11217167142757611, "grad_norm": 1.7212027311325073, "learning_rate": 9.952715429854923e-06, "loss": 0.09382820129394531, "step": 805 }, { "epoch": 0.11231101511879049, "grad_norm": 1.3421287536621094, "learning_rate": 9.952391524189646e-06, "loss": 0.09499645233154297, "step": 806 }, { "epoch": 0.11245035881000488, "grad_norm": 2.5850937366485596, "learning_rate": 9.952066518216039e-06, "loss": 0.10544490814208984, "step": 807 }, { "epoch": 0.11258970250121926, "grad_norm": 2.2632040977478027, "learning_rate": 9.951740412006308e-06, "loss": 0.08260536193847656, "step": 808 }, { "epoch": 0.11272904619243364, "grad_norm": 2.1957855224609375, "learning_rate": 9.95141320563291e-06, "loss": 0.11352729797363281, "step": 809 }, { "epoch": 0.11286838988364802, "grad_norm": 1.8687235116958618, "learning_rate": 9.951084899168537e-06, "loss": 0.10365104675292969, "step": 810 }, { "epoch": 0.11300773357486239, "grad_norm": 1.0006659030914307, "learning_rate": 9.950755492686138e-06, "loss": 0.0776834487915039, "step": 811 }, { "epoch": 0.11314707726607678, "grad_norm": 2.786501169204712, "learning_rate": 9.950424986258893e-06, "loss": 0.11076736450195312, "step": 812 }, { "epoch": 0.11328642095729116, "grad_norm": 0.695553183555603, "learning_rate": 9.950093379960238e-06, "loss": 0.08635711669921875, "step": 813 }, { "epoch": 0.11342576464850554, "grad_norm": 1.1318482160568237, "learning_rate": 9.949760673863846e-06, "loss": 0.11400032043457031, "step": 814 }, { "epoch": 0.11356510833971992, "grad_norm": 1.4155064821243286, "learning_rate": 9.949426868043638e-06, "loss": 0.09675788879394531, "step": 815 }, { "epoch": 0.1137044520309343, "grad_norm": 1.0636341571807861, "learning_rate": 9.949091962573775e-06, "loss": 0.08849525451660156, "step": 816 }, { "epoch": 0.11384379572214869, "grad_norm": 2.2004010677337646, "learning_rate": 9.94875595752867e-06, "loss": 0.06697225570678711, "step": 817 }, { "epoch": 0.11398313941336306, "grad_norm": 2.221762180328369, "learning_rate": 9.948418852982973e-06, "loss": 0.11858749389648438, "step": 818 }, { "epoch": 0.11412248310457744, "grad_norm": 1.4289008378982544, "learning_rate": 9.948080649011582e-06, "loss": 0.10202598571777344, "step": 819 }, { "epoch": 0.11426182679579182, "grad_norm": 1.018510103225708, "learning_rate": 9.947741345689635e-06, "loss": 0.07754707336425781, "step": 820 }, { "epoch": 0.1144011704870062, "grad_norm": 5.341399669647217, "learning_rate": 9.947400943092522e-06, "loss": 0.1364727020263672, "step": 821 }, { "epoch": 0.11454051417822059, "grad_norm": 2.2190592288970947, "learning_rate": 9.94705944129587e-06, "loss": 0.1136474609375, "step": 822 }, { "epoch": 0.11467985786943496, "grad_norm": 1.3611273765563965, "learning_rate": 9.946716840375552e-06, "loss": 0.10445022583007812, "step": 823 }, { "epoch": 0.11481920156064934, "grad_norm": 2.379213333129883, "learning_rate": 9.946373140407688e-06, "loss": 0.11188697814941406, "step": 824 }, { "epoch": 0.11495854525186372, "grad_norm": 2.6975340843200684, "learning_rate": 9.946028341468642e-06, "loss": 0.10360908508300781, "step": 825 }, { "epoch": 0.1150978889430781, "grad_norm": 2.604271411895752, "learning_rate": 9.945682443635015e-06, "loss": 0.08564352989196777, "step": 826 }, { "epoch": 0.11523723263429249, "grad_norm": 1.089497447013855, "learning_rate": 9.945335446983662e-06, "loss": 0.0843038558959961, "step": 827 }, { "epoch": 0.11537657632550687, "grad_norm": 0.9181999564170837, "learning_rate": 9.944987351591677e-06, "loss": 0.0876007080078125, "step": 828 }, { "epoch": 0.11551592001672124, "grad_norm": 2.01544451713562, "learning_rate": 9.944638157536399e-06, "loss": 0.09333038330078125, "step": 829 }, { "epoch": 0.11565526370793562, "grad_norm": 3.6506855487823486, "learning_rate": 9.94428786489541e-06, "loss": 0.1485729217529297, "step": 830 }, { "epoch": 0.11579460739915, "grad_norm": 1.6793065071105957, "learning_rate": 9.943936473746539e-06, "loss": 0.09402084350585938, "step": 831 }, { "epoch": 0.11593395109036439, "grad_norm": 2.08170485496521, "learning_rate": 9.943583984167853e-06, "loss": 0.09688472747802734, "step": 832 }, { "epoch": 0.11607329478157877, "grad_norm": 1.6456128358840942, "learning_rate": 9.94323039623767e-06, "loss": 0.09696006774902344, "step": 833 }, { "epoch": 0.11621263847279314, "grad_norm": 1.1506013870239258, "learning_rate": 9.942875710034549e-06, "loss": 0.07967567443847656, "step": 834 }, { "epoch": 0.11635198216400752, "grad_norm": 1.3888951539993286, "learning_rate": 9.942519925637293e-06, "loss": 0.09196662902832031, "step": 835 }, { "epoch": 0.1164913258552219, "grad_norm": 2.159106492996216, "learning_rate": 9.942163043124951e-06, "loss": 0.09074020385742188, "step": 836 }, { "epoch": 0.11663066954643629, "grad_norm": 2.839181661605835, "learning_rate": 9.941805062576811e-06, "loss": 0.10086250305175781, "step": 837 }, { "epoch": 0.11677001323765067, "grad_norm": 1.3319770097732544, "learning_rate": 9.941445984072408e-06, "loss": 0.09731483459472656, "step": 838 }, { "epoch": 0.11690935692886505, "grad_norm": 0.8901289701461792, "learning_rate": 9.941085807691524e-06, "loss": 0.0906362533569336, "step": 839 }, { "epoch": 0.11704870062007942, "grad_norm": 1.2774358987808228, "learning_rate": 9.94072453351418e-06, "loss": 0.09225082397460938, "step": 840 }, { "epoch": 0.1171880443112938, "grad_norm": 1.1507598161697388, "learning_rate": 9.940362161620644e-06, "loss": 0.08711814880371094, "step": 841 }, { "epoch": 0.11732738800250819, "grad_norm": 1.1571953296661377, "learning_rate": 9.939998692091427e-06, "loss": 0.09951257705688477, "step": 842 }, { "epoch": 0.11746673169372257, "grad_norm": 2.3634040355682373, "learning_rate": 9.939634125007279e-06, "loss": 0.07692146301269531, "step": 843 }, { "epoch": 0.11760607538493695, "grad_norm": 1.800382137298584, "learning_rate": 9.939268460449205e-06, "loss": 0.09414958953857422, "step": 844 }, { "epoch": 0.11774541907615133, "grad_norm": 2.1446101665496826, "learning_rate": 9.938901698498444e-06, "loss": 0.09654045104980469, "step": 845 }, { "epoch": 0.1178847627673657, "grad_norm": 1.2164088487625122, "learning_rate": 9.938533839236483e-06, "loss": 0.10442543029785156, "step": 846 }, { "epoch": 0.1180241064585801, "grad_norm": 1.554991602897644, "learning_rate": 9.938164882745051e-06, "loss": 0.10575675964355469, "step": 847 }, { "epoch": 0.11816345014979447, "grad_norm": 3.217015266418457, "learning_rate": 9.937794829106122e-06, "loss": 0.13063812255859375, "step": 848 }, { "epoch": 0.11830279384100885, "grad_norm": 1.7502083778381348, "learning_rate": 9.937423678401913e-06, "loss": 0.08990859985351562, "step": 849 }, { "epoch": 0.11844213753222323, "grad_norm": 1.3284517526626587, "learning_rate": 9.937051430714888e-06, "loss": 0.09270668029785156, "step": 850 }, { "epoch": 0.1185814812234376, "grad_norm": 1.5154290199279785, "learning_rate": 9.936678086127749e-06, "loss": 0.09395217895507812, "step": 851 }, { "epoch": 0.118720824914652, "grad_norm": 1.3850528001785278, "learning_rate": 9.936303644723446e-06, "loss": 0.07828712463378906, "step": 852 }, { "epoch": 0.11886016860586637, "grad_norm": 2.0882813930511475, "learning_rate": 9.93592810658517e-06, "loss": 0.13129043579101562, "step": 853 }, { "epoch": 0.11899951229708075, "grad_norm": 1.8136924505233765, "learning_rate": 9.935551471796358e-06, "loss": 0.08617019653320312, "step": 854 }, { "epoch": 0.11913885598829513, "grad_norm": 2.0943355560302734, "learning_rate": 9.935173740440692e-06, "loss": 0.10989570617675781, "step": 855 }, { "epoch": 0.1192781996795095, "grad_norm": 2.0462605953216553, "learning_rate": 9.93479491260209e-06, "loss": 0.11104202270507812, "step": 856 }, { "epoch": 0.1194175433707239, "grad_norm": 1.2032077312469482, "learning_rate": 9.934414988364722e-06, "loss": 0.09397125244140625, "step": 857 }, { "epoch": 0.11955688706193827, "grad_norm": 1.8036898374557495, "learning_rate": 9.934033967812998e-06, "loss": 0.09663772583007812, "step": 858 }, { "epoch": 0.11969623075315265, "grad_norm": 2.5349628925323486, "learning_rate": 9.933651851031573e-06, "loss": 0.11570072174072266, "step": 859 }, { "epoch": 0.11983557444436703, "grad_norm": 0.9020851850509644, "learning_rate": 9.933268638105345e-06, "loss": 0.09195709228515625, "step": 860 }, { "epoch": 0.11997491813558141, "grad_norm": 2.472271203994751, "learning_rate": 9.932884329119452e-06, "loss": 0.11168098449707031, "step": 861 }, { "epoch": 0.1201142618267958, "grad_norm": 1.6842321157455444, "learning_rate": 9.932498924159281e-06, "loss": 0.10125923156738281, "step": 862 }, { "epoch": 0.12025360551801018, "grad_norm": 2.448655128479004, "learning_rate": 9.93211242331046e-06, "loss": 0.08446693420410156, "step": 863 }, { "epoch": 0.12039294920922455, "grad_norm": 1.693741798400879, "learning_rate": 9.931724826658861e-06, "loss": 0.13668060302734375, "step": 864 }, { "epoch": 0.12053229290043893, "grad_norm": 2.3335368633270264, "learning_rate": 9.931336134290598e-06, "loss": 0.11164665222167969, "step": 865 }, { "epoch": 0.12067163659165331, "grad_norm": 1.3700780868530273, "learning_rate": 9.930946346292032e-06, "loss": 0.10934638977050781, "step": 866 }, { "epoch": 0.1208109802828677, "grad_norm": 2.2249538898468018, "learning_rate": 9.930555462749762e-06, "loss": 0.12428665161132812, "step": 867 }, { "epoch": 0.12095032397408208, "grad_norm": 1.5483174324035645, "learning_rate": 9.930163483750636e-06, "loss": 0.07758426666259766, "step": 868 }, { "epoch": 0.12108966766529645, "grad_norm": 5.557027816772461, "learning_rate": 9.92977040938174e-06, "loss": 0.13163375854492188, "step": 869 }, { "epoch": 0.12122901135651083, "grad_norm": 3.7854278087615967, "learning_rate": 9.929376239730408e-06, "loss": 0.09229373931884766, "step": 870 }, { "epoch": 0.12136835504772521, "grad_norm": 3.029078483581543, "learning_rate": 9.928980974884215e-06, "loss": 0.10680770874023438, "step": 871 }, { "epoch": 0.1215076987389396, "grad_norm": 3.456355571746826, "learning_rate": 9.928584614930981e-06, "loss": 0.10721969604492188, "step": 872 }, { "epoch": 0.12164704243015398, "grad_norm": 2.3991870880126953, "learning_rate": 9.928187159958764e-06, "loss": 0.10540294647216797, "step": 873 }, { "epoch": 0.12178638612136836, "grad_norm": 1.607142686843872, "learning_rate": 9.927788610055875e-06, "loss": 0.13491344451904297, "step": 874 }, { "epoch": 0.12192572981258273, "grad_norm": 2.9370779991149902, "learning_rate": 9.92738896531086e-06, "loss": 0.09739875793457031, "step": 875 }, { "epoch": 0.12206507350379711, "grad_norm": 2.103093385696411, "learning_rate": 9.926988225812511e-06, "loss": 0.13292503356933594, "step": 876 }, { "epoch": 0.1222044171950115, "grad_norm": 0.8254170417785645, "learning_rate": 9.926586391649863e-06, "loss": 0.06249713897705078, "step": 877 }, { "epoch": 0.12234376088622588, "grad_norm": 2.188608169555664, "learning_rate": 9.926183462912196e-06, "loss": 0.09144973754882812, "step": 878 }, { "epoch": 0.12248310457744026, "grad_norm": 1.0848872661590576, "learning_rate": 9.925779439689028e-06, "loss": 0.11252593994140625, "step": 879 }, { "epoch": 0.12262244826865464, "grad_norm": 3.2954211235046387, "learning_rate": 9.925374322070126e-06, "loss": 0.10610389709472656, "step": 880 }, { "epoch": 0.12276179195986901, "grad_norm": 2.800495147705078, "learning_rate": 9.9249681101455e-06, "loss": 0.09205436706542969, "step": 881 }, { "epoch": 0.1229011356510834, "grad_norm": 1.2207145690917969, "learning_rate": 9.924560804005397e-06, "loss": 0.09731483459472656, "step": 882 }, { "epoch": 0.12304047934229778, "grad_norm": 2.171149730682373, "learning_rate": 9.924152403740315e-06, "loss": 0.11501264572143555, "step": 883 }, { "epoch": 0.12317982303351216, "grad_norm": 4.047091484069824, "learning_rate": 9.923742909440987e-06, "loss": 0.11170196533203125, "step": 884 }, { "epoch": 0.12331916672472654, "grad_norm": 2.7806410789489746, "learning_rate": 9.923332321198396e-06, "loss": 0.13657569885253906, "step": 885 }, { "epoch": 0.12345851041594091, "grad_norm": 2.3237364292144775, "learning_rate": 9.922920639103766e-06, "loss": 0.11185455322265625, "step": 886 }, { "epoch": 0.12359785410715529, "grad_norm": 2.277559280395508, "learning_rate": 9.92250786324856e-06, "loss": 0.11772823333740234, "step": 887 }, { "epoch": 0.12373719779836968, "grad_norm": 2.9791419506073, "learning_rate": 9.922093993724492e-06, "loss": 0.09569835662841797, "step": 888 }, { "epoch": 0.12387654148958406, "grad_norm": 0.9989010691642761, "learning_rate": 9.92167903062351e-06, "loss": 0.0996694564819336, "step": 889 }, { "epoch": 0.12401588518079844, "grad_norm": 1.330994963645935, "learning_rate": 9.921262974037813e-06, "loss": 0.09036445617675781, "step": 890 }, { "epoch": 0.12415522887201282, "grad_norm": 1.1724978685379028, "learning_rate": 9.920845824059836e-06, "loss": 0.0846710205078125, "step": 891 }, { "epoch": 0.12429457256322719, "grad_norm": 1.461248517036438, "learning_rate": 9.920427580782263e-06, "loss": 0.12006187438964844, "step": 892 }, { "epoch": 0.12443391625444158, "grad_norm": 1.066603422164917, "learning_rate": 9.920008244298016e-06, "loss": 0.09593009948730469, "step": 893 }, { "epoch": 0.12457325994565596, "grad_norm": 0.7315662503242493, "learning_rate": 9.919587814700262e-06, "loss": 0.08841228485107422, "step": 894 }, { "epoch": 0.12471260363687034, "grad_norm": 1.908085823059082, "learning_rate": 9.919166292082414e-06, "loss": 0.1264781951904297, "step": 895 }, { "epoch": 0.12485194732808472, "grad_norm": 1.3838165998458862, "learning_rate": 9.91874367653812e-06, "loss": 0.10321044921875, "step": 896 }, { "epoch": 0.1249912910192991, "grad_norm": 2.3532509803771973, "learning_rate": 9.91831996816128e-06, "loss": 0.10271453857421875, "step": 897 }, { "epoch": 0.12513063471051347, "grad_norm": 2.2254114151000977, "learning_rate": 9.917895167046027e-06, "loss": 0.09473991394042969, "step": 898 }, { "epoch": 0.12526997840172785, "grad_norm": 1.4791496992111206, "learning_rate": 9.917469273286749e-06, "loss": 0.11015605926513672, "step": 899 }, { "epoch": 0.12540932209294225, "grad_norm": 2.9317569732666016, "learning_rate": 9.917042286978064e-06, "loss": 0.08510017395019531, "step": 900 }, { "epoch": 0.12554866578415663, "grad_norm": 2.5378856658935547, "learning_rate": 9.916614208214841e-06, "loss": 0.11310958862304688, "step": 901 }, { "epoch": 0.125688009475371, "grad_norm": 1.240079402923584, "learning_rate": 9.91618503709219e-06, "loss": 0.08706092834472656, "step": 902 }, { "epoch": 0.1258273531665854, "grad_norm": 6.722053050994873, "learning_rate": 9.915754773705461e-06, "loss": 0.1479034423828125, "step": 903 }, { "epoch": 0.12596669685779976, "grad_norm": 2.2076432704925537, "learning_rate": 9.915323418150252e-06, "loss": 0.10244369506835938, "step": 904 }, { "epoch": 0.12610604054901414, "grad_norm": 2.1214945316314697, "learning_rate": 9.914890970522397e-06, "loss": 0.09888076782226562, "step": 905 }, { "epoch": 0.12624538424022852, "grad_norm": 1.2141368389129639, "learning_rate": 9.914457430917977e-06, "loss": 0.11794090270996094, "step": 906 }, { "epoch": 0.1263847279314429, "grad_norm": 4.023681640625, "learning_rate": 9.914022799433315e-06, "loss": 0.12688064575195312, "step": 907 }, { "epoch": 0.12652407162265727, "grad_norm": 2.6025023460388184, "learning_rate": 9.913587076164976e-06, "loss": 0.10251045227050781, "step": 908 }, { "epoch": 0.12666341531387165, "grad_norm": 1.4886006116867065, "learning_rate": 9.913150261209767e-06, "loss": 0.10257434844970703, "step": 909 }, { "epoch": 0.12680275900508606, "grad_norm": 3.050658941268921, "learning_rate": 9.91271235466474e-06, "loss": 0.10079383850097656, "step": 910 }, { "epoch": 0.12694210269630044, "grad_norm": 3.482774257659912, "learning_rate": 9.912273356627188e-06, "loss": 0.09300041198730469, "step": 911 }, { "epoch": 0.1270814463875148, "grad_norm": 3.427107810974121, "learning_rate": 9.911833267194643e-06, "loss": 0.13120269775390625, "step": 912 }, { "epoch": 0.1272207900787292, "grad_norm": 2.005823850631714, "learning_rate": 9.911392086464886e-06, "loss": 0.07969284057617188, "step": 913 }, { "epoch": 0.12736013376994357, "grad_norm": 2.4347689151763916, "learning_rate": 9.910949814535936e-06, "loss": 0.10712432861328125, "step": 914 }, { "epoch": 0.12749947746115795, "grad_norm": 1.4215284585952759, "learning_rate": 9.910506451506056e-06, "loss": 0.09469795227050781, "step": 915 }, { "epoch": 0.12763882115237232, "grad_norm": 2.050304651260376, "learning_rate": 9.910061997473753e-06, "loss": 0.0709218978881836, "step": 916 }, { "epoch": 0.1277781648435867, "grad_norm": 1.9227895736694336, "learning_rate": 9.909616452537772e-06, "loss": 0.06928634643554688, "step": 917 }, { "epoch": 0.12791750853480108, "grad_norm": 4.7330803871154785, "learning_rate": 9.909169816797102e-06, "loss": 0.10946273803710938, "step": 918 }, { "epoch": 0.12805685222601546, "grad_norm": 5.912502288818359, "learning_rate": 9.908722090350979e-06, "loss": 0.10862922668457031, "step": 919 }, { "epoch": 0.12819619591722986, "grad_norm": 0.8719412684440613, "learning_rate": 9.908273273298874e-06, "loss": 0.08774375915527344, "step": 920 }, { "epoch": 0.12833553960844424, "grad_norm": 2.1609606742858887, "learning_rate": 9.907823365740507e-06, "loss": 0.09725570678710938, "step": 921 }, { "epoch": 0.12847488329965862, "grad_norm": 2.1488595008850098, "learning_rate": 9.907372367775834e-06, "loss": 0.09234619140625, "step": 922 }, { "epoch": 0.128614226990873, "grad_norm": 0.8687872886657715, "learning_rate": 9.906920279505058e-06, "loss": 0.08239555358886719, "step": 923 }, { "epoch": 0.12875357068208737, "grad_norm": 3.681856155395508, "learning_rate": 9.906467101028625e-06, "loss": 0.15980148315429688, "step": 924 }, { "epoch": 0.12889291437330175, "grad_norm": 1.6489996910095215, "learning_rate": 9.906012832447219e-06, "loss": 0.12435054779052734, "step": 925 }, { "epoch": 0.12903225806451613, "grad_norm": 1.0700832605361938, "learning_rate": 9.905557473861764e-06, "loss": 0.09510612487792969, "step": 926 }, { "epoch": 0.1291716017557305, "grad_norm": 1.9759401082992554, "learning_rate": 9.905101025373438e-06, "loss": 0.12375831604003906, "step": 927 }, { "epoch": 0.12931094544694488, "grad_norm": 1.10135817527771, "learning_rate": 9.904643487083648e-06, "loss": 0.09514236450195312, "step": 928 }, { "epoch": 0.12945028913815926, "grad_norm": 1.2697397470474243, "learning_rate": 9.90418485909405e-06, "loss": 0.08399772644042969, "step": 929 }, { "epoch": 0.12958963282937366, "grad_norm": 0.9644181132316589, "learning_rate": 9.903725141506539e-06, "loss": 0.0878305435180664, "step": 930 }, { "epoch": 0.12972897652058804, "grad_norm": 1.1840234994888306, "learning_rate": 9.903264334423258e-06, "loss": 0.090576171875, "step": 931 }, { "epoch": 0.12986832021180242, "grad_norm": 2.3120014667510986, "learning_rate": 9.902802437946584e-06, "loss": 0.09244346618652344, "step": 932 }, { "epoch": 0.1300076639030168, "grad_norm": 2.6095006465911865, "learning_rate": 9.902339452179142e-06, "loss": 0.1238250732421875, "step": 933 }, { "epoch": 0.13014700759423117, "grad_norm": 2.838391065597534, "learning_rate": 9.901875377223796e-06, "loss": 0.1380748748779297, "step": 934 }, { "epoch": 0.13028635128544555, "grad_norm": 1.6304290294647217, "learning_rate": 9.901410213183653e-06, "loss": 0.09809398651123047, "step": 935 }, { "epoch": 0.13042569497665993, "grad_norm": 1.3722645044326782, "learning_rate": 9.900943960162061e-06, "loss": 0.15543174743652344, "step": 936 }, { "epoch": 0.1305650386678743, "grad_norm": 0.9374591112136841, "learning_rate": 9.900476618262612e-06, "loss": 0.09188127517700195, "step": 937 }, { "epoch": 0.13070438235908868, "grad_norm": 1.1715962886810303, "learning_rate": 9.900008187589138e-06, "loss": 0.08965682983398438, "step": 938 }, { "epoch": 0.13084372605030306, "grad_norm": 1.601389765739441, "learning_rate": 9.899538668245713e-06, "loss": 0.07994842529296875, "step": 939 }, { "epoch": 0.13098306974151747, "grad_norm": 1.0653841495513916, "learning_rate": 9.899068060336656e-06, "loss": 0.09381103515625, "step": 940 }, { "epoch": 0.13112241343273184, "grad_norm": 0.9559437036514282, "learning_rate": 9.898596363966523e-06, "loss": 0.09891700744628906, "step": 941 }, { "epoch": 0.13126175712394622, "grad_norm": 1.1253247261047363, "learning_rate": 9.898123579240115e-06, "loss": 0.08476829528808594, "step": 942 }, { "epoch": 0.1314011008151606, "grad_norm": 0.9708438515663147, "learning_rate": 9.897649706262474e-06, "loss": 0.0862436294555664, "step": 943 }, { "epoch": 0.13154044450637498, "grad_norm": 1.8625317811965942, "learning_rate": 9.897174745138883e-06, "loss": 0.10697174072265625, "step": 944 }, { "epoch": 0.13167978819758935, "grad_norm": 1.5819621086120605, "learning_rate": 9.896698695974866e-06, "loss": 0.09056377410888672, "step": 945 }, { "epoch": 0.13181913188880373, "grad_norm": 2.2445919513702393, "learning_rate": 9.896221558876195e-06, "loss": 0.09499835968017578, "step": 946 }, { "epoch": 0.1319584755800181, "grad_norm": 1.1750365495681763, "learning_rate": 9.895743333948875e-06, "loss": 0.08951663970947266, "step": 947 }, { "epoch": 0.1320978192712325, "grad_norm": 1.7636520862579346, "learning_rate": 9.895264021299158e-06, "loss": 0.09051132202148438, "step": 948 }, { "epoch": 0.13223716296244686, "grad_norm": 3.1230621337890625, "learning_rate": 9.894783621033538e-06, "loss": 0.11138534545898438, "step": 949 }, { "epoch": 0.13237650665366127, "grad_norm": 6.293827533721924, "learning_rate": 9.894302133258747e-06, "loss": 0.1668529510498047, "step": 950 }, { "epoch": 0.13251585034487565, "grad_norm": 1.156690001487732, "learning_rate": 9.893819558081759e-06, "loss": 0.08439254760742188, "step": 951 }, { "epoch": 0.13265519403609002, "grad_norm": 1.979092001914978, "learning_rate": 9.893335895609792e-06, "loss": 0.12440109252929688, "step": 952 }, { "epoch": 0.1327945377273044, "grad_norm": 2.5189456939697266, "learning_rate": 9.892851145950308e-06, "loss": 0.1226205825805664, "step": 953 }, { "epoch": 0.13293388141851878, "grad_norm": 1.9200191497802734, "learning_rate": 9.892365309211005e-06, "loss": 0.0992889404296875, "step": 954 }, { "epoch": 0.13307322510973316, "grad_norm": 1.197752594947815, "learning_rate": 9.891878385499825e-06, "loss": 0.10314369201660156, "step": 955 }, { "epoch": 0.13321256880094753, "grad_norm": 1.3519290685653687, "learning_rate": 9.891390374924949e-06, "loss": 0.08767497539520264, "step": 956 }, { "epoch": 0.1333519124921619, "grad_norm": 2.1944072246551514, "learning_rate": 9.890901277594806e-06, "loss": 0.12186622619628906, "step": 957 }, { "epoch": 0.1334912561833763, "grad_norm": 1.4970351457595825, "learning_rate": 9.89041109361806e-06, "loss": 0.08492851257324219, "step": 958 }, { "epoch": 0.13363059987459067, "grad_norm": 0.8175457715988159, "learning_rate": 9.889919823103618e-06, "loss": 0.08107948303222656, "step": 959 }, { "epoch": 0.13376994356580507, "grad_norm": 1.8443547487258911, "learning_rate": 9.889427466160633e-06, "loss": 0.13904571533203125, "step": 960 }, { "epoch": 0.13390928725701945, "grad_norm": 2.570413827896118, "learning_rate": 9.888934022898488e-06, "loss": 0.09127473831176758, "step": 961 }, { "epoch": 0.13404863094823383, "grad_norm": 2.6893527507781982, "learning_rate": 9.888439493426824e-06, "loss": 0.09501457214355469, "step": 962 }, { "epoch": 0.1341879746394482, "grad_norm": 2.9003145694732666, "learning_rate": 9.887943877855505e-06, "loss": 0.10154533386230469, "step": 963 }, { "epoch": 0.13432731833066258, "grad_norm": 1.579357385635376, "learning_rate": 9.887447176294653e-06, "loss": 0.1083364486694336, "step": 964 }, { "epoch": 0.13446666202187696, "grad_norm": 0.6860966086387634, "learning_rate": 9.88694938885462e-06, "loss": 0.08471393585205078, "step": 965 }, { "epoch": 0.13460600571309134, "grad_norm": 2.3246982097625732, "learning_rate": 9.886450515646005e-06, "loss": 0.11292266845703125, "step": 966 }, { "epoch": 0.13474534940430571, "grad_norm": 0.9757624864578247, "learning_rate": 9.885950556779644e-06, "loss": 0.08849906921386719, "step": 967 }, { "epoch": 0.1348846930955201, "grad_norm": 2.01823353767395, "learning_rate": 9.885449512366617e-06, "loss": 0.13010311126708984, "step": 968 }, { "epoch": 0.13502403678673447, "grad_norm": 2.2149226665496826, "learning_rate": 9.884947382518247e-06, "loss": 0.10741519927978516, "step": 969 }, { "epoch": 0.13516338047794887, "grad_norm": 1.9280140399932861, "learning_rate": 9.88444416734609e-06, "loss": 0.08994865417480469, "step": 970 }, { "epoch": 0.13530272416916325, "grad_norm": 1.7576361894607544, "learning_rate": 9.883939866961956e-06, "loss": 0.09931182861328125, "step": 971 }, { "epoch": 0.13544206786037763, "grad_norm": 3.7847235202789307, "learning_rate": 9.883434481477885e-06, "loss": 0.11445808410644531, "step": 972 }, { "epoch": 0.135581411551592, "grad_norm": 2.3599109649658203, "learning_rate": 9.882928011006163e-06, "loss": 0.108551025390625, "step": 973 }, { "epoch": 0.13572075524280638, "grad_norm": 1.985635757446289, "learning_rate": 9.882420455659316e-06, "loss": 0.09661102294921875, "step": 974 }, { "epoch": 0.13586009893402076, "grad_norm": 1.132599115371704, "learning_rate": 9.881911815550111e-06, "loss": 0.0975656509399414, "step": 975 }, { "epoch": 0.13599944262523514, "grad_norm": 1.0557219982147217, "learning_rate": 9.881402090791556e-06, "loss": 0.09927558898925781, "step": 976 }, { "epoch": 0.13613878631644952, "grad_norm": 0.8597046732902527, "learning_rate": 9.880891281496901e-06, "loss": 0.10744285583496094, "step": 977 }, { "epoch": 0.1362781300076639, "grad_norm": 2.573059558868408, "learning_rate": 9.880379387779637e-06, "loss": 0.08208274841308594, "step": 978 }, { "epoch": 0.13641747369887827, "grad_norm": 3.9510960578918457, "learning_rate": 9.879866409753493e-06, "loss": 0.10420989990234375, "step": 979 }, { "epoch": 0.13655681739009268, "grad_norm": 1.4629656076431274, "learning_rate": 9.879352347532442e-06, "loss": 0.09867572784423828, "step": 980 }, { "epoch": 0.13669616108130705, "grad_norm": 0.9018091559410095, "learning_rate": 9.878837201230697e-06, "loss": 0.0873880386352539, "step": 981 }, { "epoch": 0.13683550477252143, "grad_norm": 0.9711273312568665, "learning_rate": 9.878320970962712e-06, "loss": 0.10496807098388672, "step": 982 }, { "epoch": 0.1369748484637358, "grad_norm": 1.4927395582199097, "learning_rate": 9.877803656843182e-06, "loss": 0.08838272094726562, "step": 983 }, { "epoch": 0.1371141921549502, "grad_norm": 2.2258317470550537, "learning_rate": 9.877285258987039e-06, "loss": 0.12379169464111328, "step": 984 }, { "epoch": 0.13725353584616456, "grad_norm": 0.8857151865959167, "learning_rate": 9.876765777509463e-06, "loss": 0.08512687683105469, "step": 985 }, { "epoch": 0.13739287953737894, "grad_norm": 1.3888643980026245, "learning_rate": 9.87624521252587e-06, "loss": 0.1025543212890625, "step": 986 }, { "epoch": 0.13753222322859332, "grad_norm": 2.877152442932129, "learning_rate": 9.875723564151918e-06, "loss": 0.12361812591552734, "step": 987 }, { "epoch": 0.1376715669198077, "grad_norm": 2.308096408843994, "learning_rate": 9.875200832503505e-06, "loss": 0.09977912902832031, "step": 988 }, { "epoch": 0.13781091061102207, "grad_norm": 2.170861005783081, "learning_rate": 9.874677017696769e-06, "loss": 0.0976858139038086, "step": 989 }, { "epoch": 0.13795025430223645, "grad_norm": 0.8470692038536072, "learning_rate": 9.87415211984809e-06, "loss": 0.08013916015625, "step": 990 }, { "epoch": 0.13808959799345086, "grad_norm": 2.0779428482055664, "learning_rate": 9.873626139074088e-06, "loss": 0.12930870056152344, "step": 991 }, { "epoch": 0.13822894168466524, "grad_norm": 1.8243467807769775, "learning_rate": 9.873099075491626e-06, "loss": 0.10002326965332031, "step": 992 }, { "epoch": 0.1383682853758796, "grad_norm": 1.165392279624939, "learning_rate": 9.872570929217804e-06, "loss": 0.07743263244628906, "step": 993 }, { "epoch": 0.138507629067094, "grad_norm": 1.4380844831466675, "learning_rate": 9.872041700369965e-06, "loss": 0.101318359375, "step": 994 }, { "epoch": 0.13864697275830837, "grad_norm": 1.5015668869018555, "learning_rate": 9.871511389065689e-06, "loss": 0.09948539733886719, "step": 995 }, { "epoch": 0.13878631644952275, "grad_norm": 1.3710381984710693, "learning_rate": 9.870979995422803e-06, "loss": 0.09816551208496094, "step": 996 }, { "epoch": 0.13892566014073712, "grad_norm": 1.7848713397979736, "learning_rate": 9.870447519559366e-06, "loss": 0.08720970153808594, "step": 997 }, { "epoch": 0.1390650038319515, "grad_norm": 2.321362018585205, "learning_rate": 9.869913961593685e-06, "loss": 0.088470458984375, "step": 998 }, { "epoch": 0.13920434752316588, "grad_norm": 2.2728757858276367, "learning_rate": 9.869379321644306e-06, "loss": 0.10360431671142578, "step": 999 }, { "epoch": 0.13934369121438026, "grad_norm": 2.0530247688293457, "learning_rate": 9.868843599830009e-06, "loss": 0.12275218963623047, "step": 1000 }, { "epoch": 0.13948303490559466, "grad_norm": 1.7765544652938843, "learning_rate": 9.868306796269822e-06, "loss": 0.10804939270019531, "step": 1001 }, { "epoch": 0.13962237859680904, "grad_norm": 2.986790895462036, "learning_rate": 9.86776891108301e-06, "loss": 0.09539222717285156, "step": 1002 }, { "epoch": 0.13976172228802342, "grad_norm": 2.6739466190338135, "learning_rate": 9.86722994438908e-06, "loss": 0.11291885375976562, "step": 1003 }, { "epoch": 0.1399010659792378, "grad_norm": 0.9738577008247375, "learning_rate": 9.866689896307778e-06, "loss": 0.09045600891113281, "step": 1004 }, { "epoch": 0.14004040967045217, "grad_norm": 2.473088026046753, "learning_rate": 9.866148766959087e-06, "loss": 0.08867979049682617, "step": 1005 }, { "epoch": 0.14017975336166655, "grad_norm": 2.1247072219848633, "learning_rate": 9.865606556463239e-06, "loss": 0.08902978897094727, "step": 1006 }, { "epoch": 0.14031909705288093, "grad_norm": 1.6665223836898804, "learning_rate": 9.865063264940695e-06, "loss": 0.08212661743164062, "step": 1007 }, { "epoch": 0.1404584407440953, "grad_norm": 0.7444074153900146, "learning_rate": 9.864518892512167e-06, "loss": 0.09097671508789062, "step": 1008 }, { "epoch": 0.14059778443530968, "grad_norm": 2.55668568611145, "learning_rate": 9.863973439298597e-06, "loss": 0.11830615997314453, "step": 1009 }, { "epoch": 0.14073712812652406, "grad_norm": 1.4433138370513916, "learning_rate": 9.863426905421179e-06, "loss": 0.10504722595214844, "step": 1010 }, { "epoch": 0.14087647181773846, "grad_norm": 1.2762213945388794, "learning_rate": 9.862879291001334e-06, "loss": 0.08287811279296875, "step": 1011 }, { "epoch": 0.14101581550895284, "grad_norm": 1.9599277973175049, "learning_rate": 9.862330596160732e-06, "loss": 0.1390819549560547, "step": 1012 }, { "epoch": 0.14115515920016722, "grad_norm": 0.8300939202308655, "learning_rate": 9.861780821021282e-06, "loss": 0.09241104125976562, "step": 1013 }, { "epoch": 0.1412945028913816, "grad_norm": 0.6963160037994385, "learning_rate": 9.861229965705129e-06, "loss": 0.07754135131835938, "step": 1014 }, { "epoch": 0.14143384658259597, "grad_norm": 3.505854368209839, "learning_rate": 9.86067803033466e-06, "loss": 0.11487197875976562, "step": 1015 }, { "epoch": 0.14157319027381035, "grad_norm": 2.8542983531951904, "learning_rate": 9.860125015032506e-06, "loss": 0.09474563598632812, "step": 1016 }, { "epoch": 0.14171253396502473, "grad_norm": 1.5732182264328003, "learning_rate": 9.859570919921533e-06, "loss": 0.09020805358886719, "step": 1017 }, { "epoch": 0.1418518776562391, "grad_norm": 1.3614387512207031, "learning_rate": 9.859015745124844e-06, "loss": 0.08576202392578125, "step": 1018 }, { "epoch": 0.14199122134745348, "grad_norm": 1.8020273447036743, "learning_rate": 9.858459490765792e-06, "loss": 0.10387706756591797, "step": 1019 }, { "epoch": 0.14213056503866786, "grad_norm": 1.8021730184555054, "learning_rate": 9.857902156967961e-06, "loss": 0.09251976013183594, "step": 1020 }, { "epoch": 0.14226990872988227, "grad_norm": 1.404181957244873, "learning_rate": 9.857343743855178e-06, "loss": 0.09272956848144531, "step": 1021 }, { "epoch": 0.14240925242109664, "grad_norm": 1.5663177967071533, "learning_rate": 9.856784251551512e-06, "loss": 0.10237884521484375, "step": 1022 }, { "epoch": 0.14254859611231102, "grad_norm": 1.6190791130065918, "learning_rate": 9.856223680181267e-06, "loss": 0.09511089324951172, "step": 1023 }, { "epoch": 0.1426879398035254, "grad_norm": 2.3458120822906494, "learning_rate": 9.85566202986899e-06, "loss": 0.12595748901367188, "step": 1024 }, { "epoch": 0.14282728349473978, "grad_norm": 1.028939962387085, "learning_rate": 9.855099300739463e-06, "loss": 0.09689712524414062, "step": 1025 }, { "epoch": 0.14296662718595415, "grad_norm": 0.9270734786987305, "learning_rate": 9.854535492917718e-06, "loss": 0.08349037170410156, "step": 1026 }, { "epoch": 0.14310597087716853, "grad_norm": 0.318574994802475, "learning_rate": 9.853970606529018e-06, "loss": 0.06051063537597656, "step": 1027 }, { "epoch": 0.1432453145683829, "grad_norm": 0.8513997197151184, "learning_rate": 9.853404641698866e-06, "loss": 0.08884906768798828, "step": 1028 }, { "epoch": 0.1433846582595973, "grad_norm": 1.7568845748901367, "learning_rate": 9.85283759855301e-06, "loss": 0.09932541847229004, "step": 1029 }, { "epoch": 0.14352400195081166, "grad_norm": 1.5457407236099243, "learning_rate": 9.852269477217428e-06, "loss": 0.10272502899169922, "step": 1030 }, { "epoch": 0.14366334564202607, "grad_norm": 1.3917511701583862, "learning_rate": 9.85170027781835e-06, "loss": 0.09850120544433594, "step": 1031 }, { "epoch": 0.14380268933324045, "grad_norm": 1.3188990354537964, "learning_rate": 9.851130000482236e-06, "loss": 0.08572006225585938, "step": 1032 }, { "epoch": 0.14394203302445482, "grad_norm": 1.2815016508102417, "learning_rate": 9.85055864533579e-06, "loss": 0.09262847900390625, "step": 1033 }, { "epoch": 0.1440813767156692, "grad_norm": 2.8906948566436768, "learning_rate": 9.849986212505952e-06, "loss": 0.10932445526123047, "step": 1034 }, { "epoch": 0.14422072040688358, "grad_norm": 1.4570984840393066, "learning_rate": 9.849412702119905e-06, "loss": 0.0787801742553711, "step": 1035 }, { "epoch": 0.14436006409809796, "grad_norm": 1.8087422847747803, "learning_rate": 9.848838114305069e-06, "loss": 0.09499931335449219, "step": 1036 }, { "epoch": 0.14449940778931233, "grad_norm": 1.511576771736145, "learning_rate": 9.848262449189105e-06, "loss": 0.10103797912597656, "step": 1037 }, { "epoch": 0.1446387514805267, "grad_norm": 1.8907912969589233, "learning_rate": 9.847685706899913e-06, "loss": 0.10245132446289062, "step": 1038 }, { "epoch": 0.1447780951717411, "grad_norm": 1.0099728107452393, "learning_rate": 9.84710788756563e-06, "loss": 0.09464168548583984, "step": 1039 }, { "epoch": 0.14491743886295547, "grad_norm": 0.9124287962913513, "learning_rate": 9.846528991314638e-06, "loss": 0.09028816223144531, "step": 1040 }, { "epoch": 0.14505678255416987, "grad_norm": 1.342206597328186, "learning_rate": 9.845949018275551e-06, "loss": 0.09596061706542969, "step": 1041 }, { "epoch": 0.14519612624538425, "grad_norm": 1.1664304733276367, "learning_rate": 9.845367968577229e-06, "loss": 0.10447883605957031, "step": 1042 }, { "epoch": 0.14533546993659863, "grad_norm": 0.9638790488243103, "learning_rate": 9.844785842348764e-06, "loss": 0.09147167205810547, "step": 1043 }, { "epoch": 0.145474813627813, "grad_norm": 1.9991483688354492, "learning_rate": 9.844202639719492e-06, "loss": 0.1307373046875, "step": 1044 }, { "epoch": 0.14561415731902738, "grad_norm": 1.3466566801071167, "learning_rate": 9.84361836081899e-06, "loss": 0.09374427795410156, "step": 1045 }, { "epoch": 0.14575350101024176, "grad_norm": 2.6291580200195312, "learning_rate": 9.84303300577707e-06, "loss": 0.1125030517578125, "step": 1046 }, { "epoch": 0.14589284470145614, "grad_norm": 1.2104741334915161, "learning_rate": 9.842446574723786e-06, "loss": 0.07867431640625, "step": 1047 }, { "epoch": 0.14603218839267051, "grad_norm": 1.317328929901123, "learning_rate": 9.841859067789425e-06, "loss": 0.09706497192382812, "step": 1048 }, { "epoch": 0.1461715320838849, "grad_norm": 2.3094091415405273, "learning_rate": 9.841270485104522e-06, "loss": 0.11441230773925781, "step": 1049 }, { "epoch": 0.14631087577509927, "grad_norm": 0.5083723664283752, "learning_rate": 9.840680826799845e-06, "loss": 0.06562423706054688, "step": 1050 }, { "epoch": 0.14645021946631367, "grad_norm": 1.1528676748275757, "learning_rate": 9.840090093006403e-06, "loss": 0.09754371643066406, "step": 1051 }, { "epoch": 0.14658956315752805, "grad_norm": 0.8301803469657898, "learning_rate": 9.839498283855444e-06, "loss": 0.1009521484375, "step": 1052 }, { "epoch": 0.14672890684874243, "grad_norm": 0.8880504369735718, "learning_rate": 9.838905399478453e-06, "loss": 0.08307218551635742, "step": 1053 }, { "epoch": 0.1468682505399568, "grad_norm": 0.7160991430282593, "learning_rate": 9.838311440007159e-06, "loss": 0.0817108154296875, "step": 1054 }, { "epoch": 0.14700759423117118, "grad_norm": 1.7525385618209839, "learning_rate": 9.83771640557352e-06, "loss": 0.08893203735351562, "step": 1055 }, { "epoch": 0.14714693792238556, "grad_norm": 1.5397742986679077, "learning_rate": 9.837120296309744e-06, "loss": 0.15022659301757812, "step": 1056 }, { "epoch": 0.14728628161359994, "grad_norm": 1.221344232559204, "learning_rate": 9.836523112348271e-06, "loss": 0.09613990783691406, "step": 1057 }, { "epoch": 0.14742562530481432, "grad_norm": 1.0916364192962646, "learning_rate": 9.835924853821783e-06, "loss": 0.0890035629272461, "step": 1058 }, { "epoch": 0.1475649689960287, "grad_norm": 1.6087307929992676, "learning_rate": 9.8353255208632e-06, "loss": 0.09650993347167969, "step": 1059 }, { "epoch": 0.14770431268724307, "grad_norm": 3.149857997894287, "learning_rate": 9.834725113605676e-06, "loss": 0.12077140808105469, "step": 1060 }, { "epoch": 0.14784365637845748, "grad_norm": 0.8253120183944702, "learning_rate": 9.83412363218261e-06, "loss": 0.078460693359375, "step": 1061 }, { "epoch": 0.14798300006967186, "grad_norm": 0.9364370107650757, "learning_rate": 9.833521076727638e-06, "loss": 0.08872699737548828, "step": 1062 }, { "epoch": 0.14812234376088623, "grad_norm": 0.8260170221328735, "learning_rate": 9.832917447374637e-06, "loss": 0.10431861877441406, "step": 1063 }, { "epoch": 0.1482616874521006, "grad_norm": 2.3340303897857666, "learning_rate": 9.832312744257715e-06, "loss": 0.10474205017089844, "step": 1064 }, { "epoch": 0.148401031143315, "grad_norm": 1.3082389831542969, "learning_rate": 9.831706967511223e-06, "loss": 0.11061859130859375, "step": 1065 }, { "epoch": 0.14854037483452937, "grad_norm": 0.961006224155426, "learning_rate": 9.831100117269755e-06, "loss": 0.11346149444580078, "step": 1066 }, { "epoch": 0.14867971852574374, "grad_norm": 0.873349666595459, "learning_rate": 9.830492193668135e-06, "loss": 0.07535266876220703, "step": 1067 }, { "epoch": 0.14881906221695812, "grad_norm": 1.0029617547988892, "learning_rate": 9.829883196841433e-06, "loss": 0.09860658645629883, "step": 1068 }, { "epoch": 0.1489584059081725, "grad_norm": 2.085698366165161, "learning_rate": 9.829273126924952e-06, "loss": 0.10308265686035156, "step": 1069 }, { "epoch": 0.14909774959938688, "grad_norm": 1.324282169342041, "learning_rate": 9.828661984054238e-06, "loss": 0.10731792449951172, "step": 1070 }, { "epoch": 0.14923709329060128, "grad_norm": 1.0287165641784668, "learning_rate": 9.82804976836507e-06, "loss": 0.08272838592529297, "step": 1071 }, { "epoch": 0.14937643698181566, "grad_norm": 1.1989431381225586, "learning_rate": 9.827436479993468e-06, "loss": 0.09537506103515625, "step": 1072 }, { "epoch": 0.14951578067303004, "grad_norm": 1.2215065956115723, "learning_rate": 9.826822119075694e-06, "loss": 0.07261276245117188, "step": 1073 }, { "epoch": 0.1496551243642444, "grad_norm": 0.9809848070144653, "learning_rate": 9.826206685748242e-06, "loss": 0.1004037857055664, "step": 1074 }, { "epoch": 0.1497944680554588, "grad_norm": 0.6262859106063843, "learning_rate": 9.825590180147852e-06, "loss": 0.07096099853515625, "step": 1075 }, { "epoch": 0.14993381174667317, "grad_norm": 1.0547878742218018, "learning_rate": 9.82497260241149e-06, "loss": 0.08783149719238281, "step": 1076 }, { "epoch": 0.15007315543788755, "grad_norm": 1.7103930711746216, "learning_rate": 9.824353952676375e-06, "loss": 0.1035909652709961, "step": 1077 }, { "epoch": 0.15021249912910192, "grad_norm": 1.2512211799621582, "learning_rate": 9.823734231079953e-06, "loss": 0.07993412017822266, "step": 1078 }, { "epoch": 0.1503518428203163, "grad_norm": 1.5053097009658813, "learning_rate": 9.823113437759912e-06, "loss": 0.09385490417480469, "step": 1079 }, { "epoch": 0.15049118651153068, "grad_norm": 1.5308281183242798, "learning_rate": 9.822491572854178e-06, "loss": 0.11551475524902344, "step": 1080 }, { "epoch": 0.15063053020274508, "grad_norm": 1.108298897743225, "learning_rate": 9.821868636500917e-06, "loss": 0.09919357299804688, "step": 1081 }, { "epoch": 0.15076987389395946, "grad_norm": 1.354781985282898, "learning_rate": 9.82124462883853e-06, "loss": 0.084136962890625, "step": 1082 }, { "epoch": 0.15090921758517384, "grad_norm": 1.8322592973709106, "learning_rate": 9.820619550005656e-06, "loss": 0.109405517578125, "step": 1083 }, { "epoch": 0.15104856127638822, "grad_norm": 0.8529441952705383, "learning_rate": 9.819993400141176e-06, "loss": 0.0920553207397461, "step": 1084 }, { "epoch": 0.1511879049676026, "grad_norm": 1.7789256572723389, "learning_rate": 9.819366179384204e-06, "loss": 0.10103511810302734, "step": 1085 }, { "epoch": 0.15132724865881697, "grad_norm": 1.597623586654663, "learning_rate": 9.818737887874097e-06, "loss": 0.0894937515258789, "step": 1086 }, { "epoch": 0.15146659235003135, "grad_norm": 2.048753261566162, "learning_rate": 9.818108525750442e-06, "loss": 0.0994119644165039, "step": 1087 }, { "epoch": 0.15160593604124573, "grad_norm": 2.2380776405334473, "learning_rate": 9.817478093153074e-06, "loss": 0.09688472747802734, "step": 1088 }, { "epoch": 0.1517452797324601, "grad_norm": 0.9003255367279053, "learning_rate": 9.816846590222058e-06, "loss": 0.10793304443359375, "step": 1089 }, { "epoch": 0.15188462342367448, "grad_norm": 1.8119484186172485, "learning_rate": 9.8162140170977e-06, "loss": 0.100921630859375, "step": 1090 }, { "epoch": 0.15202396711488889, "grad_norm": 0.9050304293632507, "learning_rate": 9.815580373920543e-06, "loss": 0.09136962890625, "step": 1091 }, { "epoch": 0.15216331080610326, "grad_norm": 1.5289684534072876, "learning_rate": 9.81494566083137e-06, "loss": 0.09210395812988281, "step": 1092 }, { "epoch": 0.15230265449731764, "grad_norm": 0.8332616686820984, "learning_rate": 9.814309877971195e-06, "loss": 0.08760452270507812, "step": 1093 }, { "epoch": 0.15244199818853202, "grad_norm": 0.9780274033546448, "learning_rate": 9.81367302548128e-06, "loss": 0.09468460083007812, "step": 1094 }, { "epoch": 0.1525813418797464, "grad_norm": 0.8144912719726562, "learning_rate": 9.813035103503116e-06, "loss": 0.09545135498046875, "step": 1095 }, { "epoch": 0.15272068557096077, "grad_norm": 1.1840070486068726, "learning_rate": 9.812396112178437e-06, "loss": 0.10716533660888672, "step": 1096 }, { "epoch": 0.15286002926217515, "grad_norm": 1.5225036144256592, "learning_rate": 9.811756051649209e-06, "loss": 0.100555419921875, "step": 1097 }, { "epoch": 0.15299937295338953, "grad_norm": 1.8132117986679077, "learning_rate": 9.811114922057642e-06, "loss": 0.11723899841308594, "step": 1098 }, { "epoch": 0.1531387166446039, "grad_norm": 2.8852267265319824, "learning_rate": 9.810472723546178e-06, "loss": 0.09038352966308594, "step": 1099 }, { "epoch": 0.15327806033581828, "grad_norm": 2.9912641048431396, "learning_rate": 9.8098294562575e-06, "loss": 0.08743953704833984, "step": 1100 }, { "epoch": 0.1534174040270327, "grad_norm": 1.5471612215042114, "learning_rate": 9.809185120334528e-06, "loss": 0.10645580291748047, "step": 1101 }, { "epoch": 0.15355674771824707, "grad_norm": 1.680764079093933, "learning_rate": 9.808539715920415e-06, "loss": 0.10923004150390625, "step": 1102 }, { "epoch": 0.15369609140946144, "grad_norm": 0.7955313324928284, "learning_rate": 9.807893243158562e-06, "loss": 0.08536720275878906, "step": 1103 }, { "epoch": 0.15383543510067582, "grad_norm": 1.9002324342727661, "learning_rate": 9.807245702192593e-06, "loss": 0.0941619873046875, "step": 1104 }, { "epoch": 0.1539747787918902, "grad_norm": 0.852405309677124, "learning_rate": 9.80659709316638e-06, "loss": 0.08478927612304688, "step": 1105 }, { "epoch": 0.15411412248310458, "grad_norm": 0.8341513276100159, "learning_rate": 9.805947416224034e-06, "loss": 0.08557319641113281, "step": 1106 }, { "epoch": 0.15425346617431895, "grad_norm": 1.8417181968688965, "learning_rate": 9.80529667150989e-06, "loss": 0.11310958862304688, "step": 1107 }, { "epoch": 0.15439280986553333, "grad_norm": 1.1702768802642822, "learning_rate": 9.804644859168534e-06, "loss": 0.0735006332397461, "step": 1108 }, { "epoch": 0.1545321535567477, "grad_norm": 1.1797901391983032, "learning_rate": 9.80399197934478e-06, "loss": 0.09170198440551758, "step": 1109 }, { "epoch": 0.1546714972479621, "grad_norm": 1.266969084739685, "learning_rate": 9.803338032183686e-06, "loss": 0.08622169494628906, "step": 1110 }, { "epoch": 0.1548108409391765, "grad_norm": 2.183901071548462, "learning_rate": 9.802683017830544e-06, "loss": 0.08665847778320312, "step": 1111 }, { "epoch": 0.15495018463039087, "grad_norm": 3.8952810764312744, "learning_rate": 9.802026936430883e-06, "loss": 0.08670806884765625, "step": 1112 }, { "epoch": 0.15508952832160525, "grad_norm": 2.578521728515625, "learning_rate": 9.801369788130468e-06, "loss": 0.07809734344482422, "step": 1113 }, { "epoch": 0.15522887201281962, "grad_norm": 1.2240926027297974, "learning_rate": 9.800711573075303e-06, "loss": 0.09064483642578125, "step": 1114 }, { "epoch": 0.155368215704034, "grad_norm": 1.3361150026321411, "learning_rate": 9.80005229141163e-06, "loss": 0.09966850280761719, "step": 1115 }, { "epoch": 0.15550755939524838, "grad_norm": 0.9794757962226868, "learning_rate": 9.799391943285923e-06, "loss": 0.09842300415039062, "step": 1116 }, { "epoch": 0.15564690308646276, "grad_norm": 0.8577336668968201, "learning_rate": 9.798730528844899e-06, "loss": 0.10710811614990234, "step": 1117 }, { "epoch": 0.15578624677767713, "grad_norm": 1.149190068244934, "learning_rate": 9.79806804823551e-06, "loss": 0.08488845825195312, "step": 1118 }, { "epoch": 0.1559255904688915, "grad_norm": 1.0691388845443726, "learning_rate": 9.79740450160494e-06, "loss": 0.10047721862792969, "step": 1119 }, { "epoch": 0.1560649341601059, "grad_norm": 0.7763535976409912, "learning_rate": 9.796739889100617e-06, "loss": 0.07032394409179688, "step": 1120 }, { "epoch": 0.1562042778513203, "grad_norm": 0.892849862575531, "learning_rate": 9.796074210870204e-06, "loss": 0.10574150085449219, "step": 1121 }, { "epoch": 0.15634362154253467, "grad_norm": 1.1999292373657227, "learning_rate": 9.795407467061596e-06, "loss": 0.10157585144042969, "step": 1122 }, { "epoch": 0.15648296523374905, "grad_norm": 1.343788504600525, "learning_rate": 9.794739657822929e-06, "loss": 0.09326553344726562, "step": 1123 }, { "epoch": 0.15662230892496343, "grad_norm": 1.5805126428604126, "learning_rate": 9.794070783302576e-06, "loss": 0.08817768096923828, "step": 1124 }, { "epoch": 0.1567616526161778, "grad_norm": 0.8712198734283447, "learning_rate": 9.793400843649146e-06, "loss": 0.08943784236907959, "step": 1125 }, { "epoch": 0.15690099630739218, "grad_norm": 1.6480292081832886, "learning_rate": 9.792729839011484e-06, "loss": 0.09365558624267578, "step": 1126 }, { "epoch": 0.15704033999860656, "grad_norm": 2.3092586994171143, "learning_rate": 9.792057769538672e-06, "loss": 0.10745048522949219, "step": 1127 }, { "epoch": 0.15717968368982094, "grad_norm": 0.9587599039077759, "learning_rate": 9.791384635380028e-06, "loss": 0.08586740493774414, "step": 1128 }, { "epoch": 0.15731902738103531, "grad_norm": 1.996751308441162, "learning_rate": 9.790710436685105e-06, "loss": 0.10839414596557617, "step": 1129 }, { "epoch": 0.1574583710722497, "grad_norm": 1.0176217555999756, "learning_rate": 9.790035173603699e-06, "loss": 0.08227252960205078, "step": 1130 }, { "epoch": 0.1575977147634641, "grad_norm": 1.1575099229812622, "learning_rate": 9.789358846285835e-06, "loss": 0.10988807678222656, "step": 1131 }, { "epoch": 0.15773705845467847, "grad_norm": 1.2770309448242188, "learning_rate": 9.788681454881778e-06, "loss": 0.10785865783691406, "step": 1132 }, { "epoch": 0.15787640214589285, "grad_norm": 1.4020662307739258, "learning_rate": 9.78800299954203e-06, "loss": 0.09758377075195312, "step": 1133 }, { "epoch": 0.15801574583710723, "grad_norm": 1.5910415649414062, "learning_rate": 9.787323480417328e-06, "loss": 0.08411026000976562, "step": 1134 }, { "epoch": 0.1581550895283216, "grad_norm": 1.1607707738876343, "learning_rate": 9.786642897658645e-06, "loss": 0.07996273040771484, "step": 1135 }, { "epoch": 0.15829443321953598, "grad_norm": 1.6818755865097046, "learning_rate": 9.78596125141719e-06, "loss": 0.09852218627929688, "step": 1136 }, { "epoch": 0.15843377691075036, "grad_norm": 1.5712807178497314, "learning_rate": 9.785278541844409e-06, "loss": 0.10311508178710938, "step": 1137 }, { "epoch": 0.15857312060196474, "grad_norm": 1.3605015277862549, "learning_rate": 9.784594769091989e-06, "loss": 0.08379173278808594, "step": 1138 }, { "epoch": 0.15871246429317912, "grad_norm": 3.5439658164978027, "learning_rate": 9.783909933311844e-06, "loss": 0.07604408264160156, "step": 1139 }, { "epoch": 0.1588518079843935, "grad_norm": 1.0919520854949951, "learning_rate": 9.78322403465613e-06, "loss": 0.10411453247070312, "step": 1140 }, { "epoch": 0.1589911516756079, "grad_norm": 1.4753750562667847, "learning_rate": 9.782537073277238e-06, "loss": 0.12116622924804688, "step": 1141 }, { "epoch": 0.15913049536682228, "grad_norm": 0.8186841011047363, "learning_rate": 9.781849049327796e-06, "loss": 0.09048843383789062, "step": 1142 }, { "epoch": 0.15926983905803666, "grad_norm": 0.8888534307479858, "learning_rate": 9.781159962960667e-06, "loss": 0.09494495391845703, "step": 1143 }, { "epoch": 0.15940918274925103, "grad_norm": 1.268661618232727, "learning_rate": 9.78046981432895e-06, "loss": 0.10731077194213867, "step": 1144 }, { "epoch": 0.1595485264404654, "grad_norm": 1.141624093055725, "learning_rate": 9.77977860358598e-06, "loss": 0.1190643310546875, "step": 1145 }, { "epoch": 0.1596878701316798, "grad_norm": 1.4748849868774414, "learning_rate": 9.779086330885328e-06, "loss": 0.09515666961669922, "step": 1146 }, { "epoch": 0.15982721382289417, "grad_norm": 1.1145546436309814, "learning_rate": 9.778392996380803e-06, "loss": 0.09918451309204102, "step": 1147 }, { "epoch": 0.15996655751410854, "grad_norm": 0.7956634759902954, "learning_rate": 9.777698600226446e-06, "loss": 0.0804452896118164, "step": 1148 }, { "epoch": 0.16010590120532292, "grad_norm": 1.3588972091674805, "learning_rate": 9.777003142576536e-06, "loss": 0.0817403793334961, "step": 1149 }, { "epoch": 0.1602452448965373, "grad_norm": 1.153755784034729, "learning_rate": 9.77630662358559e-06, "loss": 0.10795307159423828, "step": 1150 }, { "epoch": 0.1603845885877517, "grad_norm": 1.9578222036361694, "learning_rate": 9.775609043408356e-06, "loss": 0.10352325439453125, "step": 1151 }, { "epoch": 0.16052393227896608, "grad_norm": 1.2248183488845825, "learning_rate": 9.774910402199821e-06, "loss": 0.08663177490234375, "step": 1152 }, { "epoch": 0.16066327597018046, "grad_norm": 1.5568251609802246, "learning_rate": 9.774210700115209e-06, "loss": 0.10350227355957031, "step": 1153 }, { "epoch": 0.16080261966139484, "grad_norm": 0.8503689169883728, "learning_rate": 9.773509937309978e-06, "loss": 0.12135887145996094, "step": 1154 }, { "epoch": 0.1609419633526092, "grad_norm": 0.9294819831848145, "learning_rate": 9.772808113939819e-06, "loss": 0.11413955688476562, "step": 1155 }, { "epoch": 0.1610813070438236, "grad_norm": 1.596540093421936, "learning_rate": 9.77210523016066e-06, "loss": 0.08065414428710938, "step": 1156 }, { "epoch": 0.16122065073503797, "grad_norm": 1.5375570058822632, "learning_rate": 9.771401286128668e-06, "loss": 0.08821678161621094, "step": 1157 }, { "epoch": 0.16135999442625235, "grad_norm": 1.1916730403900146, "learning_rate": 9.770696282000245e-06, "loss": 0.10048294067382812, "step": 1158 }, { "epoch": 0.16149933811746672, "grad_norm": 1.8586493730545044, "learning_rate": 9.769990217932023e-06, "loss": 0.08395099639892578, "step": 1159 }, { "epoch": 0.1616386818086811, "grad_norm": 2.3229503631591797, "learning_rate": 9.769283094080878e-06, "loss": 0.10968589782714844, "step": 1160 }, { "epoch": 0.1617780254998955, "grad_norm": 1.8285952806472778, "learning_rate": 9.768574910603912e-06, "loss": 0.10309028625488281, "step": 1161 }, { "epoch": 0.16191736919110988, "grad_norm": 1.1316295862197876, "learning_rate": 9.767865667658472e-06, "loss": 0.07615280151367188, "step": 1162 }, { "epoch": 0.16205671288232426, "grad_norm": 0.7905935645103455, "learning_rate": 9.76715536540213e-06, "loss": 0.08192825317382812, "step": 1163 }, { "epoch": 0.16219605657353864, "grad_norm": 1.3821555376052856, "learning_rate": 9.766444003992704e-06, "loss": 0.08113288879394531, "step": 1164 }, { "epoch": 0.16233540026475302, "grad_norm": 1.9384013414382935, "learning_rate": 9.765731583588237e-06, "loss": 0.10932731628417969, "step": 1165 }, { "epoch": 0.1624747439559674, "grad_norm": 2.051154136657715, "learning_rate": 9.765018104347017e-06, "loss": 0.12326812744140625, "step": 1166 }, { "epoch": 0.16261408764718177, "grad_norm": 1.5196720361709595, "learning_rate": 9.764303566427561e-06, "loss": 0.11877250671386719, "step": 1167 }, { "epoch": 0.16275343133839615, "grad_norm": 1.4125516414642334, "learning_rate": 9.763587969988626e-06, "loss": 0.08615303039550781, "step": 1168 }, { "epoch": 0.16289277502961053, "grad_norm": 1.8106799125671387, "learning_rate": 9.762871315189198e-06, "loss": 0.12387466430664062, "step": 1169 }, { "epoch": 0.1630321187208249, "grad_norm": 1.9158939123153687, "learning_rate": 9.7621536021885e-06, "loss": 0.11096954345703125, "step": 1170 }, { "epoch": 0.1631714624120393, "grad_norm": 1.09415864944458, "learning_rate": 9.761434831145995e-06, "loss": 0.09769535064697266, "step": 1171 }, { "epoch": 0.1633108061032537, "grad_norm": 0.9526848196983337, "learning_rate": 9.760715002221375e-06, "loss": 0.09103965759277344, "step": 1172 }, { "epoch": 0.16345014979446806, "grad_norm": 0.8261739015579224, "learning_rate": 9.759994115574571e-06, "loss": 0.087615966796875, "step": 1173 }, { "epoch": 0.16358949348568244, "grad_norm": 0.7631791234016418, "learning_rate": 9.759272171365746e-06, "loss": 0.10352420806884766, "step": 1174 }, { "epoch": 0.16372883717689682, "grad_norm": 1.3122955560684204, "learning_rate": 9.758549169755302e-06, "loss": 0.08828353881835938, "step": 1175 }, { "epoch": 0.1638681808681112, "grad_norm": 0.6068509221076965, "learning_rate": 9.757825110903872e-06, "loss": 0.07632255554199219, "step": 1176 }, { "epoch": 0.16400752455932557, "grad_norm": 1.751841425895691, "learning_rate": 9.757099994972323e-06, "loss": 0.09424495697021484, "step": 1177 }, { "epoch": 0.16414686825053995, "grad_norm": 1.2907423973083496, "learning_rate": 9.756373822121762e-06, "loss": 0.08220863342285156, "step": 1178 }, { "epoch": 0.16428621194175433, "grad_norm": 2.7610702514648438, "learning_rate": 9.75564659251353e-06, "loss": 0.1303863525390625, "step": 1179 }, { "epoch": 0.1644255556329687, "grad_norm": 0.9114015698432922, "learning_rate": 9.754918306309197e-06, "loss": 0.08099555969238281, "step": 1180 }, { "epoch": 0.16456489932418308, "grad_norm": 1.5623822212219238, "learning_rate": 9.754188963670573e-06, "loss": 0.07650470733642578, "step": 1181 }, { "epoch": 0.1647042430153975, "grad_norm": 2.7504591941833496, "learning_rate": 9.753458564759701e-06, "loss": 0.10783958435058594, "step": 1182 }, { "epoch": 0.16484358670661187, "grad_norm": 2.0857791900634766, "learning_rate": 9.752727109738859e-06, "loss": 0.09900093078613281, "step": 1183 }, { "epoch": 0.16498293039782624, "grad_norm": 1.4463368654251099, "learning_rate": 9.751994598770563e-06, "loss": 0.09311866760253906, "step": 1184 }, { "epoch": 0.16512227408904062, "grad_norm": 1.1940252780914307, "learning_rate": 9.751261032017553e-06, "loss": 0.1149749755859375, "step": 1185 }, { "epoch": 0.165261617780255, "grad_norm": 1.0813747644424438, "learning_rate": 9.750526409642818e-06, "loss": 0.08655548095703125, "step": 1186 }, { "epoch": 0.16540096147146938, "grad_norm": 0.9751739501953125, "learning_rate": 9.749790731809568e-06, "loss": 0.08813667297363281, "step": 1187 }, { "epoch": 0.16554030516268375, "grad_norm": 1.9588899612426758, "learning_rate": 9.74905399868126e-06, "loss": 0.11686325073242188, "step": 1188 }, { "epoch": 0.16567964885389813, "grad_norm": 1.1100391149520874, "learning_rate": 9.748316210421573e-06, "loss": 0.09420108795166016, "step": 1189 }, { "epoch": 0.1658189925451125, "grad_norm": 0.7508955001831055, "learning_rate": 9.747577367194432e-06, "loss": 0.07991981506347656, "step": 1190 }, { "epoch": 0.1659583362363269, "grad_norm": 1.1964166164398193, "learning_rate": 9.74683746916399e-06, "loss": 0.07317256927490234, "step": 1191 }, { "epoch": 0.1660976799275413, "grad_norm": 1.166247010231018, "learning_rate": 9.746096516494632e-06, "loss": 0.08791160583496094, "step": 1192 }, { "epoch": 0.16623702361875567, "grad_norm": 1.505084753036499, "learning_rate": 9.745354509350983e-06, "loss": 0.10103130340576172, "step": 1193 }, { "epoch": 0.16637636730997005, "grad_norm": 0.8795771598815918, "learning_rate": 9.744611447897902e-06, "loss": 0.0886383056640625, "step": 1194 }, { "epoch": 0.16651571100118442, "grad_norm": 0.8311167359352112, "learning_rate": 9.743867332300478e-06, "loss": 0.08408355712890625, "step": 1195 }, { "epoch": 0.1666550546923988, "grad_norm": 0.665740430355072, "learning_rate": 9.743122162724038e-06, "loss": 0.08578872680664062, "step": 1196 }, { "epoch": 0.16679439838361318, "grad_norm": 0.9940952658653259, "learning_rate": 9.742375939334141e-06, "loss": 0.10229969024658203, "step": 1197 }, { "epoch": 0.16693374207482756, "grad_norm": 1.2916537523269653, "learning_rate": 9.74162866229658e-06, "loss": 0.07493400573730469, "step": 1198 }, { "epoch": 0.16707308576604193, "grad_norm": 2.2252373695373535, "learning_rate": 9.740880331777383e-06, "loss": 0.086181640625, "step": 1199 }, { "epoch": 0.1672124294572563, "grad_norm": 1.8285987377166748, "learning_rate": 9.740130947942812e-06, "loss": 0.10569953918457031, "step": 1200 }, { "epoch": 0.1673517731484707, "grad_norm": 0.7170781493186951, "learning_rate": 9.739380510959365e-06, "loss": 0.07727813720703125, "step": 1201 }, { "epoch": 0.1674911168396851, "grad_norm": 1.0194393396377563, "learning_rate": 9.738629020993769e-06, "loss": 0.08393096923828125, "step": 1202 }, { "epoch": 0.16763046053089947, "grad_norm": 1.5907315015792847, "learning_rate": 9.737876478212989e-06, "loss": 0.10532569885253906, "step": 1203 }, { "epoch": 0.16776980422211385, "grad_norm": 1.1496648788452148, "learning_rate": 9.737122882784225e-06, "loss": 0.10015106201171875, "step": 1204 }, { "epoch": 0.16790914791332823, "grad_norm": 0.8343194127082825, "learning_rate": 9.736368234874904e-06, "loss": 0.09713363647460938, "step": 1205 }, { "epoch": 0.1680484916045426, "grad_norm": 0.975882887840271, "learning_rate": 9.735612534652697e-06, "loss": 0.08704948425292969, "step": 1206 }, { "epoch": 0.16818783529575698, "grad_norm": 1.4861931800842285, "learning_rate": 9.734855782285499e-06, "loss": 0.07485103607177734, "step": 1207 }, { "epoch": 0.16832717898697136, "grad_norm": 1.859553337097168, "learning_rate": 9.734097977941446e-06, "loss": 0.11151123046875, "step": 1208 }, { "epoch": 0.16846652267818574, "grad_norm": 1.0335298776626587, "learning_rate": 9.733339121788903e-06, "loss": 0.1052560806274414, "step": 1209 }, { "epoch": 0.16860586636940011, "grad_norm": 2.236985445022583, "learning_rate": 9.73257921399647e-06, "loss": 0.11979866027832031, "step": 1210 }, { "epoch": 0.1687452100606145, "grad_norm": 1.5313371419906616, "learning_rate": 9.731818254732983e-06, "loss": 0.10327720642089844, "step": 1211 }, { "epoch": 0.1688845537518289, "grad_norm": 1.022464632987976, "learning_rate": 9.73105624416751e-06, "loss": 0.07732963562011719, "step": 1212 }, { "epoch": 0.16902389744304328, "grad_norm": 0.6667361259460449, "learning_rate": 9.73029318246935e-06, "loss": 0.0798635482788086, "step": 1213 }, { "epoch": 0.16916324113425765, "grad_norm": 0.8343891501426697, "learning_rate": 9.72952906980804e-06, "loss": 0.08612632751464844, "step": 1214 }, { "epoch": 0.16930258482547203, "grad_norm": 0.9165477752685547, "learning_rate": 9.72876390635335e-06, "loss": 0.10829353332519531, "step": 1215 }, { "epoch": 0.1694419285166864, "grad_norm": 0.8803470730781555, "learning_rate": 9.727997692275275e-06, "loss": 0.09906578063964844, "step": 1216 }, { "epoch": 0.16958127220790079, "grad_norm": 1.035496473312378, "learning_rate": 9.727230427744058e-06, "loss": 0.10280895233154297, "step": 1217 }, { "epoch": 0.16972061589911516, "grad_norm": 1.263916254043579, "learning_rate": 9.726462112930165e-06, "loss": 0.10481739044189453, "step": 1218 }, { "epoch": 0.16985995959032954, "grad_norm": 1.2625648975372314, "learning_rate": 9.725692748004295e-06, "loss": 0.08800983428955078, "step": 1219 }, { "epoch": 0.16999930328154392, "grad_norm": 1.4347437620162964, "learning_rate": 9.724922333137385e-06, "loss": 0.08849525451660156, "step": 1220 }, { "epoch": 0.1701386469727583, "grad_norm": 1.4844249486923218, "learning_rate": 9.724150868500607e-06, "loss": 0.0933537483215332, "step": 1221 }, { "epoch": 0.1702779906639727, "grad_norm": 1.319075107574463, "learning_rate": 9.72337835426536e-06, "loss": 0.09779739379882812, "step": 1222 }, { "epoch": 0.17041733435518708, "grad_norm": 1.8355998992919922, "learning_rate": 9.722604790603279e-06, "loss": 0.10584259033203125, "step": 1223 }, { "epoch": 0.17055667804640146, "grad_norm": 0.8977566957473755, "learning_rate": 9.721830177686231e-06, "loss": 0.06727027893066406, "step": 1224 }, { "epoch": 0.17069602173761583, "grad_norm": 0.6530700922012329, "learning_rate": 9.72105451568632e-06, "loss": 0.06859779357910156, "step": 1225 }, { "epoch": 0.1708353654288302, "grad_norm": 1.3034313917160034, "learning_rate": 9.720277804775879e-06, "loss": 0.07234001159667969, "step": 1226 }, { "epoch": 0.1709747091200446, "grad_norm": 1.7412443161010742, "learning_rate": 9.719500045127475e-06, "loss": 0.0932149887084961, "step": 1227 }, { "epoch": 0.17111405281125897, "grad_norm": 1.0956732034683228, "learning_rate": 9.718721236913909e-06, "loss": 0.08623313903808594, "step": 1228 }, { "epoch": 0.17125339650247334, "grad_norm": 1.3423274755477905, "learning_rate": 9.717941380308216e-06, "loss": 0.10395050048828125, "step": 1229 }, { "epoch": 0.17139274019368772, "grad_norm": 0.8717119693756104, "learning_rate": 9.717160475483659e-06, "loss": 0.10861778259277344, "step": 1230 }, { "epoch": 0.1715320838849021, "grad_norm": 1.5736032724380493, "learning_rate": 9.71637852261374e-06, "loss": 0.10129547119140625, "step": 1231 }, { "epoch": 0.1716714275761165, "grad_norm": 0.8196880221366882, "learning_rate": 9.71559552187219e-06, "loss": 0.06345081329345703, "step": 1232 }, { "epoch": 0.17181077126733088, "grad_norm": 1.6678080558776855, "learning_rate": 9.714811473432973e-06, "loss": 0.07780933380126953, "step": 1233 }, { "epoch": 0.17195011495854526, "grad_norm": 1.0144964456558228, "learning_rate": 9.714026377470287e-06, "loss": 0.0688333511352539, "step": 1234 }, { "epoch": 0.17208945864975964, "grad_norm": 1.5091493129730225, "learning_rate": 9.713240234158565e-06, "loss": 0.10831069946289062, "step": 1235 }, { "epoch": 0.172228802340974, "grad_norm": 3.1040701866149902, "learning_rate": 9.712453043672467e-06, "loss": 0.08608818054199219, "step": 1236 }, { "epoch": 0.1723681460321884, "grad_norm": 1.0824999809265137, "learning_rate": 9.71166480618689e-06, "loss": 0.07570934295654297, "step": 1237 }, { "epoch": 0.17250748972340277, "grad_norm": 0.9848284125328064, "learning_rate": 9.71087552187696e-06, "loss": 0.09514236450195312, "step": 1238 }, { "epoch": 0.17264683341461715, "grad_norm": 0.8655440807342529, "learning_rate": 9.710085190918044e-06, "loss": 0.06940841674804688, "step": 1239 }, { "epoch": 0.17278617710583152, "grad_norm": 1.7679252624511719, "learning_rate": 9.70929381348573e-06, "loss": 0.10305595397949219, "step": 1240 }, { "epoch": 0.1729255207970459, "grad_norm": 1.246396541595459, "learning_rate": 9.708501389755846e-06, "loss": 0.09487152099609375, "step": 1241 }, { "epoch": 0.1730648644882603, "grad_norm": 0.8326136469841003, "learning_rate": 9.70770791990445e-06, "loss": 0.08181571960449219, "step": 1242 }, { "epoch": 0.17320420817947468, "grad_norm": 3.195253849029541, "learning_rate": 9.706913404107832e-06, "loss": 0.1127023696899414, "step": 1243 }, { "epoch": 0.17334355187068906, "grad_norm": 2.796140193939209, "learning_rate": 9.706117842542517e-06, "loss": 0.1083984375, "step": 1244 }, { "epoch": 0.17348289556190344, "grad_norm": 2.5570337772369385, "learning_rate": 9.70532123538526e-06, "loss": 0.10183334350585938, "step": 1245 }, { "epoch": 0.17362223925311782, "grad_norm": 1.1588389873504639, "learning_rate": 9.704523582813049e-06, "loss": 0.0998678207397461, "step": 1246 }, { "epoch": 0.1737615829443322, "grad_norm": 1.0359927415847778, "learning_rate": 9.703724885003102e-06, "loss": 0.0774993896484375, "step": 1247 }, { "epoch": 0.17390092663554657, "grad_norm": 1.020087480545044, "learning_rate": 9.702925142132876e-06, "loss": 0.07734489440917969, "step": 1248 }, { "epoch": 0.17404027032676095, "grad_norm": 1.0521235466003418, "learning_rate": 9.70212435438005e-06, "loss": 0.08890342712402344, "step": 1249 }, { "epoch": 0.17417961401797533, "grad_norm": 1.1235849857330322, "learning_rate": 9.701322521922549e-06, "loss": 0.09887886047363281, "step": 1250 }, { "epoch": 0.1743189577091897, "grad_norm": 1.6128746271133423, "learning_rate": 9.700519644938513e-06, "loss": 0.12009048461914062, "step": 1251 }, { "epoch": 0.1744583014004041, "grad_norm": 1.713385820388794, "learning_rate": 9.699715723606327e-06, "loss": 0.09670352935791016, "step": 1252 }, { "epoch": 0.1745976450916185, "grad_norm": 0.8505555987358093, "learning_rate": 9.698910758104603e-06, "loss": 0.07814407348632812, "step": 1253 }, { "epoch": 0.17473698878283286, "grad_norm": 2.354280948638916, "learning_rate": 9.698104748612187e-06, "loss": 0.1351184844970703, "step": 1254 }, { "epoch": 0.17487633247404724, "grad_norm": 2.577026605606079, "learning_rate": 9.697297695308157e-06, "loss": 0.11858367919921875, "step": 1255 }, { "epoch": 0.17501567616526162, "grad_norm": 0.7710025310516357, "learning_rate": 9.696489598371817e-06, "loss": 0.0741724967956543, "step": 1256 }, { "epoch": 0.175155019856476, "grad_norm": 1.0190132856369019, "learning_rate": 9.695680457982713e-06, "loss": 0.10178947448730469, "step": 1257 }, { "epoch": 0.17529436354769037, "grad_norm": 1.9377479553222656, "learning_rate": 9.694870274320616e-06, "loss": 0.09333992004394531, "step": 1258 }, { "epoch": 0.17543370723890475, "grad_norm": 2.2412078380584717, "learning_rate": 9.694059047565529e-06, "loss": 0.0970458984375, "step": 1259 }, { "epoch": 0.17557305093011913, "grad_norm": 1.2209364175796509, "learning_rate": 9.69324677789769e-06, "loss": 0.07793807983398438, "step": 1260 }, { "epoch": 0.1757123946213335, "grad_norm": 0.5498433709144592, "learning_rate": 9.692433465497562e-06, "loss": 0.0631418228149414, "step": 1261 }, { "epoch": 0.1758517383125479, "grad_norm": 0.7798569798469543, "learning_rate": 9.69161911054585e-06, "loss": 0.09348297119140625, "step": 1262 }, { "epoch": 0.1759910820037623, "grad_norm": 2.7917745113372803, "learning_rate": 9.690803713223485e-06, "loss": 0.10541725158691406, "step": 1263 }, { "epoch": 0.17613042569497667, "grad_norm": 1.356605887413025, "learning_rate": 9.689987273711626e-06, "loss": 0.07784271240234375, "step": 1264 }, { "epoch": 0.17626976938619104, "grad_norm": 1.9664082527160645, "learning_rate": 9.68916979219167e-06, "loss": 0.07863044738769531, "step": 1265 }, { "epoch": 0.17640911307740542, "grad_norm": 1.029004693031311, "learning_rate": 9.68835126884524e-06, "loss": 0.08534622192382812, "step": 1266 }, { "epoch": 0.1765484567686198, "grad_norm": 1.4491554498672485, "learning_rate": 9.687531703854196e-06, "loss": 0.10631179809570312, "step": 1267 }, { "epoch": 0.17668780045983418, "grad_norm": 1.2923908233642578, "learning_rate": 9.686711097400625e-06, "loss": 0.0818634033203125, "step": 1268 }, { "epoch": 0.17682714415104855, "grad_norm": 0.9397392868995667, "learning_rate": 9.685889449666849e-06, "loss": 0.09821701049804688, "step": 1269 }, { "epoch": 0.17696648784226293, "grad_norm": 1.4564980268478394, "learning_rate": 9.685066760835417e-06, "loss": 0.11620092391967773, "step": 1270 }, { "epoch": 0.1771058315334773, "grad_norm": 2.3344173431396484, "learning_rate": 9.684243031089113e-06, "loss": 0.11724138259887695, "step": 1271 }, { "epoch": 0.17724517522469171, "grad_norm": 1.4394267797470093, "learning_rate": 9.68341826061095e-06, "loss": 0.09792232513427734, "step": 1272 }, { "epoch": 0.1773845189159061, "grad_norm": 1.2998223304748535, "learning_rate": 9.682592449584174e-06, "loss": 0.10722827911376953, "step": 1273 }, { "epoch": 0.17752386260712047, "grad_norm": 1.0042469501495361, "learning_rate": 9.68176559819226e-06, "loss": 0.1020517349243164, "step": 1274 }, { "epoch": 0.17766320629833485, "grad_norm": 2.5002217292785645, "learning_rate": 9.680937706618919e-06, "loss": 0.12885665893554688, "step": 1275 }, { "epoch": 0.17780254998954922, "grad_norm": 2.4773647785186768, "learning_rate": 9.680108775048087e-06, "loss": 0.09636116027832031, "step": 1276 }, { "epoch": 0.1779418936807636, "grad_norm": 1.050142765045166, "learning_rate": 9.679278803663932e-06, "loss": 0.09124755859375, "step": 1277 }, { "epoch": 0.17808123737197798, "grad_norm": 1.7325665950775146, "learning_rate": 9.678447792650858e-06, "loss": 0.08367538452148438, "step": 1278 }, { "epoch": 0.17822058106319236, "grad_norm": 0.6951298713684082, "learning_rate": 9.677615742193495e-06, "loss": 0.0889129638671875, "step": 1279 }, { "epoch": 0.17835992475440673, "grad_norm": 0.9774685502052307, "learning_rate": 9.676782652476705e-06, "loss": 0.07532405853271484, "step": 1280 }, { "epoch": 0.1784992684456211, "grad_norm": 1.0664173364639282, "learning_rate": 9.675948523685583e-06, "loss": 0.09251117706298828, "step": 1281 }, { "epoch": 0.17863861213683552, "grad_norm": 0.7073770761489868, "learning_rate": 9.675113356005453e-06, "loss": 0.09807395935058594, "step": 1282 }, { "epoch": 0.1787779558280499, "grad_norm": 1.119476318359375, "learning_rate": 9.674277149621869e-06, "loss": 0.08405113220214844, "step": 1283 }, { "epoch": 0.17891729951926427, "grad_norm": 1.737236499786377, "learning_rate": 9.673439904720619e-06, "loss": 0.09703826904296875, "step": 1284 }, { "epoch": 0.17905664321047865, "grad_norm": 1.3132320642471313, "learning_rate": 9.672601621487718e-06, "loss": 0.08998584747314453, "step": 1285 }, { "epoch": 0.17919598690169303, "grad_norm": 0.5210084319114685, "learning_rate": 9.671762300109415e-06, "loss": 0.08054161071777344, "step": 1286 }, { "epoch": 0.1793353305929074, "grad_norm": 1.3310471773147583, "learning_rate": 9.670921940772186e-06, "loss": 0.12939834594726562, "step": 1287 }, { "epoch": 0.17947467428412178, "grad_norm": 1.67942214012146, "learning_rate": 9.670080543662742e-06, "loss": 0.11445426940917969, "step": 1288 }, { "epoch": 0.17961401797533616, "grad_norm": 2.388690710067749, "learning_rate": 9.669238108968018e-06, "loss": 0.1295337677001953, "step": 1289 }, { "epoch": 0.17975336166655054, "grad_norm": 0.8554693460464478, "learning_rate": 9.668394636875188e-06, "loss": 0.0926961898803711, "step": 1290 }, { "epoch": 0.17989270535776491, "grad_norm": 0.8732423186302185, "learning_rate": 9.667550127571653e-06, "loss": 0.09302043914794922, "step": 1291 }, { "epoch": 0.18003204904897932, "grad_norm": 1.2994273900985718, "learning_rate": 9.666704581245041e-06, "loss": 0.10396814346313477, "step": 1292 }, { "epoch": 0.1801713927401937, "grad_norm": 0.7099890112876892, "learning_rate": 9.665857998083212e-06, "loss": 0.08876991271972656, "step": 1293 }, { "epoch": 0.18031073643140808, "grad_norm": 0.777015745639801, "learning_rate": 9.66501037827426e-06, "loss": 0.09668254852294922, "step": 1294 }, { "epoch": 0.18045008012262245, "grad_norm": 1.6920137405395508, "learning_rate": 9.664161722006506e-06, "loss": 0.09070676565170288, "step": 1295 }, { "epoch": 0.18058942381383683, "grad_norm": 1.3095040321350098, "learning_rate": 9.663312029468504e-06, "loss": 0.1050558090209961, "step": 1296 }, { "epoch": 0.1807287675050512, "grad_norm": 1.7323347330093384, "learning_rate": 9.662461300849031e-06, "loss": 0.08585739135742188, "step": 1297 }, { "epoch": 0.18086811119626559, "grad_norm": 1.5496801137924194, "learning_rate": 9.661609536337104e-06, "loss": 0.11704826354980469, "step": 1298 }, { "epoch": 0.18100745488747996, "grad_norm": 2.506335973739624, "learning_rate": 9.660756736121964e-06, "loss": 0.11313247680664062, "step": 1299 }, { "epoch": 0.18114679857869434, "grad_norm": 1.2854666709899902, "learning_rate": 9.659902900393086e-06, "loss": 0.08018207550048828, "step": 1300 }, { "epoch": 0.18128614226990872, "grad_norm": 0.916867196559906, "learning_rate": 9.659048029340169e-06, "loss": 0.10668182373046875, "step": 1301 }, { "epoch": 0.18142548596112312, "grad_norm": 0.7550085783004761, "learning_rate": 9.658192123153149e-06, "loss": 0.07761192321777344, "step": 1302 }, { "epoch": 0.1815648296523375, "grad_norm": 1.1836977005004883, "learning_rate": 9.657335182022187e-06, "loss": 0.101776123046875, "step": 1303 }, { "epoch": 0.18170417334355188, "grad_norm": 1.2813057899475098, "learning_rate": 9.656477206137675e-06, "loss": 0.09202861785888672, "step": 1304 }, { "epoch": 0.18184351703476626, "grad_norm": 1.1595286130905151, "learning_rate": 9.655618195690239e-06, "loss": 0.12033843994140625, "step": 1305 }, { "epoch": 0.18198286072598063, "grad_norm": 1.0230021476745605, "learning_rate": 9.654758150870728e-06, "loss": 0.09097671508789062, "step": 1306 }, { "epoch": 0.182122204417195, "grad_norm": 1.3507204055786133, "learning_rate": 9.653897071870226e-06, "loss": 0.10140228271484375, "step": 1307 }, { "epoch": 0.1822615481084094, "grad_norm": 0.8329418897628784, "learning_rate": 9.653034958880045e-06, "loss": 0.09126091003417969, "step": 1308 }, { "epoch": 0.18240089179962377, "grad_norm": 0.8642613291740417, "learning_rate": 9.652171812091728e-06, "loss": 0.08509635925292969, "step": 1309 }, { "epoch": 0.18254023549083814, "grad_norm": 1.0386313199996948, "learning_rate": 9.651307631697044e-06, "loss": 0.09396743774414062, "step": 1310 }, { "epoch": 0.18267957918205252, "grad_norm": 1.4211556911468506, "learning_rate": 9.650442417887995e-06, "loss": 0.0880899429321289, "step": 1311 }, { "epoch": 0.18281892287326693, "grad_norm": 0.8001335859298706, "learning_rate": 9.649576170856814e-06, "loss": 0.08299064636230469, "step": 1312 }, { "epoch": 0.1829582665644813, "grad_norm": 2.2559750080108643, "learning_rate": 9.64870889079596e-06, "loss": 0.09721946716308594, "step": 1313 }, { "epoch": 0.18309761025569568, "grad_norm": 0.9657684564590454, "learning_rate": 9.64784057789812e-06, "loss": 0.07274484634399414, "step": 1314 }, { "epoch": 0.18323695394691006, "grad_norm": 1.7231841087341309, "learning_rate": 9.646971232356215e-06, "loss": 0.11386775970458984, "step": 1315 }, { "epoch": 0.18337629763812444, "grad_norm": 1.3315321207046509, "learning_rate": 9.646100854363396e-06, "loss": 0.08524036407470703, "step": 1316 }, { "epoch": 0.1835156413293388, "grad_norm": 1.3092519044876099, "learning_rate": 9.64522944411304e-06, "loss": 0.11288261413574219, "step": 1317 }, { "epoch": 0.1836549850205532, "grad_norm": 0.814566969871521, "learning_rate": 9.644357001798752e-06, "loss": 0.08892822265625, "step": 1318 }, { "epoch": 0.18379432871176757, "grad_norm": 1.217764139175415, "learning_rate": 9.643483527614372e-06, "loss": 0.0858163833618164, "step": 1319 }, { "epoch": 0.18393367240298195, "grad_norm": 1.1304620504379272, "learning_rate": 9.642609021753964e-06, "loss": 0.09758853912353516, "step": 1320 }, { "epoch": 0.18407301609419632, "grad_norm": 1.5792527198791504, "learning_rate": 9.641733484411823e-06, "loss": 0.08026981353759766, "step": 1321 }, { "epoch": 0.18421235978541073, "grad_norm": 2.0025131702423096, "learning_rate": 9.640856915782477e-06, "loss": 0.12569808959960938, "step": 1322 }, { "epoch": 0.1843517034766251, "grad_norm": 1.7611826658248901, "learning_rate": 9.639979316060675e-06, "loss": 0.10508346557617188, "step": 1323 }, { "epoch": 0.18449104716783948, "grad_norm": 0.9010658860206604, "learning_rate": 9.639100685441403e-06, "loss": 0.08208847045898438, "step": 1324 }, { "epoch": 0.18463039085905386, "grad_norm": 1.6554841995239258, "learning_rate": 9.638221024119869e-06, "loss": 0.09917640686035156, "step": 1325 }, { "epoch": 0.18476973455026824, "grad_norm": 0.6949111819267273, "learning_rate": 9.637340332291518e-06, "loss": 0.08961963653564453, "step": 1326 }, { "epoch": 0.18490907824148262, "grad_norm": 3.059046506881714, "learning_rate": 9.636458610152015e-06, "loss": 0.11687088012695312, "step": 1327 }, { "epoch": 0.185048421932697, "grad_norm": 1.615980863571167, "learning_rate": 9.635575857897264e-06, "loss": 0.08887958526611328, "step": 1328 }, { "epoch": 0.18518776562391137, "grad_norm": 0.9089513421058655, "learning_rate": 9.634692075723386e-06, "loss": 0.0690298080444336, "step": 1329 }, { "epoch": 0.18532710931512575, "grad_norm": 1.3026353120803833, "learning_rate": 9.633807263826745e-06, "loss": 0.10392379760742188, "step": 1330 }, { "epoch": 0.18546645300634013, "grad_norm": 1.986772894859314, "learning_rate": 9.632921422403918e-06, "loss": 0.09524345397949219, "step": 1331 }, { "epoch": 0.18560579669755453, "grad_norm": 1.9386061429977417, "learning_rate": 9.632034551651723e-06, "loss": 0.11151981353759766, "step": 1332 }, { "epoch": 0.1857451403887689, "grad_norm": 1.7023369073867798, "learning_rate": 9.631146651767202e-06, "loss": 0.10352516174316406, "step": 1333 }, { "epoch": 0.1858844840799833, "grad_norm": 1.3475620746612549, "learning_rate": 9.630257722947625e-06, "loss": 0.09995651245117188, "step": 1334 }, { "epoch": 0.18602382777119766, "grad_norm": 0.6333151459693909, "learning_rate": 9.629367765390494e-06, "loss": 0.08044815063476562, "step": 1335 }, { "epoch": 0.18616317146241204, "grad_norm": 1.0882341861724854, "learning_rate": 9.628476779293536e-06, "loss": 0.0941152572631836, "step": 1336 }, { "epoch": 0.18630251515362642, "grad_norm": 0.5490504503250122, "learning_rate": 9.627584764854706e-06, "loss": 0.06722450256347656, "step": 1337 }, { "epoch": 0.1864418588448408, "grad_norm": 1.3107653856277466, "learning_rate": 9.626691722272193e-06, "loss": 0.08729743957519531, "step": 1338 }, { "epoch": 0.18658120253605517, "grad_norm": 0.5862479209899902, "learning_rate": 9.625797651744406e-06, "loss": 0.07584857940673828, "step": 1339 }, { "epoch": 0.18672054622726955, "grad_norm": 1.3785171508789062, "learning_rate": 9.62490255346999e-06, "loss": 0.09929656982421875, "step": 1340 }, { "epoch": 0.18685988991848393, "grad_norm": 1.251007318496704, "learning_rate": 9.624006427647817e-06, "loss": 0.11145210266113281, "step": 1341 }, { "epoch": 0.18699923360969833, "grad_norm": 0.9476776719093323, "learning_rate": 9.623109274476982e-06, "loss": 0.07207965850830078, "step": 1342 }, { "epoch": 0.1871385773009127, "grad_norm": 1.7354485988616943, "learning_rate": 9.622211094156812e-06, "loss": 0.0982980728149414, "step": 1343 }, { "epoch": 0.1872779209921271, "grad_norm": 0.8749983906745911, "learning_rate": 9.621311886886866e-06, "loss": 0.113006591796875, "step": 1344 }, { "epoch": 0.18741726468334147, "grad_norm": 0.9540248513221741, "learning_rate": 9.620411652866926e-06, "loss": 0.09606552124023438, "step": 1345 }, { "epoch": 0.18755660837455584, "grad_norm": 0.8494647145271301, "learning_rate": 9.619510392297e-06, "loss": 0.10596847534179688, "step": 1346 }, { "epoch": 0.18769595206577022, "grad_norm": 2.2285873889923096, "learning_rate": 9.618608105377331e-06, "loss": 0.11310863494873047, "step": 1347 }, { "epoch": 0.1878352957569846, "grad_norm": 1.5037559270858765, "learning_rate": 9.617704792308387e-06, "loss": 0.09644508361816406, "step": 1348 }, { "epoch": 0.18797463944819898, "grad_norm": 0.7214293479919434, "learning_rate": 9.61680045329086e-06, "loss": 0.08087348937988281, "step": 1349 }, { "epoch": 0.18811398313941335, "grad_norm": 1.2798100709915161, "learning_rate": 9.615895088525677e-06, "loss": 0.09976482391357422, "step": 1350 }, { "epoch": 0.18825332683062773, "grad_norm": 1.0505080223083496, "learning_rate": 9.614988698213987e-06, "loss": 0.0901947021484375, "step": 1351 }, { "epoch": 0.18839267052184214, "grad_norm": 1.288048505783081, "learning_rate": 9.614081282557172e-06, "loss": 0.10741758346557617, "step": 1352 }, { "epoch": 0.18853201421305651, "grad_norm": 0.8707133531570435, "learning_rate": 9.613172841756835e-06, "loss": 0.08953857421875, "step": 1353 }, { "epoch": 0.1886713579042709, "grad_norm": 1.2382354736328125, "learning_rate": 9.612263376014815e-06, "loss": 0.11561965942382812, "step": 1354 }, { "epoch": 0.18881070159548527, "grad_norm": 1.4100148677825928, "learning_rate": 9.611352885533171e-06, "loss": 0.1218404769897461, "step": 1355 }, { "epoch": 0.18895004528669965, "grad_norm": 1.1017041206359863, "learning_rate": 9.610441370514196e-06, "loss": 0.07653999328613281, "step": 1356 }, { "epoch": 0.18908938897791402, "grad_norm": 1.1789462566375732, "learning_rate": 9.609528831160407e-06, "loss": 0.09505748748779297, "step": 1357 }, { "epoch": 0.1892287326691284, "grad_norm": 1.3538581132888794, "learning_rate": 9.608615267674548e-06, "loss": 0.10825538635253906, "step": 1358 }, { "epoch": 0.18936807636034278, "grad_norm": 2.1647591590881348, "learning_rate": 9.607700680259593e-06, "loss": 0.09601831436157227, "step": 1359 }, { "epoch": 0.18950742005155716, "grad_norm": 1.6534696817398071, "learning_rate": 9.606785069118742e-06, "loss": 0.11366653442382812, "step": 1360 }, { "epoch": 0.18964676374277153, "grad_norm": 1.338085412979126, "learning_rate": 9.605868434455426e-06, "loss": 0.0807638168334961, "step": 1361 }, { "epoch": 0.18978610743398594, "grad_norm": 1.9100327491760254, "learning_rate": 9.604950776473294e-06, "loss": 0.09415006637573242, "step": 1362 }, { "epoch": 0.18992545112520032, "grad_norm": 1.206493616104126, "learning_rate": 9.604032095376234e-06, "loss": 0.09041213989257812, "step": 1363 }, { "epoch": 0.1900647948164147, "grad_norm": 2.68660569190979, "learning_rate": 9.603112391368354e-06, "loss": 0.11798286437988281, "step": 1364 }, { "epoch": 0.19020413850762907, "grad_norm": 0.8834440112113953, "learning_rate": 9.602191664653992e-06, "loss": 0.08881378173828125, "step": 1365 }, { "epoch": 0.19034348219884345, "grad_norm": 1.4803671836853027, "learning_rate": 9.601269915437713e-06, "loss": 0.10423660278320312, "step": 1366 }, { "epoch": 0.19048282589005783, "grad_norm": 1.0087361335754395, "learning_rate": 9.600347143924305e-06, "loss": 0.08430957794189453, "step": 1367 }, { "epoch": 0.1906221695812722, "grad_norm": 1.414247751235962, "learning_rate": 9.599423350318791e-06, "loss": 0.10066747665405273, "step": 1368 }, { "epoch": 0.19076151327248658, "grad_norm": 1.0969443321228027, "learning_rate": 9.598498534826414e-06, "loss": 0.08020782470703125, "step": 1369 }, { "epoch": 0.19090085696370096, "grad_norm": 2.4491240978240967, "learning_rate": 9.597572697652649e-06, "loss": 0.08098745346069336, "step": 1370 }, { "epoch": 0.19104020065491534, "grad_norm": 0.8836453557014465, "learning_rate": 9.596645839003196e-06, "loss": 0.06591224670410156, "step": 1371 }, { "epoch": 0.19117954434612974, "grad_norm": 1.491790771484375, "learning_rate": 9.595717959083978e-06, "loss": 0.12671661376953125, "step": 1372 }, { "epoch": 0.19131888803734412, "grad_norm": 1.437143087387085, "learning_rate": 9.594789058101154e-06, "loss": 0.11072969436645508, "step": 1373 }, { "epoch": 0.1914582317285585, "grad_norm": 0.9976823925971985, "learning_rate": 9.593859136261102e-06, "loss": 0.1030573844909668, "step": 1374 }, { "epoch": 0.19159757541977288, "grad_norm": 0.9289141893386841, "learning_rate": 9.592928193770427e-06, "loss": 0.10436820983886719, "step": 1375 }, { "epoch": 0.19173691911098725, "grad_norm": 1.5449023246765137, "learning_rate": 9.591996230835968e-06, "loss": 0.10939598083496094, "step": 1376 }, { "epoch": 0.19187626280220163, "grad_norm": 1.302983283996582, "learning_rate": 9.591063247664783e-06, "loss": 0.07718563079833984, "step": 1377 }, { "epoch": 0.192015606493416, "grad_norm": 0.7684074640274048, "learning_rate": 9.59012924446416e-06, "loss": 0.08358478546142578, "step": 1378 }, { "epoch": 0.19215495018463039, "grad_norm": 2.276838779449463, "learning_rate": 9.589194221441614e-06, "loss": 0.1040802001953125, "step": 1379 }, { "epoch": 0.19229429387584476, "grad_norm": 1.6690460443496704, "learning_rate": 9.588258178804884e-06, "loss": 0.1002349853515625, "step": 1380 }, { "epoch": 0.19243363756705914, "grad_norm": 1.8173017501831055, "learning_rate": 9.587321116761938e-06, "loss": 0.09808158874511719, "step": 1381 }, { "epoch": 0.19257298125827352, "grad_norm": 1.1685364246368408, "learning_rate": 9.586383035520972e-06, "loss": 0.09303569793701172, "step": 1382 }, { "epoch": 0.19271232494948792, "grad_norm": 0.7845630049705505, "learning_rate": 9.585443935290403e-06, "loss": 0.07396697998046875, "step": 1383 }, { "epoch": 0.1928516686407023, "grad_norm": 0.9965663552284241, "learning_rate": 9.58450381627888e-06, "loss": 0.06812000274658203, "step": 1384 }, { "epoch": 0.19299101233191668, "grad_norm": 1.1048747301101685, "learning_rate": 9.583562678695275e-06, "loss": 0.10100746154785156, "step": 1385 }, { "epoch": 0.19313035602313106, "grad_norm": 0.7664040327072144, "learning_rate": 9.582620522748686e-06, "loss": 0.08623886108398438, "step": 1386 }, { "epoch": 0.19326969971434543, "grad_norm": 1.1064105033874512, "learning_rate": 9.58167734864844e-06, "loss": 0.09709358215332031, "step": 1387 }, { "epoch": 0.1934090434055598, "grad_norm": 1.0945274829864502, "learning_rate": 9.58073315660409e-06, "loss": 0.07230758666992188, "step": 1388 }, { "epoch": 0.1935483870967742, "grad_norm": 0.8666219115257263, "learning_rate": 9.579787946825411e-06, "loss": 0.10643577575683594, "step": 1389 }, { "epoch": 0.19368773078798857, "grad_norm": 0.9240849018096924, "learning_rate": 9.57884171952241e-06, "loss": 0.1015939712524414, "step": 1390 }, { "epoch": 0.19382707447920294, "grad_norm": 0.8411861062049866, "learning_rate": 9.577894474905314e-06, "loss": 0.09719181060791016, "step": 1391 }, { "epoch": 0.19396641817041732, "grad_norm": 1.3740870952606201, "learning_rate": 9.576946213184583e-06, "loss": 0.08782958984375, "step": 1392 }, { "epoch": 0.19410576186163173, "grad_norm": 0.6253005266189575, "learning_rate": 9.575996934570896e-06, "loss": 0.09147453308105469, "step": 1393 }, { "epoch": 0.1942451055528461, "grad_norm": 0.740058422088623, "learning_rate": 9.57504663927516e-06, "loss": 0.08763980865478516, "step": 1394 }, { "epoch": 0.19438444924406048, "grad_norm": 0.9923404455184937, "learning_rate": 9.574095327508513e-06, "loss": 0.07653236389160156, "step": 1395 }, { "epoch": 0.19452379293527486, "grad_norm": 1.1781693696975708, "learning_rate": 9.573142999482313e-06, "loss": 0.09936904907226562, "step": 1396 }, { "epoch": 0.19466313662648924, "grad_norm": 1.1256449222564697, "learning_rate": 9.572189655408144e-06, "loss": 0.07156944274902344, "step": 1397 }, { "epoch": 0.1948024803177036, "grad_norm": 0.8480802178382874, "learning_rate": 9.571235295497818e-06, "loss": 0.09099769592285156, "step": 1398 }, { "epoch": 0.194941824008918, "grad_norm": 1.1207921504974365, "learning_rate": 9.570279919963373e-06, "loss": 0.1016082763671875, "step": 1399 }, { "epoch": 0.19508116770013237, "grad_norm": 1.1014158725738525, "learning_rate": 9.569323529017071e-06, "loss": 0.09241390228271484, "step": 1400 }, { "epoch": 0.19522051139134675, "grad_norm": 1.200685977935791, "learning_rate": 9.568366122871399e-06, "loss": 0.09761810302734375, "step": 1401 }, { "epoch": 0.19535985508256112, "grad_norm": 0.8176726698875427, "learning_rate": 9.567407701739075e-06, "loss": 0.08722114562988281, "step": 1402 }, { "epoch": 0.19549919877377553, "grad_norm": 1.223583698272705, "learning_rate": 9.566448265833034e-06, "loss": 0.09470748901367188, "step": 1403 }, { "epoch": 0.1956385424649899, "grad_norm": 1.223902940750122, "learning_rate": 9.56548781536644e-06, "loss": 0.10437667369842529, "step": 1404 }, { "epoch": 0.19577788615620428, "grad_norm": 1.395273208618164, "learning_rate": 9.564526350552689e-06, "loss": 0.08811569213867188, "step": 1405 }, { "epoch": 0.19591722984741866, "grad_norm": 1.1613600254058838, "learning_rate": 9.56356387160539e-06, "loss": 0.06653404235839844, "step": 1406 }, { "epoch": 0.19605657353863304, "grad_norm": 1.1909128427505493, "learning_rate": 9.562600378738389e-06, "loss": 0.09128570556640625, "step": 1407 }, { "epoch": 0.19619591722984742, "grad_norm": 0.5399674773216248, "learning_rate": 9.561635872165747e-06, "loss": 0.07754230499267578, "step": 1408 }, { "epoch": 0.1963352609210618, "grad_norm": 0.9802528619766235, "learning_rate": 9.56067035210176e-06, "loss": 0.09837865829467773, "step": 1409 }, { "epoch": 0.19647460461227617, "grad_norm": 1.2220150232315063, "learning_rate": 9.559703818760943e-06, "loss": 0.11161231994628906, "step": 1410 }, { "epoch": 0.19661394830349055, "grad_norm": 1.2540855407714844, "learning_rate": 9.558736272358036e-06, "loss": 0.08632373809814453, "step": 1411 }, { "epoch": 0.19675329199470493, "grad_norm": 2.7055041790008545, "learning_rate": 9.557767713108009e-06, "loss": 0.13875770568847656, "step": 1412 }, { "epoch": 0.19689263568591933, "grad_norm": 1.3398455381393433, "learning_rate": 9.55679814122605e-06, "loss": 0.08096790313720703, "step": 1413 }, { "epoch": 0.1970319793771337, "grad_norm": 1.5343655347824097, "learning_rate": 9.555827556927578e-06, "loss": 0.08799362182617188, "step": 1414 }, { "epoch": 0.1971713230683481, "grad_norm": 0.8219375610351562, "learning_rate": 9.554855960428234e-06, "loss": 0.06986713409423828, "step": 1415 }, { "epoch": 0.19731066675956246, "grad_norm": 0.6303895115852356, "learning_rate": 9.553883351943882e-06, "loss": 0.07735443115234375, "step": 1416 }, { "epoch": 0.19745001045077684, "grad_norm": 2.1014623641967773, "learning_rate": 9.55290973169062e-06, "loss": 0.12485504150390625, "step": 1417 }, { "epoch": 0.19758935414199122, "grad_norm": 1.4221731424331665, "learning_rate": 9.55193509988476e-06, "loss": 0.096954345703125, "step": 1418 }, { "epoch": 0.1977286978332056, "grad_norm": 0.8054177761077881, "learning_rate": 9.55095945674284e-06, "loss": 0.09123039245605469, "step": 1419 }, { "epoch": 0.19786804152441997, "grad_norm": 1.6319202184677124, "learning_rate": 9.549982802481632e-06, "loss": 0.08830070495605469, "step": 1420 }, { "epoch": 0.19800738521563435, "grad_norm": 1.0363547801971436, "learning_rate": 9.549005137318122e-06, "loss": 0.08972549438476562, "step": 1421 }, { "epoch": 0.19814672890684873, "grad_norm": 1.2898606061935425, "learning_rate": 9.548026461469527e-06, "loss": 0.09961128234863281, "step": 1422 }, { "epoch": 0.19828607259806313, "grad_norm": 0.8472974896430969, "learning_rate": 9.547046775153285e-06, "loss": 0.08231925964355469, "step": 1423 }, { "epoch": 0.1984254162892775, "grad_norm": 0.7902846932411194, "learning_rate": 9.54606607858706e-06, "loss": 0.09697437286376953, "step": 1424 }, { "epoch": 0.1985647599804919, "grad_norm": 0.8478670120239258, "learning_rate": 9.545084371988743e-06, "loss": 0.10732078552246094, "step": 1425 }, { "epoch": 0.19870410367170627, "grad_norm": 1.2341490983963013, "learning_rate": 9.54410165557644e-06, "loss": 0.10478591918945312, "step": 1426 }, { "epoch": 0.19884344736292064, "grad_norm": 0.5651872158050537, "learning_rate": 9.543117929568497e-06, "loss": 0.0772867202758789, "step": 1427 }, { "epoch": 0.19898279105413502, "grad_norm": 0.90003502368927, "learning_rate": 9.542133194183469e-06, "loss": 0.08767890930175781, "step": 1428 }, { "epoch": 0.1991221347453494, "grad_norm": 0.9882870316505432, "learning_rate": 9.541147449640145e-06, "loss": 0.0859832763671875, "step": 1429 }, { "epoch": 0.19926147843656378, "grad_norm": 1.3188632726669312, "learning_rate": 9.540160696157532e-06, "loss": 0.11719226837158203, "step": 1430 }, { "epoch": 0.19940082212777815, "grad_norm": 0.9890533685684204, "learning_rate": 9.539172933954867e-06, "loss": 0.07089424133300781, "step": 1431 }, { "epoch": 0.19954016581899253, "grad_norm": 1.079177975654602, "learning_rate": 9.538184163251608e-06, "loss": 0.08466148376464844, "step": 1432 }, { "epoch": 0.19967950951020694, "grad_norm": 1.0634785890579224, "learning_rate": 9.537194384267436e-06, "loss": 0.1037445068359375, "step": 1433 }, { "epoch": 0.19981885320142131, "grad_norm": 0.7928269505500793, "learning_rate": 9.536203597222259e-06, "loss": 0.09612655639648438, "step": 1434 }, { "epoch": 0.1999581968926357, "grad_norm": 1.1938390731811523, "learning_rate": 9.535211802336204e-06, "loss": 0.09079360961914062, "step": 1435 }, { "epoch": 0.20009754058385007, "grad_norm": 0.6233628988265991, "learning_rate": 9.534218999829627e-06, "loss": 0.08502864837646484, "step": 1436 }, { "epoch": 0.20023688427506445, "grad_norm": 0.8711168766021729, "learning_rate": 9.533225189923107e-06, "loss": 0.08656597137451172, "step": 1437 }, { "epoch": 0.20037622796627882, "grad_norm": 1.2257460355758667, "learning_rate": 9.532230372837446e-06, "loss": 0.08256149291992188, "step": 1438 }, { "epoch": 0.2005155716574932, "grad_norm": 2.24965763092041, "learning_rate": 9.531234548793667e-06, "loss": 0.13108444213867188, "step": 1439 }, { "epoch": 0.20065491534870758, "grad_norm": 0.821112871170044, "learning_rate": 9.530237718013023e-06, "loss": 0.07626533508300781, "step": 1440 }, { "epoch": 0.20079425903992196, "grad_norm": 0.7687667012214661, "learning_rate": 9.529239880716983e-06, "loss": 0.08524036407470703, "step": 1441 }, { "epoch": 0.20093360273113633, "grad_norm": 0.8772261142730713, "learning_rate": 9.528241037127247e-06, "loss": 0.0930337905883789, "step": 1442 }, { "epoch": 0.20107294642235074, "grad_norm": 0.9363512992858887, "learning_rate": 9.527241187465735e-06, "loss": 0.0875082015991211, "step": 1443 }, { "epoch": 0.20121229011356512, "grad_norm": 1.189929485321045, "learning_rate": 9.526240331954589e-06, "loss": 0.09538984298706055, "step": 1444 }, { "epoch": 0.2013516338047795, "grad_norm": 1.144791603088379, "learning_rate": 9.525238470816176e-06, "loss": 0.10214042663574219, "step": 1445 }, { "epoch": 0.20149097749599387, "grad_norm": 0.6360164880752563, "learning_rate": 9.524235604273088e-06, "loss": 0.08616065979003906, "step": 1446 }, { "epoch": 0.20163032118720825, "grad_norm": 0.9930375218391418, "learning_rate": 9.523231732548139e-06, "loss": 0.06405830383300781, "step": 1447 }, { "epoch": 0.20176966487842263, "grad_norm": 0.7070286870002747, "learning_rate": 9.522226855864366e-06, "loss": 0.06551265716552734, "step": 1448 }, { "epoch": 0.201909008569637, "grad_norm": 0.48673877120018005, "learning_rate": 9.521220974445032e-06, "loss": 0.06596565246582031, "step": 1449 }, { "epoch": 0.20204835226085138, "grad_norm": 1.2676658630371094, "learning_rate": 9.520214088513616e-06, "loss": 0.08936786651611328, "step": 1450 }, { "epoch": 0.20218769595206576, "grad_norm": 0.9055288434028625, "learning_rate": 9.519206198293828e-06, "loss": 0.08986854553222656, "step": 1451 }, { "epoch": 0.20232703964328014, "grad_norm": 0.7542750835418701, "learning_rate": 9.5181973040096e-06, "loss": 0.08969306945800781, "step": 1452 }, { "epoch": 0.20246638333449454, "grad_norm": 3.8496735095977783, "learning_rate": 9.517187405885082e-06, "loss": 0.09476613998413086, "step": 1453 }, { "epoch": 0.20260572702570892, "grad_norm": 2.1174538135528564, "learning_rate": 9.516176504144652e-06, "loss": 0.0874289870262146, "step": 1454 }, { "epoch": 0.2027450707169233, "grad_norm": 0.8197110295295715, "learning_rate": 9.515164599012908e-06, "loss": 0.0694427490234375, "step": 1455 }, { "epoch": 0.20288441440813768, "grad_norm": 1.0293244123458862, "learning_rate": 9.514151690714672e-06, "loss": 0.079315185546875, "step": 1456 }, { "epoch": 0.20302375809935205, "grad_norm": 0.6647963523864746, "learning_rate": 9.513137779474992e-06, "loss": 0.06489944458007812, "step": 1457 }, { "epoch": 0.20316310179056643, "grad_norm": 1.2127925157546997, "learning_rate": 9.512122865519135e-06, "loss": 0.10305404663085938, "step": 1458 }, { "epoch": 0.2033024454817808, "grad_norm": 1.0610506534576416, "learning_rate": 9.511106949072588e-06, "loss": 0.08074569702148438, "step": 1459 }, { "epoch": 0.20344178917299519, "grad_norm": 0.6777246594429016, "learning_rate": 9.51009003036107e-06, "loss": 0.06904029846191406, "step": 1460 }, { "epoch": 0.20358113286420956, "grad_norm": 0.9002620577812195, "learning_rate": 9.509072109610514e-06, "loss": 0.08086776733398438, "step": 1461 }, { "epoch": 0.20372047655542394, "grad_norm": 0.8265858888626099, "learning_rate": 9.508053187047077e-06, "loss": 0.07876968383789062, "step": 1462 }, { "epoch": 0.20385982024663835, "grad_norm": 0.8076441287994385, "learning_rate": 9.507033262897142e-06, "loss": 0.08519744873046875, "step": 1463 }, { "epoch": 0.20399916393785272, "grad_norm": 0.7379582524299622, "learning_rate": 9.506012337387315e-06, "loss": 0.0769815444946289, "step": 1464 }, { "epoch": 0.2041385076290671, "grad_norm": 0.8960319757461548, "learning_rate": 9.504990410744422e-06, "loss": 0.10612964630126953, "step": 1465 }, { "epoch": 0.20427785132028148, "grad_norm": 1.469867467880249, "learning_rate": 9.503967483195509e-06, "loss": 0.09931278228759766, "step": 1466 }, { "epoch": 0.20441719501149586, "grad_norm": 1.9647347927093506, "learning_rate": 9.502943554967848e-06, "loss": 0.0787506103515625, "step": 1467 }, { "epoch": 0.20455653870271023, "grad_norm": 0.8889943361282349, "learning_rate": 9.501918626288935e-06, "loss": 0.08191299438476562, "step": 1468 }, { "epoch": 0.2046958823939246, "grad_norm": 1.141080379486084, "learning_rate": 9.500892697386482e-06, "loss": 0.0899038314819336, "step": 1469 }, { "epoch": 0.204835226085139, "grad_norm": 1.2983663082122803, "learning_rate": 9.499865768488429e-06, "loss": 0.09371471405029297, "step": 1470 }, { "epoch": 0.20497456977635337, "grad_norm": 0.931220531463623, "learning_rate": 9.498837839822936e-06, "loss": 0.0920567512512207, "step": 1471 }, { "epoch": 0.20511391346756774, "grad_norm": 3.6879379749298096, "learning_rate": 9.497808911618385e-06, "loss": 0.11356163024902344, "step": 1472 }, { "epoch": 0.20525325715878215, "grad_norm": 1.7321406602859497, "learning_rate": 9.496778984103381e-06, "loss": 0.09388065338134766, "step": 1473 }, { "epoch": 0.20539260084999653, "grad_norm": 1.7628750801086426, "learning_rate": 9.49574805750675e-06, "loss": 0.10253238677978516, "step": 1474 }, { "epoch": 0.2055319445412109, "grad_norm": 0.6817371845245361, "learning_rate": 9.49471613205754e-06, "loss": 0.08748435974121094, "step": 1475 }, { "epoch": 0.20567128823242528, "grad_norm": 0.8515208959579468, "learning_rate": 9.493683207985022e-06, "loss": 0.09677505493164062, "step": 1476 }, { "epoch": 0.20581063192363966, "grad_norm": 2.3021597862243652, "learning_rate": 9.492649285518688e-06, "loss": 0.11104011535644531, "step": 1477 }, { "epoch": 0.20594997561485404, "grad_norm": 1.0979225635528564, "learning_rate": 9.49161436488825e-06, "loss": 0.07656383514404297, "step": 1478 }, { "epoch": 0.2060893193060684, "grad_norm": 1.174613356590271, "learning_rate": 9.490578446323646e-06, "loss": 0.09066057205200195, "step": 1479 }, { "epoch": 0.2062286629972828, "grad_norm": 1.411648154258728, "learning_rate": 9.489541530055034e-06, "loss": 0.08588790893554688, "step": 1480 }, { "epoch": 0.20636800668849717, "grad_norm": 1.0392123460769653, "learning_rate": 9.488503616312793e-06, "loss": 0.09646987915039062, "step": 1481 }, { "epoch": 0.20650735037971155, "grad_norm": 1.118794322013855, "learning_rate": 9.48746470532752e-06, "loss": 0.09631538391113281, "step": 1482 }, { "epoch": 0.20664669407092595, "grad_norm": 2.4319021701812744, "learning_rate": 9.48642479733004e-06, "loss": 0.08669281005859375, "step": 1483 }, { "epoch": 0.20678603776214033, "grad_norm": 1.0045157670974731, "learning_rate": 9.4853838925514e-06, "loss": 0.08635902404785156, "step": 1484 }, { "epoch": 0.2069253814533547, "grad_norm": 1.5884603261947632, "learning_rate": 9.484341991222858e-06, "loss": 0.10711669921875, "step": 1485 }, { "epoch": 0.20706472514456908, "grad_norm": 1.688392996788025, "learning_rate": 9.483299093575909e-06, "loss": 0.098419189453125, "step": 1486 }, { "epoch": 0.20720406883578346, "grad_norm": 1.4169597625732422, "learning_rate": 9.482255199842254e-06, "loss": 0.09436798095703125, "step": 1487 }, { "epoch": 0.20734341252699784, "grad_norm": 2.2018415927886963, "learning_rate": 9.481210310253826e-06, "loss": 0.11734771728515625, "step": 1488 }, { "epoch": 0.20748275621821222, "grad_norm": 1.8851889371871948, "learning_rate": 9.480164425042775e-06, "loss": 0.09113883972167969, "step": 1489 }, { "epoch": 0.2076220999094266, "grad_norm": 0.6371416449546814, "learning_rate": 9.479117544441472e-06, "loss": 0.06691384315490723, "step": 1490 }, { "epoch": 0.20776144360064097, "grad_norm": 1.7064621448516846, "learning_rate": 9.47806966868251e-06, "loss": 0.10298442840576172, "step": 1491 }, { "epoch": 0.20790078729185535, "grad_norm": 0.8774853348731995, "learning_rate": 9.477020797998707e-06, "loss": 0.08387279510498047, "step": 1492 }, { "epoch": 0.20804013098306975, "grad_norm": 0.6800728440284729, "learning_rate": 9.47597093262309e-06, "loss": 0.06440401077270508, "step": 1493 }, { "epoch": 0.20817947467428413, "grad_norm": 2.7185442447662354, "learning_rate": 9.474920072788925e-06, "loss": 0.12405014038085938, "step": 1494 }, { "epoch": 0.2083188183654985, "grad_norm": 0.4700227975845337, "learning_rate": 9.47386821872968e-06, "loss": 0.0676722526550293, "step": 1495 }, { "epoch": 0.2084581620567129, "grad_norm": 0.8505795001983643, "learning_rate": 9.47281537067906e-06, "loss": 0.08748340606689453, "step": 1496 }, { "epoch": 0.20859750574792726, "grad_norm": 1.075755000114441, "learning_rate": 9.471761528870978e-06, "loss": 0.09908103942871094, "step": 1497 }, { "epoch": 0.20873684943914164, "grad_norm": 1.2065001726150513, "learning_rate": 9.470706693539578e-06, "loss": 0.08876609802246094, "step": 1498 }, { "epoch": 0.20887619313035602, "grad_norm": 0.9404253363609314, "learning_rate": 9.469650864919217e-06, "loss": 0.08599853515625, "step": 1499 }, { "epoch": 0.2090155368215704, "grad_norm": 0.8120072484016418, "learning_rate": 9.46859404324448e-06, "loss": 0.09704971313476562, "step": 1500 }, { "epoch": 0.20915488051278477, "grad_norm": 0.7190840244293213, "learning_rate": 9.467536228750166e-06, "loss": 0.07736492156982422, "step": 1501 }, { "epoch": 0.20929422420399915, "grad_norm": 2.0867972373962402, "learning_rate": 9.466477421671296e-06, "loss": 0.08835649490356445, "step": 1502 }, { "epoch": 0.20943356789521356, "grad_norm": 1.3828942775726318, "learning_rate": 9.465417622243116e-06, "loss": 0.07569599151611328, "step": 1503 }, { "epoch": 0.20957291158642793, "grad_norm": 0.9054553508758545, "learning_rate": 9.464356830701086e-06, "loss": 0.09967803955078125, "step": 1504 }, { "epoch": 0.2097122552776423, "grad_norm": 0.9825350642204285, "learning_rate": 9.463295047280892e-06, "loss": 0.10050487518310547, "step": 1505 }, { "epoch": 0.2098515989688567, "grad_norm": 1.0472712516784668, "learning_rate": 9.462232272218437e-06, "loss": 0.09843730926513672, "step": 1506 }, { "epoch": 0.20999094266007107, "grad_norm": 0.6431991457939148, "learning_rate": 9.461168505749847e-06, "loss": 0.10819625854492188, "step": 1507 }, { "epoch": 0.21013028635128544, "grad_norm": 1.2948272228240967, "learning_rate": 9.460103748111462e-06, "loss": 0.09721899032592773, "step": 1508 }, { "epoch": 0.21026963004249982, "grad_norm": 0.5169112682342529, "learning_rate": 9.459037999539852e-06, "loss": 0.07789325714111328, "step": 1509 }, { "epoch": 0.2104089737337142, "grad_norm": 0.8864663243293762, "learning_rate": 9.4579712602718e-06, "loss": 0.11794567108154297, "step": 1510 }, { "epoch": 0.21054831742492858, "grad_norm": 1.1495212316513062, "learning_rate": 9.456903530544312e-06, "loss": 0.089935302734375, "step": 1511 }, { "epoch": 0.21068766111614295, "grad_norm": 1.6773157119750977, "learning_rate": 9.455834810594611e-06, "loss": 0.1133880615234375, "step": 1512 }, { "epoch": 0.21082700480735736, "grad_norm": 0.6441618204116821, "learning_rate": 9.454765100660144e-06, "loss": 0.07596397399902344, "step": 1513 }, { "epoch": 0.21096634849857174, "grad_norm": 0.6296616792678833, "learning_rate": 9.453694400978576e-06, "loss": 0.07233333587646484, "step": 1514 }, { "epoch": 0.21110569218978611, "grad_norm": 1.2547649145126343, "learning_rate": 9.452622711787793e-06, "loss": 0.09060287475585938, "step": 1515 }, { "epoch": 0.2112450358810005, "grad_norm": 1.0616463422775269, "learning_rate": 9.451550033325896e-06, "loss": 0.12949562072753906, "step": 1516 }, { "epoch": 0.21138437957221487, "grad_norm": 0.8727665543556213, "learning_rate": 9.450476365831214e-06, "loss": 0.109832763671875, "step": 1517 }, { "epoch": 0.21152372326342925, "grad_norm": 0.6284834146499634, "learning_rate": 9.449401709542289e-06, "loss": 0.0763845443725586, "step": 1518 }, { "epoch": 0.21166306695464362, "grad_norm": 0.7806018590927124, "learning_rate": 9.448326064697886e-06, "loss": 0.085357666015625, "step": 1519 }, { "epoch": 0.211802410645858, "grad_norm": 0.857973575592041, "learning_rate": 9.447249431536987e-06, "loss": 0.06423425674438477, "step": 1520 }, { "epoch": 0.21194175433707238, "grad_norm": 1.2275446653366089, "learning_rate": 9.446171810298799e-06, "loss": 0.09993553161621094, "step": 1521 }, { "epoch": 0.21208109802828676, "grad_norm": 1.6030964851379395, "learning_rate": 9.44509320122274e-06, "loss": 0.09647560119628906, "step": 1522 }, { "epoch": 0.21222044171950116, "grad_norm": 0.7220860123634338, "learning_rate": 9.444013604548457e-06, "loss": 0.08093738555908203, "step": 1523 }, { "epoch": 0.21235978541071554, "grad_norm": 0.5300483703613281, "learning_rate": 9.442933020515808e-06, "loss": 0.06272697448730469, "step": 1524 }, { "epoch": 0.21249912910192992, "grad_norm": 1.001752495765686, "learning_rate": 9.441851449364878e-06, "loss": 0.11116218566894531, "step": 1525 }, { "epoch": 0.2126384727931443, "grad_norm": 0.8542739152908325, "learning_rate": 9.440768891335962e-06, "loss": 0.08158302307128906, "step": 1526 }, { "epoch": 0.21277781648435867, "grad_norm": 0.8210951685905457, "learning_rate": 9.439685346669585e-06, "loss": 0.1112070083618164, "step": 1527 }, { "epoch": 0.21291716017557305, "grad_norm": 0.7078427076339722, "learning_rate": 9.438600815606483e-06, "loss": 0.07845115661621094, "step": 1528 }, { "epoch": 0.21305650386678743, "grad_norm": 0.9319515824317932, "learning_rate": 9.437515298387617e-06, "loss": 0.07581806182861328, "step": 1529 }, { "epoch": 0.2131958475580018, "grad_norm": 0.8140933513641357, "learning_rate": 9.436428795254159e-06, "loss": 0.10741329193115234, "step": 1530 }, { "epoch": 0.21333519124921618, "grad_norm": 0.8051738739013672, "learning_rate": 9.43534130644751e-06, "loss": 0.07440471649169922, "step": 1531 }, { "epoch": 0.21347453494043056, "grad_norm": 1.5930699110031128, "learning_rate": 9.43425283220928e-06, "loss": 0.10832405090332031, "step": 1532 }, { "epoch": 0.21361387863164497, "grad_norm": 1.426442265510559, "learning_rate": 9.43316337278131e-06, "loss": 0.09601593017578125, "step": 1533 }, { "epoch": 0.21375322232285934, "grad_norm": 0.8227933645248413, "learning_rate": 9.432072928405648e-06, "loss": 0.07357406616210938, "step": 1534 }, { "epoch": 0.21389256601407372, "grad_norm": 1.3964252471923828, "learning_rate": 9.430981499324567e-06, "loss": 0.09033966064453125, "step": 1535 }, { "epoch": 0.2140319097052881, "grad_norm": 0.9914337396621704, "learning_rate": 9.429889085780559e-06, "loss": 0.06949520111083984, "step": 1536 }, { "epoch": 0.21417125339650248, "grad_norm": 0.980757474899292, "learning_rate": 9.42879568801633e-06, "loss": 0.0770406723022461, "step": 1537 }, { "epoch": 0.21431059708771685, "grad_norm": 1.1648895740509033, "learning_rate": 9.427701306274812e-06, "loss": 0.10435295104980469, "step": 1538 }, { "epoch": 0.21444994077893123, "grad_norm": 0.6052501201629639, "learning_rate": 9.42660594079915e-06, "loss": 0.07375335693359375, "step": 1539 }, { "epoch": 0.2145892844701456, "grad_norm": 0.7359322905540466, "learning_rate": 9.42550959183271e-06, "loss": 0.08359289169311523, "step": 1540 }, { "epoch": 0.21472862816135999, "grad_norm": 1.202511191368103, "learning_rate": 9.424412259619073e-06, "loss": 0.08098602294921875, "step": 1541 }, { "epoch": 0.21486797185257436, "grad_norm": 1.4912761449813843, "learning_rate": 9.423313944402043e-06, "loss": 0.09101486206054688, "step": 1542 }, { "epoch": 0.21500731554378877, "grad_norm": 0.938217043876648, "learning_rate": 9.422214646425641e-06, "loss": 0.08542823791503906, "step": 1543 }, { "epoch": 0.21514665923500315, "grad_norm": 2.6314074993133545, "learning_rate": 9.421114365934105e-06, "loss": 0.11644792556762695, "step": 1544 }, { "epoch": 0.21528600292621752, "grad_norm": 1.6446491479873657, "learning_rate": 9.420013103171893e-06, "loss": 0.09649848937988281, "step": 1545 }, { "epoch": 0.2154253466174319, "grad_norm": 0.747387170791626, "learning_rate": 9.418910858383681e-06, "loss": 0.09345436096191406, "step": 1546 }, { "epoch": 0.21556469030864628, "grad_norm": 1.0782263278961182, "learning_rate": 9.41780763181436e-06, "loss": 0.08473396301269531, "step": 1547 }, { "epoch": 0.21570403399986066, "grad_norm": 1.263392448425293, "learning_rate": 9.416703423709044e-06, "loss": 0.09098434448242188, "step": 1548 }, { "epoch": 0.21584337769107503, "grad_norm": 1.178680658340454, "learning_rate": 9.415598234313064e-06, "loss": 0.0662374496459961, "step": 1549 }, { "epoch": 0.2159827213822894, "grad_norm": 1.1940339803695679, "learning_rate": 9.414492063871964e-06, "loss": 0.08306884765625, "step": 1550 }, { "epoch": 0.2161220650735038, "grad_norm": 1.2456468343734741, "learning_rate": 9.413384912631512e-06, "loss": 0.09459209442138672, "step": 1551 }, { "epoch": 0.21626140876471817, "grad_norm": 0.7643994688987732, "learning_rate": 9.412276780837692e-06, "loss": 0.10317230224609375, "step": 1552 }, { "epoch": 0.21640075245593257, "grad_norm": 1.735530972480774, "learning_rate": 9.411167668736707e-06, "loss": 0.08983135223388672, "step": 1553 }, { "epoch": 0.21654009614714695, "grad_norm": 1.2970693111419678, "learning_rate": 9.410057576574974e-06, "loss": 0.0968465805053711, "step": 1554 }, { "epoch": 0.21667943983836133, "grad_norm": 0.9546417593955994, "learning_rate": 9.408946504599131e-06, "loss": 0.08802032470703125, "step": 1555 }, { "epoch": 0.2168187835295757, "grad_norm": 1.2228277921676636, "learning_rate": 9.40783445305603e-06, "loss": 0.09793949127197266, "step": 1556 }, { "epoch": 0.21695812722079008, "grad_norm": 0.9059131145477295, "learning_rate": 9.406721422192748e-06, "loss": 0.10065650939941406, "step": 1557 }, { "epoch": 0.21709747091200446, "grad_norm": 1.4850291013717651, "learning_rate": 9.405607412256573e-06, "loss": 0.08413314819335938, "step": 1558 }, { "epoch": 0.21723681460321884, "grad_norm": 1.0359320640563965, "learning_rate": 9.404492423495012e-06, "loss": 0.09650421142578125, "step": 1559 }, { "epoch": 0.2173761582944332, "grad_norm": 2.1478846073150635, "learning_rate": 9.403376456155792e-06, "loss": 0.1078195571899414, "step": 1560 }, { "epoch": 0.2175155019856476, "grad_norm": 1.1876540184020996, "learning_rate": 9.402259510486855e-06, "loss": 0.09942245483398438, "step": 1561 }, { "epoch": 0.21765484567686197, "grad_norm": 1.1724313497543335, "learning_rate": 9.401141586736359e-06, "loss": 0.09189319610595703, "step": 1562 }, { "epoch": 0.21779418936807637, "grad_norm": 3.9526727199554443, "learning_rate": 9.400022685152683e-06, "loss": 0.13733673095703125, "step": 1563 }, { "epoch": 0.21793353305929075, "grad_norm": 1.812495470046997, "learning_rate": 9.398902805984417e-06, "loss": 0.115203857421875, "step": 1564 }, { "epoch": 0.21807287675050513, "grad_norm": 0.8878211379051208, "learning_rate": 9.397781949480381e-06, "loss": 0.10098123550415039, "step": 1565 }, { "epoch": 0.2182122204417195, "grad_norm": 1.3114436864852905, "learning_rate": 9.396660115889596e-06, "loss": 0.09289741516113281, "step": 1566 }, { "epoch": 0.21835156413293388, "grad_norm": 1.3087613582611084, "learning_rate": 9.395537305461312e-06, "loss": 0.10125923156738281, "step": 1567 }, { "epoch": 0.21849090782414826, "grad_norm": 0.6620432734489441, "learning_rate": 9.394413518444989e-06, "loss": 0.0681295394897461, "step": 1568 }, { "epoch": 0.21863025151536264, "grad_norm": 0.9600664377212524, "learning_rate": 9.39328875509031e-06, "loss": 0.10349273681640625, "step": 1569 }, { "epoch": 0.21876959520657702, "grad_norm": 1.3005813360214233, "learning_rate": 9.39216301564717e-06, "loss": 0.11305046081542969, "step": 1570 }, { "epoch": 0.2189089388977914, "grad_norm": 0.8386991024017334, "learning_rate": 9.391036300365681e-06, "loss": 0.07663249969482422, "step": 1571 }, { "epoch": 0.21904828258900577, "grad_norm": 1.2504945993423462, "learning_rate": 9.389908609496177e-06, "loss": 0.09974956512451172, "step": 1572 }, { "epoch": 0.21918762628022015, "grad_norm": 0.5718521475791931, "learning_rate": 9.388779943289204e-06, "loss": 0.07936668395996094, "step": 1573 }, { "epoch": 0.21932696997143455, "grad_norm": 0.8612309098243713, "learning_rate": 9.387650301995523e-06, "loss": 0.08213424682617188, "step": 1574 }, { "epoch": 0.21946631366264893, "grad_norm": 0.7131614089012146, "learning_rate": 9.386519685866117e-06, "loss": 0.09278678894042969, "step": 1575 }, { "epoch": 0.2196056573538633, "grad_norm": 0.5264075994491577, "learning_rate": 9.385388095152184e-06, "loss": 0.07836246490478516, "step": 1576 }, { "epoch": 0.2197450010450777, "grad_norm": 1.351003885269165, "learning_rate": 9.384255530105136e-06, "loss": 0.11552143096923828, "step": 1577 }, { "epoch": 0.21988434473629206, "grad_norm": 0.8349570035934448, "learning_rate": 9.383121990976602e-06, "loss": 0.0911111831665039, "step": 1578 }, { "epoch": 0.22002368842750644, "grad_norm": 0.7275611162185669, "learning_rate": 9.381987478018431e-06, "loss": 0.07676506042480469, "step": 1579 }, { "epoch": 0.22016303211872082, "grad_norm": 1.072979211807251, "learning_rate": 9.380851991482685e-06, "loss": 0.10426139831542969, "step": 1580 }, { "epoch": 0.2203023758099352, "grad_norm": 0.8594093918800354, "learning_rate": 9.379715531621642e-06, "loss": 0.11225128173828125, "step": 1581 }, { "epoch": 0.22044171950114957, "grad_norm": 1.6889922618865967, "learning_rate": 9.3785780986878e-06, "loss": 0.10063552856445312, "step": 1582 }, { "epoch": 0.22058106319236395, "grad_norm": 1.261029601097107, "learning_rate": 9.377439692933869e-06, "loss": 0.0854034423828125, "step": 1583 }, { "epoch": 0.22072040688357836, "grad_norm": 0.9944420456886292, "learning_rate": 9.376300314612775e-06, "loss": 0.06717491149902344, "step": 1584 }, { "epoch": 0.22085975057479273, "grad_norm": 1.6944750547409058, "learning_rate": 9.375159963977668e-06, "loss": 0.09950637817382812, "step": 1585 }, { "epoch": 0.2209990942660071, "grad_norm": 1.6281929016113281, "learning_rate": 9.374018641281898e-06, "loss": 0.10107707977294922, "step": 1586 }, { "epoch": 0.2211384379572215, "grad_norm": 0.6476344466209412, "learning_rate": 9.37287634677905e-06, "loss": 0.08452510833740234, "step": 1587 }, { "epoch": 0.22127778164843587, "grad_norm": 0.6732726097106934, "learning_rate": 9.371733080722911e-06, "loss": 0.07142972946166992, "step": 1588 }, { "epoch": 0.22141712533965024, "grad_norm": 0.8049268126487732, "learning_rate": 9.37058884336749e-06, "loss": 0.0819540023803711, "step": 1589 }, { "epoch": 0.22155646903086462, "grad_norm": 1.2000218629837036, "learning_rate": 9.36944363496701e-06, "loss": 0.09119987487792969, "step": 1590 }, { "epoch": 0.221695812722079, "grad_norm": 1.5772833824157715, "learning_rate": 9.368297455775911e-06, "loss": 0.11744499206542969, "step": 1591 }, { "epoch": 0.22183515641329338, "grad_norm": 1.129183053970337, "learning_rate": 9.367150306048847e-06, "loss": 0.07651710510253906, "step": 1592 }, { "epoch": 0.22197450010450775, "grad_norm": 0.7229461669921875, "learning_rate": 9.36600218604069e-06, "loss": 0.062476158142089844, "step": 1593 }, { "epoch": 0.22211384379572216, "grad_norm": 1.4597443342208862, "learning_rate": 9.364853096006523e-06, "loss": 0.11189556121826172, "step": 1594 }, { "epoch": 0.22225318748693654, "grad_norm": 1.2421599626541138, "learning_rate": 9.36370303620165e-06, "loss": 0.11078500747680664, "step": 1595 }, { "epoch": 0.22239253117815092, "grad_norm": 1.2903234958648682, "learning_rate": 9.362552006881588e-06, "loss": 0.07340240478515625, "step": 1596 }, { "epoch": 0.2225318748693653, "grad_norm": 2.0637052059173584, "learning_rate": 9.361400008302068e-06, "loss": 0.10505867004394531, "step": 1597 }, { "epoch": 0.22267121856057967, "grad_norm": 0.7003729343414307, "learning_rate": 9.36024704071904e-06, "loss": 0.07556343078613281, "step": 1598 }, { "epoch": 0.22281056225179405, "grad_norm": 0.9549722671508789, "learning_rate": 9.359093104388663e-06, "loss": 0.09298515319824219, "step": 1599 }, { "epoch": 0.22294990594300843, "grad_norm": 1.5263748168945312, "learning_rate": 9.35793819956732e-06, "loss": 0.0997934341430664, "step": 1600 }, { "epoch": 0.2230892496342228, "grad_norm": 0.860901415348053, "learning_rate": 9.356782326511602e-06, "loss": 0.12434196472167969, "step": 1601 }, { "epoch": 0.22322859332543718, "grad_norm": 1.4283360242843628, "learning_rate": 9.355625485478319e-06, "loss": 0.11455154418945312, "step": 1602 }, { "epoch": 0.22336793701665156, "grad_norm": 0.8082263469696045, "learning_rate": 9.354467676724491e-06, "loss": 0.09358406066894531, "step": 1603 }, { "epoch": 0.22350728070786596, "grad_norm": 0.9771867990493774, "learning_rate": 9.353308900507361e-06, "loss": 0.08528804779052734, "step": 1604 }, { "epoch": 0.22364662439908034, "grad_norm": 1.5490775108337402, "learning_rate": 9.352149157084383e-06, "loss": 0.11155152320861816, "step": 1605 }, { "epoch": 0.22378596809029472, "grad_norm": 0.8821843862533569, "learning_rate": 9.350988446713221e-06, "loss": 0.1003122329711914, "step": 1606 }, { "epoch": 0.2239253117815091, "grad_norm": 0.852247953414917, "learning_rate": 9.349826769651762e-06, "loss": 0.08317756652832031, "step": 1607 }, { "epoch": 0.22406465547272347, "grad_norm": 1.0273370742797852, "learning_rate": 9.348664126158103e-06, "loss": 0.08888626098632812, "step": 1608 }, { "epoch": 0.22420399916393785, "grad_norm": 1.1662538051605225, "learning_rate": 9.347500516490555e-06, "loss": 0.0646677017211914, "step": 1609 }, { "epoch": 0.22434334285515223, "grad_norm": 2.818605899810791, "learning_rate": 9.346335940907648e-06, "loss": 0.0906839370727539, "step": 1610 }, { "epoch": 0.2244826865463666, "grad_norm": 1.1886001825332642, "learning_rate": 9.345170399668127e-06, "loss": 0.08946990966796875, "step": 1611 }, { "epoch": 0.22462203023758098, "grad_norm": 0.469555526971817, "learning_rate": 9.344003893030942e-06, "loss": 0.055850982666015625, "step": 1612 }, { "epoch": 0.22476137392879536, "grad_norm": 1.2962403297424316, "learning_rate": 9.342836421255268e-06, "loss": 0.08576393127441406, "step": 1613 }, { "epoch": 0.22490071762000977, "grad_norm": 1.4726213216781616, "learning_rate": 9.341667984600489e-06, "loss": 0.10633087158203125, "step": 1614 }, { "epoch": 0.22504006131122414, "grad_norm": 1.0119068622589111, "learning_rate": 9.340498583326208e-06, "loss": 0.08341693878173828, "step": 1615 }, { "epoch": 0.22517940500243852, "grad_norm": 0.6508646011352539, "learning_rate": 9.339328217692233e-06, "loss": 0.05362224578857422, "step": 1616 }, { "epoch": 0.2253187486936529, "grad_norm": 1.4875907897949219, "learning_rate": 9.3381568879586e-06, "loss": 0.09906959533691406, "step": 1617 }, { "epoch": 0.22545809238486728, "grad_norm": 0.8925971984863281, "learning_rate": 9.336984594385547e-06, "loss": 0.08305788040161133, "step": 1618 }, { "epoch": 0.22559743607608165, "grad_norm": 0.5639494061470032, "learning_rate": 9.335811337233533e-06, "loss": 0.0730905532836914, "step": 1619 }, { "epoch": 0.22573677976729603, "grad_norm": 0.7997280359268188, "learning_rate": 9.334637116763227e-06, "loss": 0.08026409149169922, "step": 1620 }, { "epoch": 0.2258761234585104, "grad_norm": 0.8973902463912964, "learning_rate": 9.333461933235517e-06, "loss": 0.07474231719970703, "step": 1621 }, { "epoch": 0.22601546714972479, "grad_norm": 1.1838241815567017, "learning_rate": 9.332285786911498e-06, "loss": 0.09169292449951172, "step": 1622 }, { "epoch": 0.22615481084093916, "grad_norm": 1.2883822917938232, "learning_rate": 9.331108678052485e-06, "loss": 0.10404396057128906, "step": 1623 }, { "epoch": 0.22629415453215357, "grad_norm": 1.1379814147949219, "learning_rate": 9.329930606920005e-06, "loss": 0.09853935241699219, "step": 1624 }, { "epoch": 0.22643349822336795, "grad_norm": 0.7209744453430176, "learning_rate": 9.3287515737758e-06, "loss": 0.08792877197265625, "step": 1625 }, { "epoch": 0.22657284191458232, "grad_norm": 0.6898247003555298, "learning_rate": 9.32757157888182e-06, "loss": 0.0682382583618164, "step": 1626 }, { "epoch": 0.2267121856057967, "grad_norm": 0.9791976809501648, "learning_rate": 9.326390622500236e-06, "loss": 0.07193470001220703, "step": 1627 }, { "epoch": 0.22685152929701108, "grad_norm": 0.7643553614616394, "learning_rate": 9.32520870489343e-06, "loss": 0.07804489135742188, "step": 1628 }, { "epoch": 0.22699087298822546, "grad_norm": 1.3179378509521484, "learning_rate": 9.324025826323995e-06, "loss": 0.10941886901855469, "step": 1629 }, { "epoch": 0.22713021667943983, "grad_norm": 0.6064757108688354, "learning_rate": 9.322841987054741e-06, "loss": 0.0729227066040039, "step": 1630 }, { "epoch": 0.2272695603706542, "grad_norm": 0.702816903591156, "learning_rate": 9.321657187348689e-06, "loss": 0.06281375885009766, "step": 1631 }, { "epoch": 0.2274089040618686, "grad_norm": 1.477543830871582, "learning_rate": 9.320471427469076e-06, "loss": 0.08763885498046875, "step": 1632 }, { "epoch": 0.22754824775308297, "grad_norm": 0.7536695599555969, "learning_rate": 9.319284707679348e-06, "loss": 0.08460235595703125, "step": 1633 }, { "epoch": 0.22768759144429737, "grad_norm": 1.2934679985046387, "learning_rate": 9.31809702824317e-06, "loss": 0.12410163879394531, "step": 1634 }, { "epoch": 0.22782693513551175, "grad_norm": 0.7599232196807861, "learning_rate": 9.316908389424416e-06, "loss": 0.08069896697998047, "step": 1635 }, { "epoch": 0.22796627882672613, "grad_norm": 2.392730712890625, "learning_rate": 9.315718791487175e-06, "loss": 0.1116933822631836, "step": 1636 }, { "epoch": 0.2281056225179405, "grad_norm": 1.5646753311157227, "learning_rate": 9.314528234695747e-06, "loss": 0.08020973205566406, "step": 1637 }, { "epoch": 0.22824496620915488, "grad_norm": 2.134516954421997, "learning_rate": 9.31333671931465e-06, "loss": 0.11276435852050781, "step": 1638 }, { "epoch": 0.22838430990036926, "grad_norm": 1.0950521230697632, "learning_rate": 9.312144245608608e-06, "loss": 0.13088512420654297, "step": 1639 }, { "epoch": 0.22852365359158364, "grad_norm": 0.901039183139801, "learning_rate": 9.31095081384256e-06, "loss": 0.11009407043457031, "step": 1640 }, { "epoch": 0.22866299728279801, "grad_norm": 1.7532869577407837, "learning_rate": 9.309756424281664e-06, "loss": 0.09267997741699219, "step": 1641 }, { "epoch": 0.2288023409740124, "grad_norm": 1.4396076202392578, "learning_rate": 9.308561077191284e-06, "loss": 0.08753776550292969, "step": 1642 }, { "epoch": 0.22894168466522677, "grad_norm": 0.5196347832679749, "learning_rate": 9.307364772837e-06, "loss": 0.07256889343261719, "step": 1643 }, { "epoch": 0.22908102835644117, "grad_norm": 0.730159342288971, "learning_rate": 9.306167511484601e-06, "loss": 0.0865621566772461, "step": 1644 }, { "epoch": 0.22922037204765555, "grad_norm": 0.8495734333992004, "learning_rate": 9.304969293400092e-06, "loss": 0.0943460464477539, "step": 1645 }, { "epoch": 0.22935971573886993, "grad_norm": 2.7938876152038574, "learning_rate": 9.303770118849692e-06, "loss": 0.10816001892089844, "step": 1646 }, { "epoch": 0.2294990594300843, "grad_norm": 1.4709690809249878, "learning_rate": 9.302569988099825e-06, "loss": 0.09195899963378906, "step": 1647 }, { "epoch": 0.22963840312129868, "grad_norm": 1.1733431816101074, "learning_rate": 9.301368901417138e-06, "loss": 0.09926414489746094, "step": 1648 }, { "epoch": 0.22977774681251306, "grad_norm": 1.1142432689666748, "learning_rate": 9.300166859068482e-06, "loss": 0.09260177612304688, "step": 1649 }, { "epoch": 0.22991709050372744, "grad_norm": 1.6664456129074097, "learning_rate": 9.298963861320927e-06, "loss": 0.10065937042236328, "step": 1650 }, { "epoch": 0.23005643419494182, "grad_norm": 0.8115018606185913, "learning_rate": 9.297759908441747e-06, "loss": 0.07280254364013672, "step": 1651 }, { "epoch": 0.2301957778861562, "grad_norm": 1.2497023344039917, "learning_rate": 9.296555000698435e-06, "loss": 0.08162879943847656, "step": 1652 }, { "epoch": 0.23033512157737057, "grad_norm": 0.951537549495697, "learning_rate": 9.295349138358693e-06, "loss": 0.07185173034667969, "step": 1653 }, { "epoch": 0.23047446526858498, "grad_norm": 1.0722837448120117, "learning_rate": 9.294142321690438e-06, "loss": 0.07679319381713867, "step": 1654 }, { "epoch": 0.23061380895979935, "grad_norm": 0.9292567372322083, "learning_rate": 9.292934550961796e-06, "loss": 0.0831451416015625, "step": 1655 }, { "epoch": 0.23075315265101373, "grad_norm": 0.940355122089386, "learning_rate": 9.291725826441107e-06, "loss": 0.12773895263671875, "step": 1656 }, { "epoch": 0.2308924963422281, "grad_norm": 1.475089192390442, "learning_rate": 9.29051614839692e-06, "loss": 0.08729362487792969, "step": 1657 }, { "epoch": 0.2310318400334425, "grad_norm": 0.7146948575973511, "learning_rate": 9.289305517098e-06, "loss": 0.07728004455566406, "step": 1658 }, { "epoch": 0.23117118372465686, "grad_norm": 0.8660123944282532, "learning_rate": 9.28809393281332e-06, "loss": 0.08976364135742188, "step": 1659 }, { "epoch": 0.23131052741587124, "grad_norm": 1.1090730428695679, "learning_rate": 9.286881395812066e-06, "loss": 0.11414146423339844, "step": 1660 }, { "epoch": 0.23144987110708562, "grad_norm": 0.8390616178512573, "learning_rate": 9.285667906363637e-06, "loss": 0.0832509994506836, "step": 1661 }, { "epoch": 0.2315892147983, "grad_norm": 1.05966055393219, "learning_rate": 9.284453464737644e-06, "loss": 0.07389163970947266, "step": 1662 }, { "epoch": 0.23172855848951437, "grad_norm": 1.3546181917190552, "learning_rate": 9.283238071203907e-06, "loss": 0.08850479125976562, "step": 1663 }, { "epoch": 0.23186790218072878, "grad_norm": 1.4432750940322876, "learning_rate": 9.282021726032457e-06, "loss": 0.07758140563964844, "step": 1664 }, { "epoch": 0.23200724587194316, "grad_norm": 0.8495595455169678, "learning_rate": 9.280804429493542e-06, "loss": 0.0946817398071289, "step": 1665 }, { "epoch": 0.23214658956315753, "grad_norm": 0.9880334138870239, "learning_rate": 9.279586181857613e-06, "loss": 0.09282684326171875, "step": 1666 }, { "epoch": 0.2322859332543719, "grad_norm": 1.1940969228744507, "learning_rate": 9.278366983395341e-06, "loss": 0.0823974609375, "step": 1667 }, { "epoch": 0.2324252769455863, "grad_norm": 2.0361294746398926, "learning_rate": 9.277146834377601e-06, "loss": 0.11321544647216797, "step": 1668 }, { "epoch": 0.23256462063680067, "grad_norm": 1.4084041118621826, "learning_rate": 9.275925735075484e-06, "loss": 0.07938003540039062, "step": 1669 }, { "epoch": 0.23270396432801504, "grad_norm": 0.7688978910446167, "learning_rate": 9.274703685760287e-06, "loss": 0.06725692749023438, "step": 1670 }, { "epoch": 0.23284330801922942, "grad_norm": 1.1742947101593018, "learning_rate": 9.273480686703526e-06, "loss": 0.07966804504394531, "step": 1671 }, { "epoch": 0.2329826517104438, "grad_norm": 0.6260557770729065, "learning_rate": 9.272256738176924e-06, "loss": 0.06287574768066406, "step": 1672 }, { "epoch": 0.23312199540165818, "grad_norm": 1.898645281791687, "learning_rate": 9.271031840452409e-06, "loss": 0.1383514404296875, "step": 1673 }, { "epoch": 0.23326133909287258, "grad_norm": 1.0083736181259155, "learning_rate": 9.26980599380213e-06, "loss": 0.07761955261230469, "step": 1674 }, { "epoch": 0.23340068278408696, "grad_norm": 0.9944248199462891, "learning_rate": 9.268579198498438e-06, "loss": 0.09881019592285156, "step": 1675 }, { "epoch": 0.23354002647530134, "grad_norm": 1.004789113998413, "learning_rate": 9.267351454813904e-06, "loss": 0.10803031921386719, "step": 1676 }, { "epoch": 0.23367937016651572, "grad_norm": 0.8441436886787415, "learning_rate": 9.266122763021302e-06, "loss": 0.08836841583251953, "step": 1677 }, { "epoch": 0.2338187138577301, "grad_norm": 0.9757235050201416, "learning_rate": 9.264893123393618e-06, "loss": 0.09949111938476562, "step": 1678 }, { "epoch": 0.23395805754894447, "grad_norm": 0.6809691786766052, "learning_rate": 9.26366253620405e-06, "loss": 0.09030437469482422, "step": 1679 }, { "epoch": 0.23409740124015885, "grad_norm": 0.6288127303123474, "learning_rate": 9.26243100172601e-06, "loss": 0.08231830596923828, "step": 1680 }, { "epoch": 0.23423674493137323, "grad_norm": 1.7697229385375977, "learning_rate": 9.261198520233113e-06, "loss": 0.10410022735595703, "step": 1681 }, { "epoch": 0.2343760886225876, "grad_norm": 0.8280476331710815, "learning_rate": 9.25996509199919e-06, "loss": 0.08893585205078125, "step": 1682 }, { "epoch": 0.23451543231380198, "grad_norm": 0.5889497995376587, "learning_rate": 9.258730717298281e-06, "loss": 0.08195781707763672, "step": 1683 }, { "epoch": 0.23465477600501639, "grad_norm": 0.760927677154541, "learning_rate": 9.257495396404635e-06, "loss": 0.10475349426269531, "step": 1684 }, { "epoch": 0.23479411969623076, "grad_norm": 0.7881961464881897, "learning_rate": 9.256259129592711e-06, "loss": 0.08480072021484375, "step": 1685 }, { "epoch": 0.23493346338744514, "grad_norm": 0.9531662464141846, "learning_rate": 9.255021917137181e-06, "loss": 0.10332202911376953, "step": 1686 }, { "epoch": 0.23507280707865952, "grad_norm": 0.6824658513069153, "learning_rate": 9.253783759312924e-06, "loss": 0.10327625274658203, "step": 1687 }, { "epoch": 0.2352121507698739, "grad_norm": 0.7474476099014282, "learning_rate": 9.252544656395033e-06, "loss": 0.08447456359863281, "step": 1688 }, { "epoch": 0.23535149446108827, "grad_norm": 0.9935675263404846, "learning_rate": 9.251304608658806e-06, "loss": 0.0895547866821289, "step": 1689 }, { "epoch": 0.23549083815230265, "grad_norm": 1.3968757390975952, "learning_rate": 9.250063616379754e-06, "loss": 0.12171554565429688, "step": 1690 }, { "epoch": 0.23563018184351703, "grad_norm": 0.9317313432693481, "learning_rate": 9.248821679833596e-06, "loss": 0.1028280258178711, "step": 1691 }, { "epoch": 0.2357695255347314, "grad_norm": 1.2941813468933105, "learning_rate": 9.247578799296263e-06, "loss": 0.07306098937988281, "step": 1692 }, { "epoch": 0.23590886922594578, "grad_norm": 1.0108516216278076, "learning_rate": 9.246334975043896e-06, "loss": 0.07805728912353516, "step": 1693 }, { "epoch": 0.2360482129171602, "grad_norm": 0.8033221364021301, "learning_rate": 9.245090207352842e-06, "loss": 0.09640884399414062, "step": 1694 }, { "epoch": 0.23618755660837457, "grad_norm": 1.3978688716888428, "learning_rate": 9.243844496499661e-06, "loss": 0.105926513671875, "step": 1695 }, { "epoch": 0.23632690029958894, "grad_norm": 0.6376741528511047, "learning_rate": 9.242597842761123e-06, "loss": 0.0861349105834961, "step": 1696 }, { "epoch": 0.23646624399080332, "grad_norm": 0.7288745641708374, "learning_rate": 9.241350246414203e-06, "loss": 0.10024642944335938, "step": 1697 }, { "epoch": 0.2366055876820177, "grad_norm": 1.3727952241897583, "learning_rate": 9.24010170773609e-06, "loss": 0.0909576416015625, "step": 1698 }, { "epoch": 0.23674493137323208, "grad_norm": 1.9363782405853271, "learning_rate": 9.23885222700418e-06, "loss": 0.11940574645996094, "step": 1699 }, { "epoch": 0.23688427506444645, "grad_norm": 1.3783639669418335, "learning_rate": 9.237601804496081e-06, "loss": 0.08932113647460938, "step": 1700 }, { "epoch": 0.23702361875566083, "grad_norm": 1.426979422569275, "learning_rate": 9.236350440489608e-06, "loss": 0.0912933349609375, "step": 1701 }, { "epoch": 0.2371629624468752, "grad_norm": 1.1881811618804932, "learning_rate": 9.235098135262783e-06, "loss": 0.07733631134033203, "step": 1702 }, { "epoch": 0.23730230613808959, "grad_norm": 0.8795827627182007, "learning_rate": 9.233844889093842e-06, "loss": 0.09896278381347656, "step": 1703 }, { "epoch": 0.237441649829304, "grad_norm": 1.0192174911499023, "learning_rate": 9.232590702261227e-06, "loss": 0.09612751007080078, "step": 1704 }, { "epoch": 0.23758099352051837, "grad_norm": 1.3781239986419678, "learning_rate": 9.23133557504359e-06, "loss": 0.09830474853515625, "step": 1705 }, { "epoch": 0.23772033721173275, "grad_norm": 2.36914324760437, "learning_rate": 9.23007950771979e-06, "loss": 0.11327743530273438, "step": 1706 }, { "epoch": 0.23785968090294712, "grad_norm": 0.9153456687927246, "learning_rate": 9.228822500568898e-06, "loss": 0.09378623962402344, "step": 1707 }, { "epoch": 0.2379990245941615, "grad_norm": 0.9128476977348328, "learning_rate": 9.227564553870192e-06, "loss": 0.10300827026367188, "step": 1708 }, { "epoch": 0.23813836828537588, "grad_norm": 0.6403136253356934, "learning_rate": 9.226305667903159e-06, "loss": 0.10736274719238281, "step": 1709 }, { "epoch": 0.23827771197659026, "grad_norm": 1.3574599027633667, "learning_rate": 9.225045842947496e-06, "loss": 0.08393669128417969, "step": 1710 }, { "epoch": 0.23841705566780463, "grad_norm": 0.49734076857566833, "learning_rate": 9.223785079283106e-06, "loss": 0.06844043731689453, "step": 1711 }, { "epoch": 0.238556399359019, "grad_norm": 0.8656529784202576, "learning_rate": 9.2225233771901e-06, "loss": 0.09414815902709961, "step": 1712 }, { "epoch": 0.2386957430502334, "grad_norm": 1.429422378540039, "learning_rate": 9.221260736948803e-06, "loss": 0.08460044860839844, "step": 1713 }, { "epoch": 0.2388350867414478, "grad_norm": 3.2885923385620117, "learning_rate": 9.219997158839743e-06, "loss": 0.08250570297241211, "step": 1714 }, { "epoch": 0.23897443043266217, "grad_norm": 0.9422280192375183, "learning_rate": 9.21873264314366e-06, "loss": 0.09022808074951172, "step": 1715 }, { "epoch": 0.23911377412387655, "grad_norm": 0.7681655287742615, "learning_rate": 9.217467190141498e-06, "loss": 0.07285547256469727, "step": 1716 }, { "epoch": 0.23925311781509093, "grad_norm": 0.942045271396637, "learning_rate": 9.216200800114412e-06, "loss": 0.08348274230957031, "step": 1717 }, { "epoch": 0.2393924615063053, "grad_norm": 1.6744635105133057, "learning_rate": 9.214933473343765e-06, "loss": 0.0922088623046875, "step": 1718 }, { "epoch": 0.23953180519751968, "grad_norm": 0.7921797633171082, "learning_rate": 9.213665210111131e-06, "loss": 0.09058094024658203, "step": 1719 }, { "epoch": 0.23967114888873406, "grad_norm": 0.8447902202606201, "learning_rate": 9.212396010698286e-06, "loss": 0.1015167236328125, "step": 1720 }, { "epoch": 0.23981049257994844, "grad_norm": 0.5179187059402466, "learning_rate": 9.211125875387217e-06, "loss": 0.06358718872070312, "step": 1721 }, { "epoch": 0.23994983627116281, "grad_norm": 0.8657225966453552, "learning_rate": 9.209854804460121e-06, "loss": 0.06951761245727539, "step": 1722 }, { "epoch": 0.2400891799623772, "grad_norm": 1.0553314685821533, "learning_rate": 9.208582798199402e-06, "loss": 0.11070632934570312, "step": 1723 }, { "epoch": 0.2402285236535916, "grad_norm": 0.8581626415252686, "learning_rate": 9.207309856887664e-06, "loss": 0.08954811096191406, "step": 1724 }, { "epoch": 0.24036786734480597, "grad_norm": 1.3563334941864014, "learning_rate": 9.206035980807734e-06, "loss": 0.09879493713378906, "step": 1725 }, { "epoch": 0.24050721103602035, "grad_norm": 0.7521841526031494, "learning_rate": 9.204761170242635e-06, "loss": 0.07719564437866211, "step": 1726 }, { "epoch": 0.24064655472723473, "grad_norm": 0.9176639318466187, "learning_rate": 9.203485425475598e-06, "loss": 0.08045387268066406, "step": 1727 }, { "epoch": 0.2407858984184491, "grad_norm": 0.6497876048088074, "learning_rate": 9.202208746790069e-06, "loss": 0.06556892395019531, "step": 1728 }, { "epoch": 0.24092524210966348, "grad_norm": 0.6784340739250183, "learning_rate": 9.200931134469692e-06, "loss": 0.07561302185058594, "step": 1729 }, { "epoch": 0.24106458580087786, "grad_norm": 0.5992065072059631, "learning_rate": 9.199652588798327e-06, "loss": 0.08551406860351562, "step": 1730 }, { "epoch": 0.24120392949209224, "grad_norm": 0.8779336810112, "learning_rate": 9.198373110060037e-06, "loss": 0.07604742050170898, "step": 1731 }, { "epoch": 0.24134327318330662, "grad_norm": 1.167301058769226, "learning_rate": 9.197092698539092e-06, "loss": 0.10158061981201172, "step": 1732 }, { "epoch": 0.241482616874521, "grad_norm": 1.7958351373672485, "learning_rate": 9.19581135451997e-06, "loss": 0.11013031005859375, "step": 1733 }, { "epoch": 0.2416219605657354, "grad_norm": 0.8901804685592651, "learning_rate": 9.194529078287358e-06, "loss": 0.07967758178710938, "step": 1734 }, { "epoch": 0.24176130425694978, "grad_norm": 0.7320822477340698, "learning_rate": 9.193245870126147e-06, "loss": 0.09058284759521484, "step": 1735 }, { "epoch": 0.24190064794816415, "grad_norm": 1.4560117721557617, "learning_rate": 9.191961730321437e-06, "loss": 0.08716773986816406, "step": 1736 }, { "epoch": 0.24203999163937853, "grad_norm": 1.5421178340911865, "learning_rate": 9.190676659158535e-06, "loss": 0.086578369140625, "step": 1737 }, { "epoch": 0.2421793353305929, "grad_norm": 0.9126785397529602, "learning_rate": 9.189390656922955e-06, "loss": 0.07844281196594238, "step": 1738 }, { "epoch": 0.2423186790218073, "grad_norm": 0.9173855185508728, "learning_rate": 9.188103723900414e-06, "loss": 0.0995321273803711, "step": 1739 }, { "epoch": 0.24245802271302166, "grad_norm": 0.9028423428535461, "learning_rate": 9.186815860376843e-06, "loss": 0.08171844482421875, "step": 1740 }, { "epoch": 0.24259736640423604, "grad_norm": 0.7732092142105103, "learning_rate": 9.185527066638375e-06, "loss": 0.09660530090332031, "step": 1741 }, { "epoch": 0.24273671009545042, "grad_norm": 0.8160417675971985, "learning_rate": 9.184237342971349e-06, "loss": 0.0881967544555664, "step": 1742 }, { "epoch": 0.2428760537866648, "grad_norm": 0.9828669428825378, "learning_rate": 9.182946689662314e-06, "loss": 0.10031414031982422, "step": 1743 }, { "epoch": 0.2430153974778792, "grad_norm": 1.403334617614746, "learning_rate": 9.181655106998023e-06, "loss": 0.12142753601074219, "step": 1744 }, { "epoch": 0.24315474116909358, "grad_norm": 0.6640063524246216, "learning_rate": 9.180362595265435e-06, "loss": 0.07888507843017578, "step": 1745 }, { "epoch": 0.24329408486030796, "grad_norm": 1.016838550567627, "learning_rate": 9.179069154751718e-06, "loss": 0.08187675476074219, "step": 1746 }, { "epoch": 0.24343342855152234, "grad_norm": 0.9142103791236877, "learning_rate": 9.177774785744245e-06, "loss": 0.07701683044433594, "step": 1747 }, { "epoch": 0.2435727722427367, "grad_norm": 1.1025516986846924, "learning_rate": 9.176479488530594e-06, "loss": 0.07732963562011719, "step": 1748 }, { "epoch": 0.2437121159339511, "grad_norm": 0.9682011008262634, "learning_rate": 9.175183263398553e-06, "loss": 0.06744778156280518, "step": 1749 }, { "epoch": 0.24385145962516547, "grad_norm": 1.1069037914276123, "learning_rate": 9.17388611063611e-06, "loss": 0.09107589721679688, "step": 1750 }, { "epoch": 0.24399080331637985, "grad_norm": 1.721082329750061, "learning_rate": 9.172588030531467e-06, "loss": 0.10016059875488281, "step": 1751 }, { "epoch": 0.24413014700759422, "grad_norm": 0.9421934485435486, "learning_rate": 9.171289023373022e-06, "loss": 0.10459709167480469, "step": 1752 }, { "epoch": 0.2442694906988086, "grad_norm": 1.1825308799743652, "learning_rate": 9.16998908944939e-06, "loss": 0.09006881713867188, "step": 1753 }, { "epoch": 0.244408834390023, "grad_norm": 0.9808595180511475, "learning_rate": 9.168688229049386e-06, "loss": 0.07540702819824219, "step": 1754 }, { "epoch": 0.24454817808123738, "grad_norm": 1.0370088815689087, "learning_rate": 9.167386442462029e-06, "loss": 0.08497047424316406, "step": 1755 }, { "epoch": 0.24468752177245176, "grad_norm": 0.8556009531021118, "learning_rate": 9.166083729976547e-06, "loss": 0.08598041534423828, "step": 1756 }, { "epoch": 0.24482686546366614, "grad_norm": 0.52109694480896, "learning_rate": 9.164780091882374e-06, "loss": 0.07072162628173828, "step": 1757 }, { "epoch": 0.24496620915488052, "grad_norm": 0.7801805734634399, "learning_rate": 9.163475528469148e-06, "loss": 0.0783243179321289, "step": 1758 }, { "epoch": 0.2451055528460949, "grad_norm": 0.6623516082763672, "learning_rate": 9.162170040026714e-06, "loss": 0.06866455078125, "step": 1759 }, { "epoch": 0.24524489653730927, "grad_norm": 0.8453050255775452, "learning_rate": 9.16086362684512e-06, "loss": 0.10574722290039062, "step": 1760 }, { "epoch": 0.24538424022852365, "grad_norm": 1.4172288179397583, "learning_rate": 9.159556289214623e-06, "loss": 0.10000038146972656, "step": 1761 }, { "epoch": 0.24552358391973803, "grad_norm": 0.9608826637268066, "learning_rate": 9.158248027425683e-06, "loss": 0.07860660552978516, "step": 1762 }, { "epoch": 0.2456629276109524, "grad_norm": 1.2490230798721313, "learning_rate": 9.156938841768965e-06, "loss": 0.0867319107055664, "step": 1763 }, { "epoch": 0.2458022713021668, "grad_norm": 0.5936671495437622, "learning_rate": 9.155628732535342e-06, "loss": 0.0773773193359375, "step": 1764 }, { "epoch": 0.24594161499338119, "grad_norm": 1.1259030103683472, "learning_rate": 9.15431770001589e-06, "loss": 0.09623527526855469, "step": 1765 }, { "epoch": 0.24608095868459556, "grad_norm": 1.977782964706421, "learning_rate": 9.153005744501886e-06, "loss": 0.10117244720458984, "step": 1766 }, { "epoch": 0.24622030237580994, "grad_norm": 1.334829330444336, "learning_rate": 9.151692866284824e-06, "loss": 0.10811996459960938, "step": 1767 }, { "epoch": 0.24635964606702432, "grad_norm": 0.971108615398407, "learning_rate": 9.150379065656389e-06, "loss": 0.06541824340820312, "step": 1768 }, { "epoch": 0.2464989897582387, "grad_norm": 0.7441234588623047, "learning_rate": 9.149064342908482e-06, "loss": 0.10500240325927734, "step": 1769 }, { "epoch": 0.24663833344945307, "grad_norm": 1.1749356985092163, "learning_rate": 9.147748698333203e-06, "loss": 0.10256767272949219, "step": 1770 }, { "epoch": 0.24677767714066745, "grad_norm": 1.5009902715682983, "learning_rate": 9.146432132222858e-06, "loss": 0.09047698974609375, "step": 1771 }, { "epoch": 0.24691702083188183, "grad_norm": 0.6979109644889832, "learning_rate": 9.145114644869957e-06, "loss": 0.08395195007324219, "step": 1772 }, { "epoch": 0.2470563645230962, "grad_norm": 2.178696393966675, "learning_rate": 9.143796236567218e-06, "loss": 0.11443901062011719, "step": 1773 }, { "epoch": 0.24719570821431058, "grad_norm": 1.0033208131790161, "learning_rate": 9.142476907607558e-06, "loss": 0.08740425109863281, "step": 1774 }, { "epoch": 0.247335051905525, "grad_norm": 1.1641026735305786, "learning_rate": 9.141156658284104e-06, "loss": 0.07776546478271484, "step": 1775 }, { "epoch": 0.24747439559673937, "grad_norm": 0.7964776754379272, "learning_rate": 9.139835488890186e-06, "loss": 0.09384822845458984, "step": 1776 }, { "epoch": 0.24761373928795374, "grad_norm": 1.0665265321731567, "learning_rate": 9.138513399719335e-06, "loss": 0.11553764343261719, "step": 1777 }, { "epoch": 0.24775308297916812, "grad_norm": 1.3394687175750732, "learning_rate": 9.13719039106529e-06, "loss": 0.09081459045410156, "step": 1778 }, { "epoch": 0.2478924266703825, "grad_norm": 0.9653643369674683, "learning_rate": 9.135866463221994e-06, "loss": 0.09300804138183594, "step": 1779 }, { "epoch": 0.24803177036159688, "grad_norm": 1.1060453653335571, "learning_rate": 9.134541616483594e-06, "loss": 0.11513710021972656, "step": 1780 }, { "epoch": 0.24817111405281125, "grad_norm": 1.618920087814331, "learning_rate": 9.13321585114444e-06, "loss": 0.09765243530273438, "step": 1781 }, { "epoch": 0.24831045774402563, "grad_norm": 1.2811089754104614, "learning_rate": 9.131889167499086e-06, "loss": 0.06717109680175781, "step": 1782 }, { "epoch": 0.24844980143524, "grad_norm": 1.2087663412094116, "learning_rate": 9.130561565842293e-06, "loss": 0.09294605255126953, "step": 1783 }, { "epoch": 0.24858914512645439, "grad_norm": 0.7612498998641968, "learning_rate": 9.129233046469021e-06, "loss": 0.07996463775634766, "step": 1784 }, { "epoch": 0.2487284888176688, "grad_norm": 1.1102029085159302, "learning_rate": 9.12790360967444e-06, "loss": 0.09653854370117188, "step": 1785 }, { "epoch": 0.24886783250888317, "grad_norm": 1.4518768787384033, "learning_rate": 9.126573255753917e-06, "loss": 0.10498619079589844, "step": 1786 }, { "epoch": 0.24900717620009755, "grad_norm": 2.278818368911743, "learning_rate": 9.125241985003028e-06, "loss": 0.13312149047851562, "step": 1787 }, { "epoch": 0.24914651989131192, "grad_norm": 1.3037258386611938, "learning_rate": 9.123909797717551e-06, "loss": 0.11467742919921875, "step": 1788 }, { "epoch": 0.2492858635825263, "grad_norm": 2.268294095993042, "learning_rate": 9.122576694193467e-06, "loss": 0.0800018310546875, "step": 1789 }, { "epoch": 0.24942520727374068, "grad_norm": 2.4186923503875732, "learning_rate": 9.121242674726962e-06, "loss": 0.10664939880371094, "step": 1790 }, { "epoch": 0.24956455096495506, "grad_norm": 2.136061906814575, "learning_rate": 9.119907739614424e-06, "loss": 0.08829927444458008, "step": 1791 }, { "epoch": 0.24970389465616943, "grad_norm": 0.8684359192848206, "learning_rate": 9.118571889152445e-06, "loss": 0.09722137451171875, "step": 1792 }, { "epoch": 0.2498432383473838, "grad_norm": 1.3503764867782593, "learning_rate": 9.117235123637822e-06, "loss": 0.10141754150390625, "step": 1793 }, { "epoch": 0.2499825820385982, "grad_norm": 0.826618492603302, "learning_rate": 9.115897443367552e-06, "loss": 0.06499099731445312, "step": 1794 }, { "epoch": 0.25012192572981257, "grad_norm": 0.7864688634872437, "learning_rate": 9.114558848638836e-06, "loss": 0.06921195983886719, "step": 1795 }, { "epoch": 0.25026126942102694, "grad_norm": 0.8790112733840942, "learning_rate": 9.113219339749084e-06, "loss": 0.0753641128540039, "step": 1796 }, { "epoch": 0.2504006131122413, "grad_norm": 0.6454351544380188, "learning_rate": 9.1118789169959e-06, "loss": 0.0931692123413086, "step": 1797 }, { "epoch": 0.2505399568034557, "grad_norm": 0.7588014006614685, "learning_rate": 9.110537580677094e-06, "loss": 0.09242916107177734, "step": 1798 }, { "epoch": 0.2506793004946701, "grad_norm": 0.977644145488739, "learning_rate": 9.109195331090685e-06, "loss": 0.08349800109863281, "step": 1799 }, { "epoch": 0.2508186441858845, "grad_norm": 1.1096550226211548, "learning_rate": 9.10785216853489e-06, "loss": 0.08523178100585938, "step": 1800 }, { "epoch": 0.2509579878770989, "grad_norm": 0.7297939658164978, "learning_rate": 9.106508093308123e-06, "loss": 0.07859230041503906, "step": 1801 }, { "epoch": 0.25109733156831326, "grad_norm": 1.0535626411437988, "learning_rate": 9.105163105709011e-06, "loss": 0.07726192474365234, "step": 1802 }, { "epoch": 0.25123667525952764, "grad_norm": 1.7447675466537476, "learning_rate": 9.103817206036383e-06, "loss": 0.1333026885986328, "step": 1803 }, { "epoch": 0.251376018950742, "grad_norm": 0.9277600049972534, "learning_rate": 9.10247039458926e-06, "loss": 0.0848846435546875, "step": 1804 }, { "epoch": 0.2515153626419564, "grad_norm": 1.0854343175888062, "learning_rate": 9.101122671666878e-06, "loss": 0.07810211181640625, "step": 1805 }, { "epoch": 0.2516547063331708, "grad_norm": 1.0454293489456177, "learning_rate": 9.09977403756867e-06, "loss": 0.07900285720825195, "step": 1806 }, { "epoch": 0.25179405002438515, "grad_norm": 0.7537268996238708, "learning_rate": 9.098424492594268e-06, "loss": 0.07356071472167969, "step": 1807 }, { "epoch": 0.25193339371559953, "grad_norm": 0.6957730650901794, "learning_rate": 9.097074037043512e-06, "loss": 0.08463287353515625, "step": 1808 }, { "epoch": 0.2520727374068139, "grad_norm": 1.0358058214187622, "learning_rate": 9.095722671216443e-06, "loss": 0.0903482437133789, "step": 1809 }, { "epoch": 0.2522120810980283, "grad_norm": 1.5469472408294678, "learning_rate": 9.094370395413306e-06, "loss": 0.13394927978515625, "step": 1810 }, { "epoch": 0.25235142478924266, "grad_norm": 2.0590686798095703, "learning_rate": 9.09301720993454e-06, "loss": 0.13013458251953125, "step": 1811 }, { "epoch": 0.25249076848045704, "grad_norm": 1.1034537553787231, "learning_rate": 9.091663115080797e-06, "loss": 0.09582901000976562, "step": 1812 }, { "epoch": 0.2526301121716714, "grad_norm": 1.1897519826889038, "learning_rate": 9.090308111152924e-06, "loss": 0.09058094024658203, "step": 1813 }, { "epoch": 0.2527694558628858, "grad_norm": 1.779200553894043, "learning_rate": 9.08895219845197e-06, "loss": 0.10949134826660156, "step": 1814 }, { "epoch": 0.25290879955410017, "grad_norm": 1.2912291288375854, "learning_rate": 9.087595377279192e-06, "loss": 0.08861541748046875, "step": 1815 }, { "epoch": 0.25304814324531455, "grad_norm": 1.683497428894043, "learning_rate": 9.086237647936043e-06, "loss": 0.1076202392578125, "step": 1816 }, { "epoch": 0.2531874869365289, "grad_norm": 1.5914033651351929, "learning_rate": 9.084879010724177e-06, "loss": 0.12421131134033203, "step": 1817 }, { "epoch": 0.2533268306277433, "grad_norm": 1.4590450525283813, "learning_rate": 9.083519465945456e-06, "loss": 0.07975006103515625, "step": 1818 }, { "epoch": 0.2534661743189577, "grad_norm": 1.7417349815368652, "learning_rate": 9.082159013901937e-06, "loss": 0.09181594848632812, "step": 1819 }, { "epoch": 0.2536055180101721, "grad_norm": 1.5016816854476929, "learning_rate": 9.080797654895883e-06, "loss": 0.11702537536621094, "step": 1820 }, { "epoch": 0.2537448617013865, "grad_norm": 1.0113550424575806, "learning_rate": 9.079435389229755e-06, "loss": 0.10206222534179688, "step": 1821 }, { "epoch": 0.25388420539260087, "grad_norm": 1.1953781843185425, "learning_rate": 9.07807221720622e-06, "loss": 0.09650611877441406, "step": 1822 }, { "epoch": 0.25402354908381525, "grad_norm": 0.5797760486602783, "learning_rate": 9.07670813912814e-06, "loss": 0.06418275833129883, "step": 1823 }, { "epoch": 0.2541628927750296, "grad_norm": 1.134065866470337, "learning_rate": 9.075343155298589e-06, "loss": 0.0939788818359375, "step": 1824 }, { "epoch": 0.254302236466244, "grad_norm": 0.5990044474601746, "learning_rate": 9.073977266020826e-06, "loss": 0.07845449447631836, "step": 1825 }, { "epoch": 0.2544415801574584, "grad_norm": 1.1297277212142944, "learning_rate": 9.072610471598327e-06, "loss": 0.10096549987792969, "step": 1826 }, { "epoch": 0.25458092384867276, "grad_norm": 1.2930301427841187, "learning_rate": 9.07124277233476e-06, "loss": 0.07662343978881836, "step": 1827 }, { "epoch": 0.25472026753988714, "grad_norm": 1.5174939632415771, "learning_rate": 9.069874168533996e-06, "loss": 0.10560798645019531, "step": 1828 }, { "epoch": 0.2548596112311015, "grad_norm": 1.1403822898864746, "learning_rate": 9.068504660500111e-06, "loss": 0.08518791198730469, "step": 1829 }, { "epoch": 0.2549989549223159, "grad_norm": 0.8549230098724365, "learning_rate": 9.067134248537374e-06, "loss": 0.08364582061767578, "step": 1830 }, { "epoch": 0.25513829861353027, "grad_norm": 0.8359103202819824, "learning_rate": 9.065762932950262e-06, "loss": 0.09833145141601562, "step": 1831 }, { "epoch": 0.25527764230474465, "grad_norm": 1.1406655311584473, "learning_rate": 9.06439071404345e-06, "loss": 0.0821075439453125, "step": 1832 }, { "epoch": 0.255416985995959, "grad_norm": 1.1921743154525757, "learning_rate": 9.063017592121812e-06, "loss": 0.0826120376586914, "step": 1833 }, { "epoch": 0.2555563296871734, "grad_norm": 1.6149479150772095, "learning_rate": 9.061643567490425e-06, "loss": 0.10055303573608398, "step": 1834 }, { "epoch": 0.2556956733783878, "grad_norm": 2.0450656414031982, "learning_rate": 9.060268640454565e-06, "loss": 0.1287841796875, "step": 1835 }, { "epoch": 0.25583501706960216, "grad_norm": 1.0270569324493408, "learning_rate": 9.058892811319713e-06, "loss": 0.09480667114257812, "step": 1836 }, { "epoch": 0.25597436076081653, "grad_norm": 1.860572099685669, "learning_rate": 9.057516080391544e-06, "loss": 0.08600616455078125, "step": 1837 }, { "epoch": 0.2561137044520309, "grad_norm": 1.0132607221603394, "learning_rate": 9.056138447975936e-06, "loss": 0.09083318710327148, "step": 1838 }, { "epoch": 0.2562530481432453, "grad_norm": 1.6892234086990356, "learning_rate": 9.05475991437897e-06, "loss": 0.07625770568847656, "step": 1839 }, { "epoch": 0.2563923918344597, "grad_norm": 2.0547101497650146, "learning_rate": 9.053380479906919e-06, "loss": 0.1085662841796875, "step": 1840 }, { "epoch": 0.2565317355256741, "grad_norm": 0.8111345171928406, "learning_rate": 9.052000144866269e-06, "loss": 0.07961463928222656, "step": 1841 }, { "epoch": 0.2566710792168885, "grad_norm": 0.792777419090271, "learning_rate": 9.050618909563693e-06, "loss": 0.08237838745117188, "step": 1842 }, { "epoch": 0.25681042290810285, "grad_norm": 0.6582019925117493, "learning_rate": 9.049236774306073e-06, "loss": 0.07786941528320312, "step": 1843 }, { "epoch": 0.25694976659931723, "grad_norm": 0.7740215063095093, "learning_rate": 9.04785373940049e-06, "loss": 0.08020591735839844, "step": 1844 }, { "epoch": 0.2570891102905316, "grad_norm": 1.1032124757766724, "learning_rate": 9.046469805154218e-06, "loss": 0.09979629516601562, "step": 1845 }, { "epoch": 0.257228453981746, "grad_norm": 0.7411170601844788, "learning_rate": 9.045084971874738e-06, "loss": 0.09519195556640625, "step": 1846 }, { "epoch": 0.25736779767296036, "grad_norm": 1.1324962377548218, "learning_rate": 9.043699239869727e-06, "loss": 0.09179496765136719, "step": 1847 }, { "epoch": 0.25750714136417474, "grad_norm": 0.836621880531311, "learning_rate": 9.042312609447066e-06, "loss": 0.08917808532714844, "step": 1848 }, { "epoch": 0.2576464850553891, "grad_norm": 1.3155337572097778, "learning_rate": 9.040925080914832e-06, "loss": 0.0912313461303711, "step": 1849 }, { "epoch": 0.2577858287466035, "grad_norm": 1.8869633674621582, "learning_rate": 9.039536654581297e-06, "loss": 0.11069297790527344, "step": 1850 }, { "epoch": 0.2579251724378179, "grad_norm": 0.6954725384712219, "learning_rate": 9.038147330754944e-06, "loss": 0.07722187042236328, "step": 1851 }, { "epoch": 0.25806451612903225, "grad_norm": 0.9358587861061096, "learning_rate": 9.036757109744447e-06, "loss": 0.08288002014160156, "step": 1852 }, { "epoch": 0.25820385982024663, "grad_norm": 0.9146438241004944, "learning_rate": 9.035365991858679e-06, "loss": 0.09528732299804688, "step": 1853 }, { "epoch": 0.258343203511461, "grad_norm": 1.735120415687561, "learning_rate": 9.033973977406718e-06, "loss": 0.11772727966308594, "step": 1854 }, { "epoch": 0.2584825472026754, "grad_norm": 0.556697428226471, "learning_rate": 9.032581066697836e-06, "loss": 0.0792531967163086, "step": 1855 }, { "epoch": 0.25862189089388976, "grad_norm": 1.0190846920013428, "learning_rate": 9.031187260041505e-06, "loss": 0.09375333786010742, "step": 1856 }, { "epoch": 0.25876123458510414, "grad_norm": 0.8517302870750427, "learning_rate": 9.0297925577474e-06, "loss": 0.08422279357910156, "step": 1857 }, { "epoch": 0.2589005782763185, "grad_norm": 0.6715866923332214, "learning_rate": 9.028396960125392e-06, "loss": 0.08108139038085938, "step": 1858 }, { "epoch": 0.2590399219675329, "grad_norm": 1.042622685432434, "learning_rate": 9.027000467485547e-06, "loss": 0.0911417007446289, "step": 1859 }, { "epoch": 0.2591792656587473, "grad_norm": 0.5857561230659485, "learning_rate": 9.025603080138136e-06, "loss": 0.08611106872558594, "step": 1860 }, { "epoch": 0.2593186093499617, "grad_norm": 1.8329181671142578, "learning_rate": 9.024204798393627e-06, "loss": 0.08861446380615234, "step": 1861 }, { "epoch": 0.2594579530411761, "grad_norm": 0.8327253460884094, "learning_rate": 9.022805622562687e-06, "loss": 0.09306144714355469, "step": 1862 }, { "epoch": 0.25959729673239046, "grad_norm": 0.6564353108406067, "learning_rate": 9.02140555295618e-06, "loss": 0.08687233924865723, "step": 1863 }, { "epoch": 0.25973664042360484, "grad_norm": 0.8139690160751343, "learning_rate": 9.020004589885167e-06, "loss": 0.08061027526855469, "step": 1864 }, { "epoch": 0.2598759841148192, "grad_norm": 0.5785820484161377, "learning_rate": 9.018602733660915e-06, "loss": 0.07667732238769531, "step": 1865 }, { "epoch": 0.2600153278060336, "grad_norm": 0.5958017706871033, "learning_rate": 9.01719998459488e-06, "loss": 0.07225751876831055, "step": 1866 }, { "epoch": 0.26015467149724797, "grad_norm": 0.6546416878700256, "learning_rate": 9.015796342998724e-06, "loss": 0.06481647491455078, "step": 1867 }, { "epoch": 0.26029401518846235, "grad_norm": 0.6383397579193115, "learning_rate": 9.014391809184302e-06, "loss": 0.07485342025756836, "step": 1868 }, { "epoch": 0.2604333588796767, "grad_norm": 1.0305694341659546, "learning_rate": 9.01298638346367e-06, "loss": 0.07793951034545898, "step": 1869 }, { "epoch": 0.2605727025708911, "grad_norm": 0.9338133335113525, "learning_rate": 9.011580066149081e-06, "loss": 0.09338569641113281, "step": 1870 }, { "epoch": 0.2607120462621055, "grad_norm": 1.4360418319702148, "learning_rate": 9.010172857552989e-06, "loss": 0.12504005432128906, "step": 1871 }, { "epoch": 0.26085138995331986, "grad_norm": 0.9553367495536804, "learning_rate": 9.008764757988042e-06, "loss": 0.06733417510986328, "step": 1872 }, { "epoch": 0.26099073364453423, "grad_norm": 0.875741183757782, "learning_rate": 9.007355767767085e-06, "loss": 0.07590770721435547, "step": 1873 }, { "epoch": 0.2611300773357486, "grad_norm": 1.9587358236312866, "learning_rate": 9.005945887203167e-06, "loss": 0.10192108154296875, "step": 1874 }, { "epoch": 0.261269421026963, "grad_norm": 0.7367907762527466, "learning_rate": 9.004535116609532e-06, "loss": 0.08180427551269531, "step": 1875 }, { "epoch": 0.26140876471817737, "grad_norm": 1.4912610054016113, "learning_rate": 9.003123456299617e-06, "loss": 0.11049652099609375, "step": 1876 }, { "epoch": 0.26154810840939174, "grad_norm": 1.3546006679534912, "learning_rate": 9.001710906587064e-06, "loss": 0.12143611907958984, "step": 1877 }, { "epoch": 0.2616874521006061, "grad_norm": 1.3133600950241089, "learning_rate": 9.000297467785708e-06, "loss": 0.0791006088256836, "step": 1878 }, { "epoch": 0.2618267957918205, "grad_norm": 0.959581732749939, "learning_rate": 8.998883140209582e-06, "loss": 0.11112594604492188, "step": 1879 }, { "epoch": 0.26196613948303493, "grad_norm": 0.5666657090187073, "learning_rate": 8.99746792417292e-06, "loss": 0.0843963623046875, "step": 1880 }, { "epoch": 0.2621054831742493, "grad_norm": 1.4835535287857056, "learning_rate": 8.996051819990148e-06, "loss": 0.10693931579589844, "step": 1881 }, { "epoch": 0.2622448268654637, "grad_norm": 1.5127451419830322, "learning_rate": 8.994634827975892e-06, "loss": 0.07558727264404297, "step": 1882 }, { "epoch": 0.26238417055667806, "grad_norm": 0.7722365856170654, "learning_rate": 8.993216948444978e-06, "loss": 0.08899259567260742, "step": 1883 }, { "epoch": 0.26252351424789244, "grad_norm": 0.6227282285690308, "learning_rate": 8.991798181712423e-06, "loss": 0.06479644775390625, "step": 1884 }, { "epoch": 0.2626628579391068, "grad_norm": 1.0089271068572998, "learning_rate": 8.99037852809345e-06, "loss": 0.07428836822509766, "step": 1885 }, { "epoch": 0.2628022016303212, "grad_norm": 0.6554375290870667, "learning_rate": 8.988957987903467e-06, "loss": 0.0838627815246582, "step": 1886 }, { "epoch": 0.2629415453215356, "grad_norm": 0.9858035445213318, "learning_rate": 8.987536561458088e-06, "loss": 0.08512020111083984, "step": 1887 }, { "epoch": 0.26308088901274995, "grad_norm": 0.8373844623565674, "learning_rate": 8.986114249073122e-06, "loss": 0.07900524139404297, "step": 1888 }, { "epoch": 0.26322023270396433, "grad_norm": 0.9890793561935425, "learning_rate": 8.984691051064576e-06, "loss": 0.07285213470458984, "step": 1889 }, { "epoch": 0.2633595763951787, "grad_norm": 1.1042349338531494, "learning_rate": 8.98326696774865e-06, "loss": 0.08740901947021484, "step": 1890 }, { "epoch": 0.2634989200863931, "grad_norm": 0.5926269888877869, "learning_rate": 8.981841999441743e-06, "loss": 0.06781959533691406, "step": 1891 }, { "epoch": 0.26363826377760746, "grad_norm": 0.712944746017456, "learning_rate": 8.980416146460452e-06, "loss": 0.0729818344116211, "step": 1892 }, { "epoch": 0.26377760746882184, "grad_norm": 0.8304662704467773, "learning_rate": 8.978989409121565e-06, "loss": 0.07844972610473633, "step": 1893 }, { "epoch": 0.2639169511600362, "grad_norm": 0.711720883846283, "learning_rate": 8.977561787742074e-06, "loss": 0.08012580871582031, "step": 1894 }, { "epoch": 0.2640562948512506, "grad_norm": 0.735150933265686, "learning_rate": 8.976133282639166e-06, "loss": 0.0751638412475586, "step": 1895 }, { "epoch": 0.264195638542465, "grad_norm": 1.271135687828064, "learning_rate": 8.974703894130218e-06, "loss": 0.09833908081054688, "step": 1896 }, { "epoch": 0.26433498223367935, "grad_norm": 0.7179635167121887, "learning_rate": 8.973273622532806e-06, "loss": 0.10897254943847656, "step": 1897 }, { "epoch": 0.2644743259248937, "grad_norm": 1.1391706466674805, "learning_rate": 8.97184246816471e-06, "loss": 0.10976839065551758, "step": 1898 }, { "epoch": 0.2646136696161081, "grad_norm": 1.7587957382202148, "learning_rate": 8.970410431343892e-06, "loss": 0.1234130859375, "step": 1899 }, { "epoch": 0.26475301330732254, "grad_norm": 0.7493435740470886, "learning_rate": 8.968977512388524e-06, "loss": 0.07396125793457031, "step": 1900 }, { "epoch": 0.2648923569985369, "grad_norm": 0.8240585327148438, "learning_rate": 8.967543711616968e-06, "loss": 0.11882972717285156, "step": 1901 }, { "epoch": 0.2650317006897513, "grad_norm": 1.155645728111267, "learning_rate": 8.966109029347777e-06, "loss": 0.1023101806640625, "step": 1902 }, { "epoch": 0.26517104438096567, "grad_norm": 0.8437185883522034, "learning_rate": 8.96467346589971e-06, "loss": 0.07821941375732422, "step": 1903 }, { "epoch": 0.26531038807218005, "grad_norm": 0.9237241744995117, "learning_rate": 8.963237021591714e-06, "loss": 0.08901691436767578, "step": 1904 }, { "epoch": 0.2654497317633944, "grad_norm": 0.746236264705658, "learning_rate": 8.961799696742933e-06, "loss": 0.06841278076171875, "step": 1905 }, { "epoch": 0.2655890754546088, "grad_norm": 0.9287621378898621, "learning_rate": 8.960361491672708e-06, "loss": 0.08880043029785156, "step": 1906 }, { "epoch": 0.2657284191458232, "grad_norm": 1.5094178915023804, "learning_rate": 8.958922406700578e-06, "loss": 0.11212825775146484, "step": 1907 }, { "epoch": 0.26586776283703756, "grad_norm": 1.003198504447937, "learning_rate": 8.957482442146271e-06, "loss": 0.09646987915039062, "step": 1908 }, { "epoch": 0.26600710652825194, "grad_norm": 0.7017812728881836, "learning_rate": 8.956041598329716e-06, "loss": 0.07152938842773438, "step": 1909 }, { "epoch": 0.2661464502194663, "grad_norm": 0.9073426723480225, "learning_rate": 8.954599875571039e-06, "loss": 0.10169410705566406, "step": 1910 }, { "epoch": 0.2662857939106807, "grad_norm": 1.4613763093948364, "learning_rate": 8.953157274190552e-06, "loss": 0.10907554626464844, "step": 1911 }, { "epoch": 0.26642513760189507, "grad_norm": 0.6498017907142639, "learning_rate": 8.951713794508771e-06, "loss": 0.07463645935058594, "step": 1912 }, { "epoch": 0.26656448129310945, "grad_norm": 0.5553117394447327, "learning_rate": 8.950269436846405e-06, "loss": 0.06511974334716797, "step": 1913 }, { "epoch": 0.2667038249843238, "grad_norm": 0.5700357556343079, "learning_rate": 8.948824201524355e-06, "loss": 0.07441139221191406, "step": 1914 }, { "epoch": 0.2668431686755382, "grad_norm": 1.9502449035644531, "learning_rate": 8.947378088863722e-06, "loss": 0.10321521759033203, "step": 1915 }, { "epoch": 0.2669825123667526, "grad_norm": 0.9645015597343445, "learning_rate": 8.945931099185798e-06, "loss": 0.0955801010131836, "step": 1916 }, { "epoch": 0.26712185605796696, "grad_norm": 1.2081489562988281, "learning_rate": 8.94448323281207e-06, "loss": 0.10311317443847656, "step": 1917 }, { "epoch": 0.26726119974918133, "grad_norm": 1.2094368934631348, "learning_rate": 8.943034490064222e-06, "loss": 0.07947540283203125, "step": 1918 }, { "epoch": 0.2674005434403957, "grad_norm": 0.5215896368026733, "learning_rate": 8.941584871264131e-06, "loss": 0.06679821014404297, "step": 1919 }, { "epoch": 0.26753988713161014, "grad_norm": 0.9012689590454102, "learning_rate": 8.940134376733869e-06, "loss": 0.08747196197509766, "step": 1920 }, { "epoch": 0.2676792308228245, "grad_norm": 0.8964726328849792, "learning_rate": 8.938683006795704e-06, "loss": 0.08469676971435547, "step": 1921 }, { "epoch": 0.2678185745140389, "grad_norm": 1.8107824325561523, "learning_rate": 8.937230761772098e-06, "loss": 0.1054830551147461, "step": 1922 }, { "epoch": 0.2679579182052533, "grad_norm": 1.4908901453018188, "learning_rate": 8.935777641985704e-06, "loss": 0.10370111465454102, "step": 1923 }, { "epoch": 0.26809726189646765, "grad_norm": 1.2533397674560547, "learning_rate": 8.934323647759373e-06, "loss": 0.10008430480957031, "step": 1924 }, { "epoch": 0.26823660558768203, "grad_norm": 0.7730728387832642, "learning_rate": 8.932868779416148e-06, "loss": 0.08400249481201172, "step": 1925 }, { "epoch": 0.2683759492788964, "grad_norm": 1.1243234872817993, "learning_rate": 8.931413037279271e-06, "loss": 0.09778022766113281, "step": 1926 }, { "epoch": 0.2685152929701108, "grad_norm": 1.4115355014801025, "learning_rate": 8.929956421672172e-06, "loss": 0.07506942749023438, "step": 1927 }, { "epoch": 0.26865463666132516, "grad_norm": 0.9335625171661377, "learning_rate": 8.92849893291848e-06, "loss": 0.08194684982299805, "step": 1928 }, { "epoch": 0.26879398035253954, "grad_norm": 0.6322800517082214, "learning_rate": 8.927040571342014e-06, "loss": 0.08020591735839844, "step": 1929 }, { "epoch": 0.2689333240437539, "grad_norm": 0.8666074275970459, "learning_rate": 8.92558133726679e-06, "loss": 0.08669376373291016, "step": 1930 }, { "epoch": 0.2690726677349683, "grad_norm": 0.8175033926963806, "learning_rate": 8.924121231017012e-06, "loss": 0.09830760955810547, "step": 1931 }, { "epoch": 0.2692120114261827, "grad_norm": 0.971854031085968, "learning_rate": 8.922660252917088e-06, "loss": 0.09667587280273438, "step": 1932 }, { "epoch": 0.26935135511739705, "grad_norm": 1.395241141319275, "learning_rate": 8.92119840329161e-06, "loss": 0.0833740234375, "step": 1933 }, { "epoch": 0.26949069880861143, "grad_norm": 0.8368918895721436, "learning_rate": 8.919735682465372e-06, "loss": 0.06121253967285156, "step": 1934 }, { "epoch": 0.2696300424998258, "grad_norm": 0.7911044955253601, "learning_rate": 8.918272090763352e-06, "loss": 0.0963449478149414, "step": 1935 }, { "epoch": 0.2697693861910402, "grad_norm": 0.8806444406509399, "learning_rate": 8.91680762851073e-06, "loss": 0.08098602294921875, "step": 1936 }, { "epoch": 0.26990872988225456, "grad_norm": 1.5873302221298218, "learning_rate": 8.915342296032874e-06, "loss": 0.11891937255859375, "step": 1937 }, { "epoch": 0.27004807357346894, "grad_norm": 1.0542356967926025, "learning_rate": 8.913876093655351e-06, "loss": 0.09415149688720703, "step": 1938 }, { "epoch": 0.2701874172646833, "grad_norm": 0.5372641086578369, "learning_rate": 8.912409021703914e-06, "loss": 0.07281112670898438, "step": 1939 }, { "epoch": 0.27032676095589775, "grad_norm": 0.9673345685005188, "learning_rate": 8.910941080504514e-06, "loss": 0.09323883056640625, "step": 1940 }, { "epoch": 0.2704661046471121, "grad_norm": 1.5458513498306274, "learning_rate": 8.909472270383293e-06, "loss": 0.11126232147216797, "step": 1941 }, { "epoch": 0.2706054483383265, "grad_norm": 0.6710543632507324, "learning_rate": 8.90800259166659e-06, "loss": 0.05685091018676758, "step": 1942 }, { "epoch": 0.2707447920295409, "grad_norm": 0.7092544436454773, "learning_rate": 8.906532044680933e-06, "loss": 0.10281753540039062, "step": 1943 }, { "epoch": 0.27088413572075526, "grad_norm": 1.542758822441101, "learning_rate": 8.905060629753041e-06, "loss": 0.09783554077148438, "step": 1944 }, { "epoch": 0.27102347941196964, "grad_norm": 0.9787654876708984, "learning_rate": 8.903588347209833e-06, "loss": 0.09380245208740234, "step": 1945 }, { "epoch": 0.271162823103184, "grad_norm": 0.6914438009262085, "learning_rate": 8.902115197378414e-06, "loss": 0.07847023010253906, "step": 1946 }, { "epoch": 0.2713021667943984, "grad_norm": 1.8657313585281372, "learning_rate": 8.900641180586086e-06, "loss": 0.10035324096679688, "step": 1947 }, { "epoch": 0.27144151048561277, "grad_norm": 0.9671610593795776, "learning_rate": 8.89916629716034e-06, "loss": 0.08743476867675781, "step": 1948 }, { "epoch": 0.27158085417682715, "grad_norm": 0.8431591391563416, "learning_rate": 8.897690547428861e-06, "loss": 0.08743000030517578, "step": 1949 }, { "epoch": 0.2717201978680415, "grad_norm": 0.7794982194900513, "learning_rate": 8.89621393171953e-06, "loss": 0.09055900573730469, "step": 1950 }, { "epoch": 0.2718595415592559, "grad_norm": 0.6581821441650391, "learning_rate": 8.894736450360415e-06, "loss": 0.07917976379394531, "step": 1951 }, { "epoch": 0.2719988852504703, "grad_norm": 1.187056541442871, "learning_rate": 8.893258103679779e-06, "loss": 0.12387847900390625, "step": 1952 }, { "epoch": 0.27213822894168466, "grad_norm": 2.0506067276000977, "learning_rate": 8.891778892006077e-06, "loss": 0.10088825225830078, "step": 1953 }, { "epoch": 0.27227757263289903, "grad_norm": 1.6562702655792236, "learning_rate": 8.890298815667956e-06, "loss": 0.09486198425292969, "step": 1954 }, { "epoch": 0.2724169163241134, "grad_norm": 1.5061537027359009, "learning_rate": 8.888817874994254e-06, "loss": 0.0809640884399414, "step": 1955 }, { "epoch": 0.2725562600153278, "grad_norm": 1.0307307243347168, "learning_rate": 8.887336070314005e-06, "loss": 0.07711601257324219, "step": 1956 }, { "epoch": 0.27269560370654217, "grad_norm": 0.829119086265564, "learning_rate": 8.88585340195643e-06, "loss": 0.0924072265625, "step": 1957 }, { "epoch": 0.27283494739775654, "grad_norm": 1.1860268115997314, "learning_rate": 8.884369870250945e-06, "loss": 0.09514141082763672, "step": 1958 }, { "epoch": 0.2729742910889709, "grad_norm": 0.9456478357315063, "learning_rate": 8.882885475527156e-06, "loss": 0.06373858451843262, "step": 1959 }, { "epoch": 0.27311363478018535, "grad_norm": 2.082304000854492, "learning_rate": 8.881400218114861e-06, "loss": 0.11374473571777344, "step": 1960 }, { "epoch": 0.27325297847139973, "grad_norm": 0.7287202477455139, "learning_rate": 8.879914098344053e-06, "loss": 0.07822036743164062, "step": 1961 }, { "epoch": 0.2733923221626141, "grad_norm": 1.2335907220840454, "learning_rate": 8.878427116544912e-06, "loss": 0.08290481567382812, "step": 1962 }, { "epoch": 0.2735316658538285, "grad_norm": 0.7864866852760315, "learning_rate": 8.876939273047813e-06, "loss": 0.08418464660644531, "step": 1963 }, { "epoch": 0.27367100954504286, "grad_norm": 1.0411803722381592, "learning_rate": 8.875450568183318e-06, "loss": 0.08042800426483154, "step": 1964 }, { "epoch": 0.27381035323625724, "grad_norm": 1.0757920742034912, "learning_rate": 8.873961002282185e-06, "loss": 0.08425331115722656, "step": 1965 }, { "epoch": 0.2739496969274716, "grad_norm": 0.8777008056640625, "learning_rate": 8.872470575675361e-06, "loss": 0.08690166473388672, "step": 1966 }, { "epoch": 0.274089040618686, "grad_norm": 0.9248372912406921, "learning_rate": 8.870979288693985e-06, "loss": 0.08208465576171875, "step": 1967 }, { "epoch": 0.2742283843099004, "grad_norm": 0.7667773365974426, "learning_rate": 8.86948714166939e-06, "loss": 0.07760429382324219, "step": 1968 }, { "epoch": 0.27436772800111475, "grad_norm": 0.8875576257705688, "learning_rate": 8.86799413493309e-06, "loss": 0.09407424926757812, "step": 1969 }, { "epoch": 0.27450707169232913, "grad_norm": 1.1820203065872192, "learning_rate": 8.866500268816803e-06, "loss": 0.08263492584228516, "step": 1970 }, { "epoch": 0.2746464153835435, "grad_norm": 0.7154573202133179, "learning_rate": 8.865005543652428e-06, "loss": 0.07873725891113281, "step": 1971 }, { "epoch": 0.2747857590747579, "grad_norm": 0.7812300324440002, "learning_rate": 8.863509959772064e-06, "loss": 0.07185840606689453, "step": 1972 }, { "epoch": 0.27492510276597226, "grad_norm": 1.1703327894210815, "learning_rate": 8.86201351750799e-06, "loss": 0.10485553741455078, "step": 1973 }, { "epoch": 0.27506444645718664, "grad_norm": 1.082531213760376, "learning_rate": 8.860516217192683e-06, "loss": 0.07223892211914062, "step": 1974 }, { "epoch": 0.275203790148401, "grad_norm": 0.8837197422981262, "learning_rate": 8.85901805915881e-06, "loss": 0.07831001281738281, "step": 1975 }, { "epoch": 0.2753431338396154, "grad_norm": 0.7603839635848999, "learning_rate": 8.85751904373923e-06, "loss": 0.07933282852172852, "step": 1976 }, { "epoch": 0.2754824775308298, "grad_norm": 0.8993011116981506, "learning_rate": 8.856019171266984e-06, "loss": 0.07560110092163086, "step": 1977 }, { "epoch": 0.27562182122204415, "grad_norm": 0.8624470829963684, "learning_rate": 8.854518442075313e-06, "loss": 0.08507728576660156, "step": 1978 }, { "epoch": 0.2757611649132585, "grad_norm": 1.0193110704421997, "learning_rate": 8.853016856497646e-06, "loss": 0.09072589874267578, "step": 1979 }, { "epoch": 0.2759005086044729, "grad_norm": 0.7214924693107605, "learning_rate": 8.8515144148676e-06, "loss": 0.08919811248779297, "step": 1980 }, { "epoch": 0.27603985229568734, "grad_norm": 1.0436816215515137, "learning_rate": 8.85001111751898e-06, "loss": 0.11246871948242188, "step": 1981 }, { "epoch": 0.2761791959869017, "grad_norm": 0.9097021818161011, "learning_rate": 8.848506964785789e-06, "loss": 0.07997608184814453, "step": 1982 }, { "epoch": 0.2763185396781161, "grad_norm": 1.0434629917144775, "learning_rate": 8.847001957002211e-06, "loss": 0.10440826416015625, "step": 1983 }, { "epoch": 0.27645788336933047, "grad_norm": 0.9401519298553467, "learning_rate": 8.845496094502628e-06, "loss": 0.10051155090332031, "step": 1984 }, { "epoch": 0.27659722706054485, "grad_norm": 0.6019932627677917, "learning_rate": 8.843989377621606e-06, "loss": 0.07954025268554688, "step": 1985 }, { "epoch": 0.2767365707517592, "grad_norm": 1.2223200798034668, "learning_rate": 8.842481806693906e-06, "loss": 0.08245849609375, "step": 1986 }, { "epoch": 0.2768759144429736, "grad_norm": 0.5927287340164185, "learning_rate": 8.840973382054472e-06, "loss": 0.07358455657958984, "step": 1987 }, { "epoch": 0.277015258134188, "grad_norm": 0.6523659229278564, "learning_rate": 8.839464104038445e-06, "loss": 0.08431148529052734, "step": 1988 }, { "epoch": 0.27715460182540236, "grad_norm": 1.046651840209961, "learning_rate": 8.83795397298115e-06, "loss": 0.08405184745788574, "step": 1989 }, { "epoch": 0.27729394551661674, "grad_norm": 0.43950361013412476, "learning_rate": 8.836442989218104e-06, "loss": 0.06280136108398438, "step": 1990 }, { "epoch": 0.2774332892078311, "grad_norm": 0.9880761504173279, "learning_rate": 8.834931153085014e-06, "loss": 0.09691715240478516, "step": 1991 }, { "epoch": 0.2775726328990455, "grad_norm": 0.5493127107620239, "learning_rate": 8.833418464917774e-06, "loss": 0.06577694416046143, "step": 1992 }, { "epoch": 0.27771197659025987, "grad_norm": 0.9212610721588135, "learning_rate": 8.831904925052468e-06, "loss": 0.11365318298339844, "step": 1993 }, { "epoch": 0.27785132028147425, "grad_norm": 1.1520116329193115, "learning_rate": 8.830390533825373e-06, "loss": 0.0783090591430664, "step": 1994 }, { "epoch": 0.2779906639726886, "grad_norm": 1.3671780824661255, "learning_rate": 8.828875291572951e-06, "loss": 0.09346961975097656, "step": 1995 }, { "epoch": 0.278130007663903, "grad_norm": 0.6482266187667847, "learning_rate": 8.827359198631854e-06, "loss": 0.07388877868652344, "step": 1996 }, { "epoch": 0.2782693513551174, "grad_norm": 1.5922425985336304, "learning_rate": 8.825842255338923e-06, "loss": 0.08919906616210938, "step": 1997 }, { "epoch": 0.27840869504633176, "grad_norm": 0.864870548248291, "learning_rate": 8.824324462031189e-06, "loss": 0.09281730651855469, "step": 1998 }, { "epoch": 0.27854803873754613, "grad_norm": 1.2560006380081177, "learning_rate": 8.822805819045869e-06, "loss": 0.14981460571289062, "step": 1999 }, { "epoch": 0.2786873824287605, "grad_norm": 0.6079005002975464, "learning_rate": 8.821286326720372e-06, "loss": 0.08889293670654297, "step": 2000 }, { "epoch": 0.27882672611997494, "grad_norm": 0.8998680710792542, "learning_rate": 8.819765985392297e-06, "loss": 0.08246994018554688, "step": 2001 }, { "epoch": 0.2789660698111893, "grad_norm": 0.5348104238510132, "learning_rate": 8.818244795399425e-06, "loss": 0.08488750457763672, "step": 2002 }, { "epoch": 0.2791054135024037, "grad_norm": 0.7081894874572754, "learning_rate": 8.81672275707973e-06, "loss": 0.10284996032714844, "step": 2003 }, { "epoch": 0.2792447571936181, "grad_norm": 0.7892764806747437, "learning_rate": 8.815199870771378e-06, "loss": 0.08935260772705078, "step": 2004 }, { "epoch": 0.27938410088483245, "grad_norm": 0.5394920110702515, "learning_rate": 8.813676136812717e-06, "loss": 0.06775236129760742, "step": 2005 }, { "epoch": 0.27952344457604683, "grad_norm": 1.5949044227600098, "learning_rate": 8.812151555542286e-06, "loss": 0.09868717193603516, "step": 2006 }, { "epoch": 0.2796627882672612, "grad_norm": 1.4620091915130615, "learning_rate": 8.81062612729881e-06, "loss": 0.08924579620361328, "step": 2007 }, { "epoch": 0.2798021319584756, "grad_norm": 0.8795568943023682, "learning_rate": 8.80909985242121e-06, "loss": 0.08490848541259766, "step": 2008 }, { "epoch": 0.27994147564968996, "grad_norm": 0.6895430684089661, "learning_rate": 8.807572731248583e-06, "loss": 0.06254196166992188, "step": 2009 }, { "epoch": 0.28008081934090434, "grad_norm": 2.03169584274292, "learning_rate": 8.806044764120226e-06, "loss": 0.08522176742553711, "step": 2010 }, { "epoch": 0.2802201630321187, "grad_norm": 1.3908004760742188, "learning_rate": 8.804515951375615e-06, "loss": 0.12482643127441406, "step": 2011 }, { "epoch": 0.2803595067233331, "grad_norm": 0.8777686357498169, "learning_rate": 8.802986293354418e-06, "loss": 0.09493637084960938, "step": 2012 }, { "epoch": 0.2804988504145475, "grad_norm": 0.9445438385009766, "learning_rate": 8.80145579039649e-06, "loss": 0.07470989227294922, "step": 2013 }, { "epoch": 0.28063819410576185, "grad_norm": 0.691261887550354, "learning_rate": 8.799924442841873e-06, "loss": 0.06782817840576172, "step": 2014 }, { "epoch": 0.28077753779697623, "grad_norm": 0.7661568522453308, "learning_rate": 8.798392251030801e-06, "loss": 0.07251501083374023, "step": 2015 }, { "epoch": 0.2809168814881906, "grad_norm": 1.5974255800247192, "learning_rate": 8.796859215303688e-06, "loss": 0.07700109481811523, "step": 2016 }, { "epoch": 0.281056225179405, "grad_norm": 1.8504077196121216, "learning_rate": 8.795325336001143e-06, "loss": 0.08443641662597656, "step": 2017 }, { "epoch": 0.28119556887061936, "grad_norm": 1.6375428438186646, "learning_rate": 8.793790613463956e-06, "loss": 0.09210968017578125, "step": 2018 }, { "epoch": 0.28133491256183374, "grad_norm": 2.288983106613159, "learning_rate": 8.792255048033106e-06, "loss": 0.11819839477539062, "step": 2019 }, { "epoch": 0.2814742562530481, "grad_norm": 1.2053519487380981, "learning_rate": 8.790718640049767e-06, "loss": 0.08749771118164062, "step": 2020 }, { "epoch": 0.28161359994426255, "grad_norm": 1.0230112075805664, "learning_rate": 8.789181389855288e-06, "loss": 0.10604286193847656, "step": 2021 }, { "epoch": 0.2817529436354769, "grad_norm": 1.229174017906189, "learning_rate": 8.787643297791214e-06, "loss": 0.1434803009033203, "step": 2022 }, { "epoch": 0.2818922873266913, "grad_norm": 1.1008126735687256, "learning_rate": 8.78610436419927e-06, "loss": 0.07787418365478516, "step": 2023 }, { "epoch": 0.2820316310179057, "grad_norm": 2.9900169372558594, "learning_rate": 8.784564589421373e-06, "loss": 0.09169769287109375, "step": 2024 }, { "epoch": 0.28217097470912006, "grad_norm": 1.5972179174423218, "learning_rate": 8.783023973799632e-06, "loss": 0.0986175537109375, "step": 2025 }, { "epoch": 0.28231031840033444, "grad_norm": 2.124523162841797, "learning_rate": 8.78148251767633e-06, "loss": 0.07121753692626953, "step": 2026 }, { "epoch": 0.2824496620915488, "grad_norm": 0.8061181306838989, "learning_rate": 8.779940221393946e-06, "loss": 0.09157133102416992, "step": 2027 }, { "epoch": 0.2825890057827632, "grad_norm": 1.5349068641662598, "learning_rate": 8.778397085295141e-06, "loss": 0.10101890563964844, "step": 2028 }, { "epoch": 0.28272834947397757, "grad_norm": 0.740436315536499, "learning_rate": 8.776853109722765e-06, "loss": 0.08171224594116211, "step": 2029 }, { "epoch": 0.28286769316519195, "grad_norm": 0.5558489561080933, "learning_rate": 8.775308295019857e-06, "loss": 0.07909584045410156, "step": 2030 }, { "epoch": 0.2830070368564063, "grad_norm": 1.7052137851715088, "learning_rate": 8.773762641529637e-06, "loss": 0.11815834045410156, "step": 2031 }, { "epoch": 0.2831463805476207, "grad_norm": 1.4187288284301758, "learning_rate": 8.772216149595515e-06, "loss": 0.09934520721435547, "step": 2032 }, { "epoch": 0.2832857242388351, "grad_norm": 1.0558619499206543, "learning_rate": 8.770668819561085e-06, "loss": 0.07709312438964844, "step": 2033 }, { "epoch": 0.28342506793004946, "grad_norm": 0.9842582941055298, "learning_rate": 8.769120651770128e-06, "loss": 0.0854635238647461, "step": 2034 }, { "epoch": 0.28356441162126383, "grad_norm": 0.7488988637924194, "learning_rate": 8.767571646566615e-06, "loss": 0.0883941650390625, "step": 2035 }, { "epoch": 0.2837037553124782, "grad_norm": 0.8447861075401306, "learning_rate": 8.766021804294697e-06, "loss": 0.07397079467773438, "step": 2036 }, { "epoch": 0.2838430990036926, "grad_norm": 1.438050389289856, "learning_rate": 8.764471125298712e-06, "loss": 0.07922935485839844, "step": 2037 }, { "epoch": 0.28398244269490697, "grad_norm": 1.776219367980957, "learning_rate": 8.76291960992319e-06, "loss": 0.09369039535522461, "step": 2038 }, { "epoch": 0.28412178638612134, "grad_norm": 0.6577638387680054, "learning_rate": 8.761367258512838e-06, "loss": 0.08309745788574219, "step": 2039 }, { "epoch": 0.2842611300773357, "grad_norm": 0.777477502822876, "learning_rate": 8.759814071412554e-06, "loss": 0.11754608154296875, "step": 2040 }, { "epoch": 0.28440047376855015, "grad_norm": 0.9376115202903748, "learning_rate": 8.758260048967421e-06, "loss": 0.0841827392578125, "step": 2041 }, { "epoch": 0.28453981745976453, "grad_norm": 0.9424386024475098, "learning_rate": 8.75670519152271e-06, "loss": 0.06678962707519531, "step": 2042 }, { "epoch": 0.2846791611509789, "grad_norm": 0.8986284732818604, "learning_rate": 8.755149499423871e-06, "loss": 0.06999397277832031, "step": 2043 }, { "epoch": 0.2848185048421933, "grad_norm": 0.7017866969108582, "learning_rate": 8.753592973016545e-06, "loss": 0.06967449188232422, "step": 2044 }, { "epoch": 0.28495784853340766, "grad_norm": 2.360574722290039, "learning_rate": 8.752035612646557e-06, "loss": 0.1204366683959961, "step": 2045 }, { "epoch": 0.28509719222462204, "grad_norm": 1.4621301889419556, "learning_rate": 8.750477418659914e-06, "loss": 0.09738636016845703, "step": 2046 }, { "epoch": 0.2852365359158364, "grad_norm": 2.077066421508789, "learning_rate": 8.748918391402816e-06, "loss": 0.08427238464355469, "step": 2047 }, { "epoch": 0.2853758796070508, "grad_norm": 0.6387795805931091, "learning_rate": 8.74735853122164e-06, "loss": 0.06015205383300781, "step": 2048 }, { "epoch": 0.2855152232982652, "grad_norm": 0.8982558250427246, "learning_rate": 8.745797838462951e-06, "loss": 0.09414529800415039, "step": 2049 }, { "epoch": 0.28565456698947955, "grad_norm": 0.7574140429496765, "learning_rate": 8.7442363134735e-06, "loss": 0.09012794494628906, "step": 2050 }, { "epoch": 0.28579391068069393, "grad_norm": 1.1924594640731812, "learning_rate": 8.742673956600225e-06, "loss": 0.09916305541992188, "step": 2051 }, { "epoch": 0.2859332543719083, "grad_norm": 0.9646893739700317, "learning_rate": 8.741110768190242e-06, "loss": 0.08440351486206055, "step": 2052 }, { "epoch": 0.2860725980631227, "grad_norm": 0.7222622632980347, "learning_rate": 8.739546748590857e-06, "loss": 0.065093994140625, "step": 2053 }, { "epoch": 0.28621194175433706, "grad_norm": 0.4520653188228607, "learning_rate": 8.73798189814956e-06, "loss": 0.06419181823730469, "step": 2054 }, { "epoch": 0.28635128544555144, "grad_norm": 0.8775712847709656, "learning_rate": 8.736416217214026e-06, "loss": 0.09205436706542969, "step": 2055 }, { "epoch": 0.2864906291367658, "grad_norm": 0.9574913382530212, "learning_rate": 8.734849706132112e-06, "loss": 0.08139324188232422, "step": 2056 }, { "epoch": 0.2866299728279802, "grad_norm": 1.107048511505127, "learning_rate": 8.733282365251858e-06, "loss": 0.08784294128417969, "step": 2057 }, { "epoch": 0.2867693165191946, "grad_norm": 0.7797505259513855, "learning_rate": 8.731714194921498e-06, "loss": 0.0740652084350586, "step": 2058 }, { "epoch": 0.28690866021040895, "grad_norm": 1.2246898412704468, "learning_rate": 8.73014519548944e-06, "loss": 0.10801410675048828, "step": 2059 }, { "epoch": 0.2870480039016233, "grad_norm": 1.6978572607040405, "learning_rate": 8.72857536730428e-06, "loss": 0.1084756851196289, "step": 2060 }, { "epoch": 0.28718734759283776, "grad_norm": 1.1246411800384521, "learning_rate": 8.7270047107148e-06, "loss": 0.09037542343139648, "step": 2061 }, { "epoch": 0.28732669128405214, "grad_norm": 1.8195666074752808, "learning_rate": 8.72543322606996e-06, "loss": 0.10853195190429688, "step": 2062 }, { "epoch": 0.2874660349752665, "grad_norm": 0.7569425106048584, "learning_rate": 8.72386091371891e-06, "loss": 0.084716796875, "step": 2063 }, { "epoch": 0.2876053786664809, "grad_norm": 0.7829339504241943, "learning_rate": 8.722287774010983e-06, "loss": 0.08115053176879883, "step": 2064 }, { "epoch": 0.28774472235769527, "grad_norm": 0.7751170992851257, "learning_rate": 8.720713807295692e-06, "loss": 0.0970296859741211, "step": 2065 }, { "epoch": 0.28788406604890965, "grad_norm": 0.526141881942749, "learning_rate": 8.71913901392274e-06, "loss": 0.08362293243408203, "step": 2066 }, { "epoch": 0.288023409740124, "grad_norm": 1.1417443752288818, "learning_rate": 8.71756339424201e-06, "loss": 0.10819625854492188, "step": 2067 }, { "epoch": 0.2881627534313384, "grad_norm": 1.0547823905944824, "learning_rate": 8.715986948603566e-06, "loss": 0.0901336669921875, "step": 2068 }, { "epoch": 0.2883020971225528, "grad_norm": 0.9163026809692383, "learning_rate": 8.71440967735766e-06, "loss": 0.10595989227294922, "step": 2069 }, { "epoch": 0.28844144081376716, "grad_norm": 1.305404782295227, "learning_rate": 8.712831580854724e-06, "loss": 0.08587074279785156, "step": 2070 }, { "epoch": 0.28858078450498154, "grad_norm": 1.1870008707046509, "learning_rate": 8.711252659445378e-06, "loss": 0.0955801010131836, "step": 2071 }, { "epoch": 0.2887201281961959, "grad_norm": 0.9621111750602722, "learning_rate": 8.709672913480418e-06, "loss": 0.09336662292480469, "step": 2072 }, { "epoch": 0.2888594718874103, "grad_norm": 0.7290777564048767, "learning_rate": 8.70809234331083e-06, "loss": 0.09493255615234375, "step": 2073 }, { "epoch": 0.28899881557862467, "grad_norm": 1.1854972839355469, "learning_rate": 8.706510949287782e-06, "loss": 0.11667156219482422, "step": 2074 }, { "epoch": 0.28913815926983905, "grad_norm": 0.652292788028717, "learning_rate": 8.70492873176262e-06, "loss": 0.07904577255249023, "step": 2075 }, { "epoch": 0.2892775029610534, "grad_norm": 0.8177866339683533, "learning_rate": 8.703345691086882e-06, "loss": 0.0792093276977539, "step": 2076 }, { "epoch": 0.2894168466522678, "grad_norm": 0.6670788526535034, "learning_rate": 8.701761827612278e-06, "loss": 0.08463859558105469, "step": 2077 }, { "epoch": 0.2895561903434822, "grad_norm": 1.3907780647277832, "learning_rate": 8.700177141690708e-06, "loss": 0.1142282485961914, "step": 2078 }, { "epoch": 0.28969553403469656, "grad_norm": 1.0824675559997559, "learning_rate": 8.698591633674256e-06, "loss": 0.0889277458190918, "step": 2079 }, { "epoch": 0.28983487772591093, "grad_norm": 1.4093124866485596, "learning_rate": 8.697005303915183e-06, "loss": 0.09013557434082031, "step": 2080 }, { "epoch": 0.28997422141712537, "grad_norm": 2.008183717727661, "learning_rate": 8.695418152765933e-06, "loss": 0.08732223510742188, "step": 2081 }, { "epoch": 0.29011356510833974, "grad_norm": 0.6405354142189026, "learning_rate": 8.693830180579139e-06, "loss": 0.08936691284179688, "step": 2082 }, { "epoch": 0.2902529087995541, "grad_norm": 0.9124606251716614, "learning_rate": 8.69224138770761e-06, "loss": 0.08582878112792969, "step": 2083 }, { "epoch": 0.2903922524907685, "grad_norm": 0.7798373699188232, "learning_rate": 8.69065177450434e-06, "loss": 0.10259246826171875, "step": 2084 }, { "epoch": 0.2905315961819829, "grad_norm": 0.7154884934425354, "learning_rate": 8.689061341322505e-06, "loss": 0.07267284393310547, "step": 2085 }, { "epoch": 0.29067093987319725, "grad_norm": 0.8316805362701416, "learning_rate": 8.687470088515464e-06, "loss": 0.07840824127197266, "step": 2086 }, { "epoch": 0.29081028356441163, "grad_norm": 1.569994568824768, "learning_rate": 8.685878016436753e-06, "loss": 0.09800529479980469, "step": 2087 }, { "epoch": 0.290949627255626, "grad_norm": 0.6091328263282776, "learning_rate": 8.684285125440099e-06, "loss": 0.08677530288696289, "step": 2088 }, { "epoch": 0.2910889709468404, "grad_norm": 0.4927259385585785, "learning_rate": 8.682691415879402e-06, "loss": 0.0617828369140625, "step": 2089 }, { "epoch": 0.29122831463805476, "grad_norm": 0.8006260991096497, "learning_rate": 8.681096888108751e-06, "loss": 0.08755159378051758, "step": 2090 }, { "epoch": 0.29136765832926914, "grad_norm": 0.6847650408744812, "learning_rate": 8.679501542482412e-06, "loss": 0.0951833724975586, "step": 2091 }, { "epoch": 0.2915070020204835, "grad_norm": 0.6976614594459534, "learning_rate": 8.677905379354834e-06, "loss": 0.09594988822937012, "step": 2092 }, { "epoch": 0.2916463457116979, "grad_norm": 1.1610217094421387, "learning_rate": 8.67630839908065e-06, "loss": 0.1112661361694336, "step": 2093 }, { "epoch": 0.2917856894029123, "grad_norm": 1.2573426961898804, "learning_rate": 8.674710602014672e-06, "loss": 0.1058492660522461, "step": 2094 }, { "epoch": 0.29192503309412665, "grad_norm": 1.0053138732910156, "learning_rate": 8.673111988511892e-06, "loss": 0.09017276763916016, "step": 2095 }, { "epoch": 0.29206437678534103, "grad_norm": 0.6061331629753113, "learning_rate": 8.671512558927483e-06, "loss": 0.077850341796875, "step": 2096 }, { "epoch": 0.2922037204765554, "grad_norm": 1.2217581272125244, "learning_rate": 8.669912313616811e-06, "loss": 0.10404586791992188, "step": 2097 }, { "epoch": 0.2923430641677698, "grad_norm": 0.7568792700767517, "learning_rate": 8.668311252935407e-06, "loss": 0.07505416870117188, "step": 2098 }, { "epoch": 0.29248240785898416, "grad_norm": 0.629003643989563, "learning_rate": 8.66670937723899e-06, "loss": 0.076141357421875, "step": 2099 }, { "epoch": 0.29262175155019854, "grad_norm": 0.6341797113418579, "learning_rate": 8.665106686883461e-06, "loss": 0.09064292907714844, "step": 2100 }, { "epoch": 0.29276109524141297, "grad_norm": 0.5981340408325195, "learning_rate": 8.663503182224906e-06, "loss": 0.0750570297241211, "step": 2101 }, { "epoch": 0.29290043893262735, "grad_norm": 0.5973974466323853, "learning_rate": 8.66189886361958e-06, "loss": 0.08006906509399414, "step": 2102 }, { "epoch": 0.2930397826238417, "grad_norm": 0.7184560298919678, "learning_rate": 8.660293731423929e-06, "loss": 0.12359809875488281, "step": 2103 }, { "epoch": 0.2931791263150561, "grad_norm": 0.6656991243362427, "learning_rate": 8.658687785994579e-06, "loss": 0.087371826171875, "step": 2104 }, { "epoch": 0.2933184700062705, "grad_norm": 0.8175217509269714, "learning_rate": 8.657081027688332e-06, "loss": 0.10372447967529297, "step": 2105 }, { "epoch": 0.29345781369748486, "grad_norm": 0.6420020461082458, "learning_rate": 8.655473456862172e-06, "loss": 0.07936763763427734, "step": 2106 }, { "epoch": 0.29359715738869924, "grad_norm": 1.0575510263442993, "learning_rate": 8.653865073873265e-06, "loss": 0.09548377990722656, "step": 2107 }, { "epoch": 0.2937365010799136, "grad_norm": 0.6056646704673767, "learning_rate": 8.652255879078959e-06, "loss": 0.07453632354736328, "step": 2108 }, { "epoch": 0.293875844771128, "grad_norm": 0.8483715057373047, "learning_rate": 8.650645872836779e-06, "loss": 0.07739734649658203, "step": 2109 }, { "epoch": 0.29401518846234237, "grad_norm": 0.8951723575592041, "learning_rate": 8.649035055504431e-06, "loss": 0.08705711364746094, "step": 2110 }, { "epoch": 0.29415453215355675, "grad_norm": 0.8994393944740295, "learning_rate": 8.647423427439804e-06, "loss": 0.07184886932373047, "step": 2111 }, { "epoch": 0.2942938758447711, "grad_norm": 0.6455232501029968, "learning_rate": 8.645810989000962e-06, "loss": 0.0737462043762207, "step": 2112 }, { "epoch": 0.2944332195359855, "grad_norm": 0.8188624978065491, "learning_rate": 8.644197740546153e-06, "loss": 0.08896160125732422, "step": 2113 }, { "epoch": 0.2945725632271999, "grad_norm": 0.5473856925964355, "learning_rate": 8.642583682433808e-06, "loss": 0.08001899719238281, "step": 2114 }, { "epoch": 0.29471190691841426, "grad_norm": 0.8595117926597595, "learning_rate": 8.640968815022529e-06, "loss": 0.08186721801757812, "step": 2115 }, { "epoch": 0.29485125060962863, "grad_norm": 1.3678762912750244, "learning_rate": 8.6393531386711e-06, "loss": 0.09826850891113281, "step": 2116 }, { "epoch": 0.294990594300843, "grad_norm": 0.8948662877082825, "learning_rate": 8.637736653738496e-06, "loss": 0.09350013732910156, "step": 2117 }, { "epoch": 0.2951299379920574, "grad_norm": 0.9334714412689209, "learning_rate": 8.636119360583857e-06, "loss": 0.09440231323242188, "step": 2118 }, { "epoch": 0.29526928168327177, "grad_norm": 0.5694173574447632, "learning_rate": 8.63450125956651e-06, "loss": 0.08408594131469727, "step": 2119 }, { "epoch": 0.29540862537448614, "grad_norm": 0.8462336659431458, "learning_rate": 8.63288235104596e-06, "loss": 0.07866573333740234, "step": 2120 }, { "epoch": 0.2955479690657006, "grad_norm": 0.6869650483131409, "learning_rate": 8.631262635381892e-06, "loss": 0.07947444915771484, "step": 2121 }, { "epoch": 0.29568731275691496, "grad_norm": 1.4070547819137573, "learning_rate": 8.629642112934169e-06, "loss": 0.08382225036621094, "step": 2122 }, { "epoch": 0.29582665644812933, "grad_norm": 0.8953807950019836, "learning_rate": 8.628020784062837e-06, "loss": 0.06452751159667969, "step": 2123 }, { "epoch": 0.2959660001393437, "grad_norm": 1.6418455839157104, "learning_rate": 8.626398649128113e-06, "loss": 0.08953475952148438, "step": 2124 }, { "epoch": 0.2961053438305581, "grad_norm": 0.5641131401062012, "learning_rate": 8.624775708490403e-06, "loss": 0.0678257942199707, "step": 2125 }, { "epoch": 0.29624468752177247, "grad_norm": 0.7644439339637756, "learning_rate": 8.623151962510284e-06, "loss": 0.08138751983642578, "step": 2126 }, { "epoch": 0.29638403121298684, "grad_norm": 1.0807390213012695, "learning_rate": 8.621527411548517e-06, "loss": 0.09132671356201172, "step": 2127 }, { "epoch": 0.2965233749042012, "grad_norm": 1.9383904933929443, "learning_rate": 8.619902055966043e-06, "loss": 0.12588882446289062, "step": 2128 }, { "epoch": 0.2966627185954156, "grad_norm": 0.9463763236999512, "learning_rate": 8.618275896123973e-06, "loss": 0.10366630554199219, "step": 2129 }, { "epoch": 0.29680206228663, "grad_norm": 0.8296477794647217, "learning_rate": 8.616648932383607e-06, "loss": 0.08958244323730469, "step": 2130 }, { "epoch": 0.29694140597784435, "grad_norm": 0.6432927250862122, "learning_rate": 8.615021165106415e-06, "loss": 0.09516620635986328, "step": 2131 }, { "epoch": 0.29708074966905873, "grad_norm": 1.0939606428146362, "learning_rate": 8.613392594654056e-06, "loss": 0.0977020263671875, "step": 2132 }, { "epoch": 0.2972200933602731, "grad_norm": 0.903468906879425, "learning_rate": 8.611763221388356e-06, "loss": 0.07220458984375, "step": 2133 }, { "epoch": 0.2973594370514875, "grad_norm": 0.7725651860237122, "learning_rate": 8.610133045671325e-06, "loss": 0.08638191223144531, "step": 2134 }, { "epoch": 0.29749878074270186, "grad_norm": 0.9396308064460754, "learning_rate": 8.608502067865155e-06, "loss": 0.08393287658691406, "step": 2135 }, { "epoch": 0.29763812443391624, "grad_norm": 0.8822774887084961, "learning_rate": 8.606870288332206e-06, "loss": 0.0779876708984375, "step": 2136 }, { "epoch": 0.2977774681251306, "grad_norm": 1.7653034925460815, "learning_rate": 8.605237707435028e-06, "loss": 0.10386848449707031, "step": 2137 }, { "epoch": 0.297916811816345, "grad_norm": 0.6704120635986328, "learning_rate": 8.603604325536338e-06, "loss": 0.06981372833251953, "step": 2138 }, { "epoch": 0.2980561555075594, "grad_norm": 1.6614582538604736, "learning_rate": 8.60197014299904e-06, "loss": 0.07895946502685547, "step": 2139 }, { "epoch": 0.29819549919877375, "grad_norm": 1.030929684638977, "learning_rate": 8.600335160186208e-06, "loss": 0.10648870468139648, "step": 2140 }, { "epoch": 0.2983348428899882, "grad_norm": 0.5384858846664429, "learning_rate": 8.598699377461104e-06, "loss": 0.07451820373535156, "step": 2141 }, { "epoch": 0.29847418658120256, "grad_norm": 0.7924405336380005, "learning_rate": 8.597062795187157e-06, "loss": 0.06044292449951172, "step": 2142 }, { "epoch": 0.29861353027241694, "grad_norm": 0.942072331905365, "learning_rate": 8.595425413727979e-06, "loss": 0.09102630615234375, "step": 2143 }, { "epoch": 0.2987528739636313, "grad_norm": 0.9462290406227112, "learning_rate": 8.593787233447357e-06, "loss": 0.09266090393066406, "step": 2144 }, { "epoch": 0.2988922176548457, "grad_norm": 0.9915546774864197, "learning_rate": 8.592148254709262e-06, "loss": 0.06447792053222656, "step": 2145 }, { "epoch": 0.29903156134606007, "grad_norm": 0.7130007743835449, "learning_rate": 8.590508477877834e-06, "loss": 0.08551597595214844, "step": 2146 }, { "epoch": 0.29917090503727445, "grad_norm": 1.5043166875839233, "learning_rate": 8.588867903317395e-06, "loss": 0.11897087097167969, "step": 2147 }, { "epoch": 0.2993102487284888, "grad_norm": 1.6133610010147095, "learning_rate": 8.587226531392443e-06, "loss": 0.11695289611816406, "step": 2148 }, { "epoch": 0.2994495924197032, "grad_norm": 1.5567411184310913, "learning_rate": 8.585584362467652e-06, "loss": 0.11531352996826172, "step": 2149 }, { "epoch": 0.2995889361109176, "grad_norm": 0.8552863001823425, "learning_rate": 8.583941396907877e-06, "loss": 0.0735464096069336, "step": 2150 }, { "epoch": 0.29972827980213196, "grad_norm": 0.5807859897613525, "learning_rate": 8.582297635078149e-06, "loss": 0.07973766326904297, "step": 2151 }, { "epoch": 0.29986762349334634, "grad_norm": 0.6837108731269836, "learning_rate": 8.58065307734367e-06, "loss": 0.08455562591552734, "step": 2152 }, { "epoch": 0.3000069671845607, "grad_norm": 0.8977828025817871, "learning_rate": 8.579007724069823e-06, "loss": 0.10951042175292969, "step": 2153 }, { "epoch": 0.3001463108757751, "grad_norm": 0.4293058514595032, "learning_rate": 8.577361575622171e-06, "loss": 0.0708169937133789, "step": 2154 }, { "epoch": 0.30028565456698947, "grad_norm": 0.9975361227989197, "learning_rate": 8.575714632366451e-06, "loss": 0.08041095733642578, "step": 2155 }, { "epoch": 0.30042499825820385, "grad_norm": 1.263604998588562, "learning_rate": 8.574066894668573e-06, "loss": 0.07907962799072266, "step": 2156 }, { "epoch": 0.3005643419494182, "grad_norm": 1.5974538326263428, "learning_rate": 8.57241836289463e-06, "loss": 0.11662101745605469, "step": 2157 }, { "epoch": 0.3007036856406326, "grad_norm": 1.1293810606002808, "learning_rate": 8.570769037410885e-06, "loss": 0.08738994598388672, "step": 2158 }, { "epoch": 0.300843029331847, "grad_norm": 1.6091716289520264, "learning_rate": 8.56911891858378e-06, "loss": 0.1271343231201172, "step": 2159 }, { "epoch": 0.30098237302306136, "grad_norm": 1.9789330959320068, "learning_rate": 8.56746800677994e-06, "loss": 0.1245279312133789, "step": 2160 }, { "epoch": 0.3011217167142758, "grad_norm": 0.8777437210083008, "learning_rate": 8.565816302366151e-06, "loss": 0.06792736053466797, "step": 2161 }, { "epoch": 0.30126106040549017, "grad_norm": 1.3675481081008911, "learning_rate": 8.564163805709393e-06, "loss": 0.0852670669555664, "step": 2162 }, { "epoch": 0.30140040409670454, "grad_norm": 0.9543792605400085, "learning_rate": 8.562510517176807e-06, "loss": 0.0736856460571289, "step": 2163 }, { "epoch": 0.3015397477879189, "grad_norm": 1.705502986907959, "learning_rate": 8.560856437135716e-06, "loss": 0.12122535705566406, "step": 2164 }, { "epoch": 0.3016790914791333, "grad_norm": 0.4973025321960449, "learning_rate": 8.559201565953623e-06, "loss": 0.07338905334472656, "step": 2165 }, { "epoch": 0.3018184351703477, "grad_norm": 0.9812244772911072, "learning_rate": 8.557545903998197e-06, "loss": 0.08078193664550781, "step": 2166 }, { "epoch": 0.30195777886156205, "grad_norm": 0.7600473165512085, "learning_rate": 8.555889451637294e-06, "loss": 0.10844039916992188, "step": 2167 }, { "epoch": 0.30209712255277643, "grad_norm": 1.182442307472229, "learning_rate": 8.554232209238935e-06, "loss": 0.09427642822265625, "step": 2168 }, { "epoch": 0.3022364662439908, "grad_norm": 0.8704204559326172, "learning_rate": 8.552574177171326e-06, "loss": 0.07486152648925781, "step": 2169 }, { "epoch": 0.3023758099352052, "grad_norm": 0.5308424234390259, "learning_rate": 8.55091535580284e-06, "loss": 0.07269573211669922, "step": 2170 }, { "epoch": 0.30251515362641956, "grad_norm": 0.7725077867507935, "learning_rate": 8.54925574550203e-06, "loss": 0.10294914245605469, "step": 2171 }, { "epoch": 0.30265449731763394, "grad_norm": 0.6277885437011719, "learning_rate": 8.547595346637624e-06, "loss": 0.07301092147827148, "step": 2172 }, { "epoch": 0.3027938410088483, "grad_norm": 0.5651081204414368, "learning_rate": 8.545934159578527e-06, "loss": 0.08434915542602539, "step": 2173 }, { "epoch": 0.3029331847000627, "grad_norm": 1.3969718217849731, "learning_rate": 8.544272184693814e-06, "loss": 0.10215568542480469, "step": 2174 }, { "epoch": 0.3030725283912771, "grad_norm": 2.7309093475341797, "learning_rate": 8.542609422352738e-06, "loss": 0.11323976516723633, "step": 2175 }, { "epoch": 0.30321187208249145, "grad_norm": 1.05276620388031, "learning_rate": 8.540945872924728e-06, "loss": 0.10246515274047852, "step": 2176 }, { "epoch": 0.30335121577370583, "grad_norm": 0.6433766484260559, "learning_rate": 8.539281536779388e-06, "loss": 0.09054279327392578, "step": 2177 }, { "epoch": 0.3034905594649202, "grad_norm": 0.9180610775947571, "learning_rate": 8.537616414286491e-06, "loss": 0.07713508605957031, "step": 2178 }, { "epoch": 0.3036299031561346, "grad_norm": 0.5981694459915161, "learning_rate": 8.535950505815993e-06, "loss": 0.07453632354736328, "step": 2179 }, { "epoch": 0.30376924684734896, "grad_norm": 1.1323362588882446, "learning_rate": 8.53428381173802e-06, "loss": 0.10132503509521484, "step": 2180 }, { "epoch": 0.30390859053856334, "grad_norm": 0.8439390063285828, "learning_rate": 8.532616332422872e-06, "loss": 0.06961774826049805, "step": 2181 }, { "epoch": 0.30404793422977777, "grad_norm": 1.0338084697723389, "learning_rate": 8.530948068241028e-06, "loss": 0.07565975189208984, "step": 2182 }, { "epoch": 0.30418727792099215, "grad_norm": 0.6262402534484863, "learning_rate": 8.529279019563133e-06, "loss": 0.07147026062011719, "step": 2183 }, { "epoch": 0.3043266216122065, "grad_norm": 0.9646878838539124, "learning_rate": 8.527609186760017e-06, "loss": 0.0778799057006836, "step": 2184 }, { "epoch": 0.3044659653034209, "grad_norm": 0.5512875318527222, "learning_rate": 8.525938570202676e-06, "loss": 0.07062244415283203, "step": 2185 }, { "epoch": 0.3046053089946353, "grad_norm": 1.50796377658844, "learning_rate": 8.524267170262283e-06, "loss": 0.10388755798339844, "step": 2186 }, { "epoch": 0.30474465268584966, "grad_norm": 0.7904286980628967, "learning_rate": 8.522594987310184e-06, "loss": 0.06228065490722656, "step": 2187 }, { "epoch": 0.30488399637706404, "grad_norm": 1.2268038988113403, "learning_rate": 8.520922021717903e-06, "loss": 0.08226680755615234, "step": 2188 }, { "epoch": 0.3050233400682784, "grad_norm": 1.5158767700195312, "learning_rate": 8.519248273857132e-06, "loss": 0.09724807739257812, "step": 2189 }, { "epoch": 0.3051626837594928, "grad_norm": 0.5486024022102356, "learning_rate": 8.51757374409974e-06, "loss": 0.07810449600219727, "step": 2190 }, { "epoch": 0.30530202745070717, "grad_norm": 1.1090306043624878, "learning_rate": 8.51589843281777e-06, "loss": 0.11723136901855469, "step": 2191 }, { "epoch": 0.30544137114192155, "grad_norm": 0.5282971262931824, "learning_rate": 8.514222340383438e-06, "loss": 0.07062053680419922, "step": 2192 }, { "epoch": 0.3055807148331359, "grad_norm": 0.6125077605247498, "learning_rate": 8.512545467169133e-06, "loss": 0.0757741928100586, "step": 2193 }, { "epoch": 0.3057200585243503, "grad_norm": 0.5861700773239136, "learning_rate": 8.510867813547417e-06, "loss": 0.07012939453125, "step": 2194 }, { "epoch": 0.3058594022155647, "grad_norm": 1.5003774166107178, "learning_rate": 8.509189379891029e-06, "loss": 0.09200453758239746, "step": 2195 }, { "epoch": 0.30599874590677906, "grad_norm": 0.9484015703201294, "learning_rate": 8.507510166572875e-06, "loss": 0.0842132568359375, "step": 2196 }, { "epoch": 0.30613808959799343, "grad_norm": 1.277471899986267, "learning_rate": 8.50583017396604e-06, "loss": 0.09514999389648438, "step": 2197 }, { "epoch": 0.3062774332892078, "grad_norm": 0.7126684188842773, "learning_rate": 8.504149402443782e-06, "loss": 0.08029747009277344, "step": 2198 }, { "epoch": 0.3064167769804222, "grad_norm": 1.0180084705352783, "learning_rate": 8.502467852379526e-06, "loss": 0.08971595764160156, "step": 2199 }, { "epoch": 0.30655612067163657, "grad_norm": 0.5407505631446838, "learning_rate": 8.500785524146875e-06, "loss": 0.07877731323242188, "step": 2200 }, { "epoch": 0.30669546436285094, "grad_norm": 0.9133316874504089, "learning_rate": 8.499102418119607e-06, "loss": 0.08736371994018555, "step": 2201 }, { "epoch": 0.3068348080540654, "grad_norm": 0.8091353178024292, "learning_rate": 8.497418534671666e-06, "loss": 0.07645988464355469, "step": 2202 }, { "epoch": 0.30697415174527976, "grad_norm": 0.5753692388534546, "learning_rate": 8.495733874177176e-06, "loss": 0.07828617095947266, "step": 2203 }, { "epoch": 0.30711349543649413, "grad_norm": 0.8383010029792786, "learning_rate": 8.494048437010427e-06, "loss": 0.07613277435302734, "step": 2204 }, { "epoch": 0.3072528391277085, "grad_norm": 0.8571507334709167, "learning_rate": 8.492362223545884e-06, "loss": 0.0679788589477539, "step": 2205 }, { "epoch": 0.3073921828189229, "grad_norm": 0.7074180245399475, "learning_rate": 8.49067523415819e-06, "loss": 0.07999515533447266, "step": 2206 }, { "epoch": 0.30753152651013727, "grad_norm": 0.9128747582435608, "learning_rate": 8.48898746922215e-06, "loss": 0.09042167663574219, "step": 2207 }, { "epoch": 0.30767087020135164, "grad_norm": 0.8261674642562866, "learning_rate": 8.487298929112751e-06, "loss": 0.09132957458496094, "step": 2208 }, { "epoch": 0.307810213892566, "grad_norm": 0.824998676776886, "learning_rate": 8.485609614205146e-06, "loss": 0.09494733810424805, "step": 2209 }, { "epoch": 0.3079495575837804, "grad_norm": 0.8243038058280945, "learning_rate": 8.483919524874661e-06, "loss": 0.08062553405761719, "step": 2210 }, { "epoch": 0.3080889012749948, "grad_norm": 1.2237964868545532, "learning_rate": 8.482228661496797e-06, "loss": 0.0896916389465332, "step": 2211 }, { "epoch": 0.30822824496620915, "grad_norm": 0.6550543904304504, "learning_rate": 8.480537024447227e-06, "loss": 0.0928955078125, "step": 2212 }, { "epoch": 0.30836758865742353, "grad_norm": 0.6859322190284729, "learning_rate": 8.478844614101792e-06, "loss": 0.0772085189819336, "step": 2213 }, { "epoch": 0.3085069323486379, "grad_norm": 0.6553457975387573, "learning_rate": 8.477151430836505e-06, "loss": 0.07047748565673828, "step": 2214 }, { "epoch": 0.3086462760398523, "grad_norm": 1.1357512474060059, "learning_rate": 8.475457475027555e-06, "loss": 0.06381988525390625, "step": 2215 }, { "epoch": 0.30878561973106666, "grad_norm": 1.0025972127914429, "learning_rate": 8.473762747051302e-06, "loss": 0.0807962417602539, "step": 2216 }, { "epoch": 0.30892496342228104, "grad_norm": 0.7199293375015259, "learning_rate": 8.472067247284272e-06, "loss": 0.07199859619140625, "step": 2217 }, { "epoch": 0.3090643071134954, "grad_norm": 0.9559110999107361, "learning_rate": 8.470370976103171e-06, "loss": 0.07659053802490234, "step": 2218 }, { "epoch": 0.3092036508047098, "grad_norm": 1.227152705192566, "learning_rate": 8.468673933884867e-06, "loss": 0.08491325378417969, "step": 2219 }, { "epoch": 0.3093429944959242, "grad_norm": 0.511256754398346, "learning_rate": 8.466976121006407e-06, "loss": 0.07811832427978516, "step": 2220 }, { "epoch": 0.30948233818713855, "grad_norm": 0.5627778172492981, "learning_rate": 8.465277537845004e-06, "loss": 0.08562755584716797, "step": 2221 }, { "epoch": 0.309621681878353, "grad_norm": 0.8646585941314697, "learning_rate": 8.463578184778047e-06, "loss": 0.08127021789550781, "step": 2222 }, { "epoch": 0.30976102556956736, "grad_norm": 0.5921614170074463, "learning_rate": 8.461878062183092e-06, "loss": 0.0836029052734375, "step": 2223 }, { "epoch": 0.30990036926078174, "grad_norm": 0.8883531093597412, "learning_rate": 8.460177170437865e-06, "loss": 0.079559326171875, "step": 2224 }, { "epoch": 0.3100397129519961, "grad_norm": 1.0964345932006836, "learning_rate": 8.458475509920272e-06, "loss": 0.09262943267822266, "step": 2225 }, { "epoch": 0.3101790566432105, "grad_norm": 0.8963426351547241, "learning_rate": 8.456773081008376e-06, "loss": 0.08443212509155273, "step": 2226 }, { "epoch": 0.31031840033442487, "grad_norm": 2.0676350593566895, "learning_rate": 8.455069884080422e-06, "loss": 0.08599853515625, "step": 2227 }, { "epoch": 0.31045774402563925, "grad_norm": 1.046981692314148, "learning_rate": 8.45336591951482e-06, "loss": 0.08581829071044922, "step": 2228 }, { "epoch": 0.3105970877168536, "grad_norm": 0.9089792370796204, "learning_rate": 8.451661187690154e-06, "loss": 0.06389617919921875, "step": 2229 }, { "epoch": 0.310736431408068, "grad_norm": 1.2040011882781982, "learning_rate": 8.449955688985174e-06, "loss": 0.07545185089111328, "step": 2230 }, { "epoch": 0.3108757750992824, "grad_norm": 0.7204625606536865, "learning_rate": 8.448249423778802e-06, "loss": 0.08959007263183594, "step": 2231 }, { "epoch": 0.31101511879049676, "grad_norm": 1.5000907182693481, "learning_rate": 8.446542392450134e-06, "loss": 0.12596654891967773, "step": 2232 }, { "epoch": 0.31115446248171114, "grad_norm": 1.0760009288787842, "learning_rate": 8.444834595378434e-06, "loss": 0.08817672729492188, "step": 2233 }, { "epoch": 0.3112938061729255, "grad_norm": 1.0233217477798462, "learning_rate": 8.443126032943132e-06, "loss": 0.09027576446533203, "step": 2234 }, { "epoch": 0.3114331498641399, "grad_norm": 0.5961911678314209, "learning_rate": 8.441416705523834e-06, "loss": 0.07495784759521484, "step": 2235 }, { "epoch": 0.31157249355535427, "grad_norm": 1.3523112535476685, "learning_rate": 8.439706613500312e-06, "loss": 0.09783363342285156, "step": 2236 }, { "epoch": 0.31171183724656865, "grad_norm": 1.1741892099380493, "learning_rate": 8.43799575725251e-06, "loss": 0.08705520629882812, "step": 2237 }, { "epoch": 0.311851180937783, "grad_norm": 0.9110184907913208, "learning_rate": 8.436284137160544e-06, "loss": 0.08658981323242188, "step": 2238 }, { "epoch": 0.3119905246289974, "grad_norm": 1.1394414901733398, "learning_rate": 8.434571753604693e-06, "loss": 0.0838613510131836, "step": 2239 }, { "epoch": 0.3121298683202118, "grad_norm": 1.1230945587158203, "learning_rate": 8.432858606965411e-06, "loss": 0.08248043060302734, "step": 2240 }, { "epoch": 0.31226921201142616, "grad_norm": 0.6842249035835266, "learning_rate": 8.43114469762332e-06, "loss": 0.06394100189208984, "step": 2241 }, { "epoch": 0.3124085557026406, "grad_norm": 0.7235643863677979, "learning_rate": 8.429430025959212e-06, "loss": 0.075408935546875, "step": 2242 }, { "epoch": 0.31254789939385497, "grad_norm": 0.8545743823051453, "learning_rate": 8.427714592354046e-06, "loss": 0.09814691543579102, "step": 2243 }, { "epoch": 0.31268724308506934, "grad_norm": 0.5609846115112305, "learning_rate": 8.425998397188955e-06, "loss": 0.07217121124267578, "step": 2244 }, { "epoch": 0.3128265867762837, "grad_norm": 0.9388143420219421, "learning_rate": 8.424281440845236e-06, "loss": 0.08744335174560547, "step": 2245 }, { "epoch": 0.3129659304674981, "grad_norm": 0.9136983752250671, "learning_rate": 8.42256372370436e-06, "loss": 0.08860206604003906, "step": 2246 }, { "epoch": 0.3131052741587125, "grad_norm": 1.0605475902557373, "learning_rate": 8.420845246147961e-06, "loss": 0.08744621276855469, "step": 2247 }, { "epoch": 0.31324461784992685, "grad_norm": 0.9320101737976074, "learning_rate": 8.41912600855785e-06, "loss": 0.1117391586303711, "step": 2248 }, { "epoch": 0.31338396154114123, "grad_norm": 0.9168702363967896, "learning_rate": 8.417406011316e-06, "loss": 0.08674049377441406, "step": 2249 }, { "epoch": 0.3135233052323556, "grad_norm": 1.5678057670593262, "learning_rate": 8.415685254804552e-06, "loss": 0.07920217514038086, "step": 2250 }, { "epoch": 0.31366264892357, "grad_norm": 0.7268530130386353, "learning_rate": 8.413963739405824e-06, "loss": 0.07273578643798828, "step": 2251 }, { "epoch": 0.31380199261478436, "grad_norm": 1.2473057508468628, "learning_rate": 8.412241465502294e-06, "loss": 0.09773063659667969, "step": 2252 }, { "epoch": 0.31394133630599874, "grad_norm": 0.8080025315284729, "learning_rate": 8.410518433476613e-06, "loss": 0.10939979553222656, "step": 2253 }, { "epoch": 0.3140806799972131, "grad_norm": 0.8592463731765747, "learning_rate": 8.408794643711601e-06, "loss": 0.06766891479492188, "step": 2254 }, { "epoch": 0.3142200236884275, "grad_norm": 0.8630502223968506, "learning_rate": 8.407070096590243e-06, "loss": 0.07241153717041016, "step": 2255 }, { "epoch": 0.3143593673796419, "grad_norm": 0.5052098035812378, "learning_rate": 8.405344792495694e-06, "loss": 0.07845163345336914, "step": 2256 }, { "epoch": 0.31449871107085625, "grad_norm": 0.6263428330421448, "learning_rate": 8.403618731811277e-06, "loss": 0.062030792236328125, "step": 2257 }, { "epoch": 0.31463805476207063, "grad_norm": 0.7053384780883789, "learning_rate": 8.401891914920483e-06, "loss": 0.09267425537109375, "step": 2258 }, { "epoch": 0.314777398453285, "grad_norm": 0.868972659111023, "learning_rate": 8.400164342206973e-06, "loss": 0.09022140502929688, "step": 2259 }, { "epoch": 0.3149167421444994, "grad_norm": 0.6817874908447266, "learning_rate": 8.398436014054575e-06, "loss": 0.06769847869873047, "step": 2260 }, { "epoch": 0.31505608583571376, "grad_norm": 0.6151371002197266, "learning_rate": 8.39670693084728e-06, "loss": 0.09355735778808594, "step": 2261 }, { "epoch": 0.3151954295269282, "grad_norm": 0.5432854890823364, "learning_rate": 8.394977092969253e-06, "loss": 0.06827354431152344, "step": 2262 }, { "epoch": 0.31533477321814257, "grad_norm": 0.7284162640571594, "learning_rate": 8.393246500804825e-06, "loss": 0.0899953842163086, "step": 2263 }, { "epoch": 0.31547411690935695, "grad_norm": 0.7801299691200256, "learning_rate": 8.391515154738495e-06, "loss": 0.10808944702148438, "step": 2264 }, { "epoch": 0.3156134606005713, "grad_norm": 0.6416293382644653, "learning_rate": 8.389783055154925e-06, "loss": 0.07366085052490234, "step": 2265 }, { "epoch": 0.3157528042917857, "grad_norm": 0.48739197850227356, "learning_rate": 8.388050202438952e-06, "loss": 0.07299232482910156, "step": 2266 }, { "epoch": 0.3158921479830001, "grad_norm": 0.8842048645019531, "learning_rate": 8.386316596975574e-06, "loss": 0.07181167602539062, "step": 2267 }, { "epoch": 0.31603149167421446, "grad_norm": 1.2927935123443604, "learning_rate": 8.38458223914996e-06, "loss": 0.07740974426269531, "step": 2268 }, { "epoch": 0.31617083536542884, "grad_norm": 0.9556062817573547, "learning_rate": 8.38284712934744e-06, "loss": 0.0912485122680664, "step": 2269 }, { "epoch": 0.3163101790566432, "grad_norm": 1.2276915311813354, "learning_rate": 8.381111267953523e-06, "loss": 0.0818324089050293, "step": 2270 }, { "epoch": 0.3164495227478576, "grad_norm": 0.7975842952728271, "learning_rate": 8.379374655353874e-06, "loss": 0.09335994720458984, "step": 2271 }, { "epoch": 0.31658886643907197, "grad_norm": 1.1156959533691406, "learning_rate": 8.377637291934329e-06, "loss": 0.08867073059082031, "step": 2272 }, { "epoch": 0.31672821013028635, "grad_norm": 1.1069656610488892, "learning_rate": 8.37589917808089e-06, "loss": 0.08121967315673828, "step": 2273 }, { "epoch": 0.3168675538215007, "grad_norm": 1.4566051959991455, "learning_rate": 8.374160314179727e-06, "loss": 0.10086822509765625, "step": 2274 }, { "epoch": 0.3170068975127151, "grad_norm": 0.7128390669822693, "learning_rate": 8.372420700617176e-06, "loss": 0.0730900764465332, "step": 2275 }, { "epoch": 0.3171462412039295, "grad_norm": 0.9305303692817688, "learning_rate": 8.370680337779737e-06, "loss": 0.08122920989990234, "step": 2276 }, { "epoch": 0.31728558489514386, "grad_norm": 1.0414397716522217, "learning_rate": 8.368939226054083e-06, "loss": 0.0748586654663086, "step": 2277 }, { "epoch": 0.31742492858635823, "grad_norm": 1.5961631536483765, "learning_rate": 8.367197365827047e-06, "loss": 0.10221481323242188, "step": 2278 }, { "epoch": 0.3175642722775726, "grad_norm": 2.189021110534668, "learning_rate": 8.36545475748563e-06, "loss": 0.12546348571777344, "step": 2279 }, { "epoch": 0.317703615968787, "grad_norm": 0.9551593065261841, "learning_rate": 8.363711401417e-06, "loss": 0.10326671600341797, "step": 2280 }, { "epoch": 0.31784295966000137, "grad_norm": 0.9821987152099609, "learning_rate": 8.361967298008494e-06, "loss": 0.07493782043457031, "step": 2281 }, { "epoch": 0.3179823033512158, "grad_norm": 1.1865662336349487, "learning_rate": 8.360222447647606e-06, "loss": 0.0908346176147461, "step": 2282 }, { "epoch": 0.3181216470424302, "grad_norm": 1.2592206001281738, "learning_rate": 8.358476850722007e-06, "loss": 0.08658027648925781, "step": 2283 }, { "epoch": 0.31826099073364456, "grad_norm": 0.8634379506111145, "learning_rate": 8.356730507619526e-06, "loss": 0.08583354949951172, "step": 2284 }, { "epoch": 0.31840033442485893, "grad_norm": 0.8430758118629456, "learning_rate": 8.354983418728165e-06, "loss": 0.061163902282714844, "step": 2285 }, { "epoch": 0.3185396781160733, "grad_norm": 0.6839690208435059, "learning_rate": 8.353235584436082e-06, "loss": 0.10448646545410156, "step": 2286 }, { "epoch": 0.3186790218072877, "grad_norm": 0.9330122470855713, "learning_rate": 8.351487005131606e-06, "loss": 0.08021736145019531, "step": 2287 }, { "epoch": 0.31881836549850207, "grad_norm": 0.7730129957199097, "learning_rate": 8.349737681203234e-06, "loss": 0.06754398345947266, "step": 2288 }, { "epoch": 0.31895770918971644, "grad_norm": 1.563416838645935, "learning_rate": 8.347987613039626e-06, "loss": 0.09560346603393555, "step": 2289 }, { "epoch": 0.3190970528809308, "grad_norm": 1.1100720167160034, "learning_rate": 8.346236801029605e-06, "loss": 0.0867462158203125, "step": 2290 }, { "epoch": 0.3192363965721452, "grad_norm": 0.8072900176048279, "learning_rate": 8.344485245562165e-06, "loss": 0.0935201644897461, "step": 2291 }, { "epoch": 0.3193757402633596, "grad_norm": 0.699910581111908, "learning_rate": 8.342732947026457e-06, "loss": 0.06891632080078125, "step": 2292 }, { "epoch": 0.31951508395457395, "grad_norm": 0.8567329049110413, "learning_rate": 8.340979905811805e-06, "loss": 0.08520889282226562, "step": 2293 }, { "epoch": 0.31965442764578833, "grad_norm": 0.7472473978996277, "learning_rate": 8.339226122307696e-06, "loss": 0.0883035659790039, "step": 2294 }, { "epoch": 0.3197937713370027, "grad_norm": 1.1779406070709229, "learning_rate": 8.337471596903774e-06, "loss": 0.06636714935302734, "step": 2295 }, { "epoch": 0.3199331150282171, "grad_norm": 0.59389328956604, "learning_rate": 8.335716329989863e-06, "loss": 0.07095956802368164, "step": 2296 }, { "epoch": 0.32007245871943146, "grad_norm": 1.3420838117599487, "learning_rate": 8.333960321955937e-06, "loss": 0.09176301956176758, "step": 2297 }, { "epoch": 0.32021180241064584, "grad_norm": 0.46818527579307556, "learning_rate": 8.332203573192143e-06, "loss": 0.07974648475646973, "step": 2298 }, { "epoch": 0.3203511461018602, "grad_norm": 0.4250245988368988, "learning_rate": 8.330446084088791e-06, "loss": 0.05578470230102539, "step": 2299 }, { "epoch": 0.3204904897930746, "grad_norm": 0.9606395959854126, "learning_rate": 8.328687855036355e-06, "loss": 0.0841827392578125, "step": 2300 }, { "epoch": 0.320629833484289, "grad_norm": 0.642802357673645, "learning_rate": 8.326928886425471e-06, "loss": 0.06262493133544922, "step": 2301 }, { "epoch": 0.3207691771755034, "grad_norm": 0.5342490077018738, "learning_rate": 8.325169178646946e-06, "loss": 0.0868988037109375, "step": 2302 }, { "epoch": 0.3209085208667178, "grad_norm": 0.6020740866661072, "learning_rate": 8.323408732091743e-06, "loss": 0.0927734375, "step": 2303 }, { "epoch": 0.32104786455793216, "grad_norm": 0.828797459602356, "learning_rate": 8.321647547150995e-06, "loss": 0.08411312103271484, "step": 2304 }, { "epoch": 0.32118720824914654, "grad_norm": 0.901246964931488, "learning_rate": 8.319885624215996e-06, "loss": 0.1111288070678711, "step": 2305 }, { "epoch": 0.3213265519403609, "grad_norm": 0.7288615703582764, "learning_rate": 8.318122963678206e-06, "loss": 0.08388423919677734, "step": 2306 }, { "epoch": 0.3214658956315753, "grad_norm": 0.7426939010620117, "learning_rate": 8.316359565929248e-06, "loss": 0.06998729705810547, "step": 2307 }, { "epoch": 0.32160523932278967, "grad_norm": 0.6909757852554321, "learning_rate": 8.314595431360906e-06, "loss": 0.07602787017822266, "step": 2308 }, { "epoch": 0.32174458301400405, "grad_norm": 0.5793853998184204, "learning_rate": 8.312830560365136e-06, "loss": 0.05685997009277344, "step": 2309 }, { "epoch": 0.3218839267052184, "grad_norm": 1.1108983755111694, "learning_rate": 8.311064953334046e-06, "loss": 0.10171127319335938, "step": 2310 }, { "epoch": 0.3220232703964328, "grad_norm": 0.7620674967765808, "learning_rate": 8.309298610659917e-06, "loss": 0.09737014770507812, "step": 2311 }, { "epoch": 0.3221626140876472, "grad_norm": 1.025177001953125, "learning_rate": 8.307531532735188e-06, "loss": 0.09389686584472656, "step": 2312 }, { "epoch": 0.32230195777886156, "grad_norm": 0.8109764456748962, "learning_rate": 8.305763719952467e-06, "loss": 0.09369182586669922, "step": 2313 }, { "epoch": 0.32244130147007594, "grad_norm": 0.6665458679199219, "learning_rate": 8.303995172704519e-06, "loss": 0.07583236694335938, "step": 2314 }, { "epoch": 0.3225806451612903, "grad_norm": 0.6873588562011719, "learning_rate": 8.302225891384275e-06, "loss": 0.09257125854492188, "step": 2315 }, { "epoch": 0.3227199888525047, "grad_norm": 0.7518826723098755, "learning_rate": 8.300455876384827e-06, "loss": 0.07145404815673828, "step": 2316 }, { "epoch": 0.32285933254371907, "grad_norm": 0.9229847192764282, "learning_rate": 8.298685128099437e-06, "loss": 0.10346031188964844, "step": 2317 }, { "epoch": 0.32299867623493345, "grad_norm": 0.7972732782363892, "learning_rate": 8.29691364692152e-06, "loss": 0.09389591217041016, "step": 2318 }, { "epoch": 0.3231380199261478, "grad_norm": 1.079792857170105, "learning_rate": 8.29514143324466e-06, "loss": 0.08335685729980469, "step": 2319 }, { "epoch": 0.3232773636173622, "grad_norm": 0.7806012034416199, "learning_rate": 8.293368487462604e-06, "loss": 0.09685707092285156, "step": 2320 }, { "epoch": 0.3234167073085766, "grad_norm": 1.1631860733032227, "learning_rate": 8.29159480996926e-06, "loss": 0.090179443359375, "step": 2321 }, { "epoch": 0.323556050999791, "grad_norm": 0.4444945454597473, "learning_rate": 8.289820401158695e-06, "loss": 0.07556724548339844, "step": 2322 }, { "epoch": 0.3236953946910054, "grad_norm": 0.9523869752883911, "learning_rate": 8.288045261425146e-06, "loss": 0.07016372680664062, "step": 2323 }, { "epoch": 0.32383473838221977, "grad_norm": 0.9669080376625061, "learning_rate": 8.286269391163006e-06, "loss": 0.10128355026245117, "step": 2324 }, { "epoch": 0.32397408207343414, "grad_norm": 0.7390457391738892, "learning_rate": 8.284492790766835e-06, "loss": 0.10270309448242188, "step": 2325 }, { "epoch": 0.3241134257646485, "grad_norm": 0.9157395362854004, "learning_rate": 8.282715460631354e-06, "loss": 0.08992910385131836, "step": 2326 }, { "epoch": 0.3242527694558629, "grad_norm": 1.0448065996170044, "learning_rate": 8.280937401151441e-06, "loss": 0.11753559112548828, "step": 2327 }, { "epoch": 0.3243921131470773, "grad_norm": 1.0165349245071411, "learning_rate": 8.279158612722145e-06, "loss": 0.10638046264648438, "step": 2328 }, { "epoch": 0.32453145683829165, "grad_norm": 0.9160779714584351, "learning_rate": 8.277379095738668e-06, "loss": 0.09615421295166016, "step": 2329 }, { "epoch": 0.32467080052950603, "grad_norm": 0.5286878943443298, "learning_rate": 8.27559885059638e-06, "loss": 0.06644320487976074, "step": 2330 }, { "epoch": 0.3248101442207204, "grad_norm": 1.043996810913086, "learning_rate": 8.273817877690809e-06, "loss": 0.1045379638671875, "step": 2331 }, { "epoch": 0.3249494879119348, "grad_norm": 0.7028215527534485, "learning_rate": 8.272036177417649e-06, "loss": 0.07167816162109375, "step": 2332 }, { "epoch": 0.32508883160314916, "grad_norm": 1.1719770431518555, "learning_rate": 8.270253750172754e-06, "loss": 0.08515405654907227, "step": 2333 }, { "epoch": 0.32522817529436354, "grad_norm": 0.7033340334892273, "learning_rate": 8.268470596352134e-06, "loss": 0.06874454021453857, "step": 2334 }, { "epoch": 0.3253675189855779, "grad_norm": 0.9609919786453247, "learning_rate": 8.26668671635197e-06, "loss": 0.0803709626197815, "step": 2335 }, { "epoch": 0.3255068626767923, "grad_norm": 2.389446258544922, "learning_rate": 8.264902110568598e-06, "loss": 0.14016151428222656, "step": 2336 }, { "epoch": 0.3256462063680067, "grad_norm": 0.7040911316871643, "learning_rate": 8.263116779398514e-06, "loss": 0.07584714889526367, "step": 2337 }, { "epoch": 0.32578555005922105, "grad_norm": 0.9109993577003479, "learning_rate": 8.261330723238381e-06, "loss": 0.10421371459960938, "step": 2338 }, { "epoch": 0.32592489375043543, "grad_norm": 0.8204297423362732, "learning_rate": 8.25954394248502e-06, "loss": 0.07875823974609375, "step": 2339 }, { "epoch": 0.3260642374416498, "grad_norm": 1.7799170017242432, "learning_rate": 8.25775643753541e-06, "loss": 0.09245491027832031, "step": 2340 }, { "epoch": 0.3262035811328642, "grad_norm": 0.6801718473434448, "learning_rate": 8.255968208786694e-06, "loss": 0.0784158706665039, "step": 2341 }, { "epoch": 0.3263429248240786, "grad_norm": 0.6752045750617981, "learning_rate": 8.25417925663618e-06, "loss": 0.07034921646118164, "step": 2342 }, { "epoch": 0.326482268515293, "grad_norm": 0.5105093717575073, "learning_rate": 8.252389581481328e-06, "loss": 0.06596183776855469, "step": 2343 }, { "epoch": 0.3266216122065074, "grad_norm": 0.6262010931968689, "learning_rate": 8.250599183719763e-06, "loss": 0.07599258422851562, "step": 2344 }, { "epoch": 0.32676095589772175, "grad_norm": 0.8691280484199524, "learning_rate": 8.248808063749273e-06, "loss": 0.09274101257324219, "step": 2345 }, { "epoch": 0.3269002995889361, "grad_norm": 0.5653651356697083, "learning_rate": 8.247016221967802e-06, "loss": 0.0704813003540039, "step": 2346 }, { "epoch": 0.3270396432801505, "grad_norm": 0.7200907468795776, "learning_rate": 8.245223658773459e-06, "loss": 0.08005189895629883, "step": 2347 }, { "epoch": 0.3271789869713649, "grad_norm": 1.0619548559188843, "learning_rate": 8.243430374564507e-06, "loss": 0.0931406021118164, "step": 2348 }, { "epoch": 0.32731833066257926, "grad_norm": 0.4867735505104065, "learning_rate": 8.241636369739376e-06, "loss": 0.07717704772949219, "step": 2349 }, { "epoch": 0.32745767435379364, "grad_norm": 1.75877046585083, "learning_rate": 8.23984164469665e-06, "loss": 0.10586738586425781, "step": 2350 }, { "epoch": 0.327597018045008, "grad_norm": 0.7595279812812805, "learning_rate": 8.23804619983508e-06, "loss": 0.09648323059082031, "step": 2351 }, { "epoch": 0.3277363617362224, "grad_norm": 0.9098833799362183, "learning_rate": 8.236250035553569e-06, "loss": 0.08111763000488281, "step": 2352 }, { "epoch": 0.32787570542743677, "grad_norm": 0.9982900023460388, "learning_rate": 8.234453152251183e-06, "loss": 0.08910942077636719, "step": 2353 }, { "epoch": 0.32801504911865115, "grad_norm": 0.6130339503288269, "learning_rate": 8.23265555032715e-06, "loss": 0.07439422607421875, "step": 2354 }, { "epoch": 0.3281543928098655, "grad_norm": 0.7207854390144348, "learning_rate": 8.23085723018086e-06, "loss": 0.09689617156982422, "step": 2355 }, { "epoch": 0.3282937365010799, "grad_norm": 0.7353227734565735, "learning_rate": 8.229058192211851e-06, "loss": 0.07605695724487305, "step": 2356 }, { "epoch": 0.3284330801922943, "grad_norm": 0.9271976351737976, "learning_rate": 8.227258436819836e-06, "loss": 0.09377861022949219, "step": 2357 }, { "epoch": 0.32857242388350866, "grad_norm": 0.7118405699729919, "learning_rate": 8.225457964404675e-06, "loss": 0.07985115051269531, "step": 2358 }, { "epoch": 0.32871176757472303, "grad_norm": 0.6699024438858032, "learning_rate": 8.223656775366393e-06, "loss": 0.08173179626464844, "step": 2359 }, { "epoch": 0.3288511112659374, "grad_norm": 0.7842735052108765, "learning_rate": 8.221854870105172e-06, "loss": 0.08438301086425781, "step": 2360 }, { "epoch": 0.3289904549571518, "grad_norm": 1.2397898435592651, "learning_rate": 8.220052249021356e-06, "loss": 0.08911705017089844, "step": 2361 }, { "epoch": 0.32912979864836617, "grad_norm": 0.7249873876571655, "learning_rate": 8.218248912515443e-06, "loss": 0.08191299438476562, "step": 2362 }, { "epoch": 0.3292691423395806, "grad_norm": 0.9606716632843018, "learning_rate": 8.216444860988098e-06, "loss": 0.10704660415649414, "step": 2363 }, { "epoch": 0.329408486030795, "grad_norm": 1.1787946224212646, "learning_rate": 8.214640094840136e-06, "loss": 0.09135246276855469, "step": 2364 }, { "epoch": 0.32954782972200936, "grad_norm": 0.8025174736976624, "learning_rate": 8.212834614472538e-06, "loss": 0.1000528335571289, "step": 2365 }, { "epoch": 0.32968717341322373, "grad_norm": 1.2117594480514526, "learning_rate": 8.211028420286437e-06, "loss": 0.08167839050292969, "step": 2366 }, { "epoch": 0.3298265171044381, "grad_norm": 0.5955885052680969, "learning_rate": 8.209221512683132e-06, "loss": 0.08491992950439453, "step": 2367 }, { "epoch": 0.3299658607956525, "grad_norm": 1.053816795349121, "learning_rate": 8.207413892064073e-06, "loss": 0.10182380676269531, "step": 2368 }, { "epoch": 0.33010520448686687, "grad_norm": 1.7479174137115479, "learning_rate": 8.205605558830873e-06, "loss": 0.10239458084106445, "step": 2369 }, { "epoch": 0.33024454817808124, "grad_norm": 0.9363861680030823, "learning_rate": 8.203796513385307e-06, "loss": 0.09715080261230469, "step": 2370 }, { "epoch": 0.3303838918692956, "grad_norm": 0.5032840967178345, "learning_rate": 8.201986756129297e-06, "loss": 0.062044620513916016, "step": 2371 }, { "epoch": 0.33052323556051, "grad_norm": 0.7852966785430908, "learning_rate": 8.200176287464931e-06, "loss": 0.07601737976074219, "step": 2372 }, { "epoch": 0.3306625792517244, "grad_norm": 0.7507002949714661, "learning_rate": 8.198365107794457e-06, "loss": 0.08159637451171875, "step": 2373 }, { "epoch": 0.33080192294293875, "grad_norm": 0.6685326099395752, "learning_rate": 8.196553217520275e-06, "loss": 0.0729818344116211, "step": 2374 }, { "epoch": 0.33094126663415313, "grad_norm": 0.9915950298309326, "learning_rate": 8.194740617044948e-06, "loss": 0.07766151428222656, "step": 2375 }, { "epoch": 0.3310806103253675, "grad_norm": 0.8235864043235779, "learning_rate": 8.192927306771193e-06, "loss": 0.06873273849487305, "step": 2376 }, { "epoch": 0.3312199540165819, "grad_norm": 0.7408896088600159, "learning_rate": 8.191113287101884e-06, "loss": 0.07235908508300781, "step": 2377 }, { "epoch": 0.33135929770779626, "grad_norm": 0.8619838953018188, "learning_rate": 8.18929855844006e-06, "loss": 0.09593725204467773, "step": 2378 }, { "epoch": 0.33149864139901064, "grad_norm": 1.0809077024459839, "learning_rate": 8.187483121188908e-06, "loss": 0.10452079772949219, "step": 2379 }, { "epoch": 0.331637985090225, "grad_norm": 1.2327499389648438, "learning_rate": 8.185666975751778e-06, "loss": 0.07661104202270508, "step": 2380 }, { "epoch": 0.3317773287814394, "grad_norm": 1.3312463760375977, "learning_rate": 8.183850122532174e-06, "loss": 0.0703744888305664, "step": 2381 }, { "epoch": 0.3319166724726538, "grad_norm": 1.445785641670227, "learning_rate": 8.182032561933764e-06, "loss": 0.09288787841796875, "step": 2382 }, { "epoch": 0.3320560161638682, "grad_norm": 1.2910727262496948, "learning_rate": 8.180214294360365e-06, "loss": 0.09213018417358398, "step": 2383 }, { "epoch": 0.3321953598550826, "grad_norm": 0.5857879519462585, "learning_rate": 8.178395320215953e-06, "loss": 0.07391834259033203, "step": 2384 }, { "epoch": 0.33233470354629696, "grad_norm": 0.9388836026191711, "learning_rate": 8.176575639904668e-06, "loss": 0.11075973510742188, "step": 2385 }, { "epoch": 0.33247404723751134, "grad_norm": 1.2984305620193481, "learning_rate": 8.174755253830797e-06, "loss": 0.11437606811523438, "step": 2386 }, { "epoch": 0.3326133909287257, "grad_norm": 1.589389443397522, "learning_rate": 8.17293416239879e-06, "loss": 0.09708976745605469, "step": 2387 }, { "epoch": 0.3327527346199401, "grad_norm": 0.6162965893745422, "learning_rate": 8.171112366013252e-06, "loss": 0.08622169494628906, "step": 2388 }, { "epoch": 0.33289207831115447, "grad_norm": 0.9034294486045837, "learning_rate": 8.169289865078942e-06, "loss": 0.09507942199707031, "step": 2389 }, { "epoch": 0.33303142200236885, "grad_norm": 0.7755340337753296, "learning_rate": 8.167466660000781e-06, "loss": 0.11056137084960938, "step": 2390 }, { "epoch": 0.3331707656935832, "grad_norm": 0.8565540313720703, "learning_rate": 8.165642751183844e-06, "loss": 0.09539985656738281, "step": 2391 }, { "epoch": 0.3333101093847976, "grad_norm": 0.7482793927192688, "learning_rate": 8.163818139033359e-06, "loss": 0.080352783203125, "step": 2392 }, { "epoch": 0.333449453076012, "grad_norm": 0.8873681426048279, "learning_rate": 8.161992823954715e-06, "loss": 0.08632183074951172, "step": 2393 }, { "epoch": 0.33358879676722636, "grad_norm": 1.0693751573562622, "learning_rate": 8.160166806353455e-06, "loss": 0.09862709045410156, "step": 2394 }, { "epoch": 0.33372814045844074, "grad_norm": 0.8154848217964172, "learning_rate": 8.15834008663528e-06, "loss": 0.08835983276367188, "step": 2395 }, { "epoch": 0.3338674841496551, "grad_norm": 1.0293772220611572, "learning_rate": 8.156512665206043e-06, "loss": 0.08512687683105469, "step": 2396 }, { "epoch": 0.3340068278408695, "grad_norm": 0.5756683349609375, "learning_rate": 8.154684542471754e-06, "loss": 0.0704641342163086, "step": 2397 }, { "epoch": 0.33414617153208387, "grad_norm": 0.5936179757118225, "learning_rate": 8.152855718838583e-06, "loss": 0.07682132720947266, "step": 2398 }, { "epoch": 0.33428551522329825, "grad_norm": 1.2190465927124023, "learning_rate": 8.151026194712854e-06, "loss": 0.09655380249023438, "step": 2399 }, { "epoch": 0.3344248589145126, "grad_norm": 0.9039480090141296, "learning_rate": 8.149195970501043e-06, "loss": 0.06423377990722656, "step": 2400 }, { "epoch": 0.334564202605727, "grad_norm": 0.7532091736793518, "learning_rate": 8.147365046609786e-06, "loss": 0.08183097839355469, "step": 2401 }, { "epoch": 0.3347035462969414, "grad_norm": 0.7361121773719788, "learning_rate": 8.145533423445869e-06, "loss": 0.09115123748779297, "step": 2402 }, { "epoch": 0.3348428899881558, "grad_norm": 1.212907314300537, "learning_rate": 8.14370110141624e-06, "loss": 0.07736539840698242, "step": 2403 }, { "epoch": 0.3349822336793702, "grad_norm": 0.9067147374153137, "learning_rate": 8.141868080927998e-06, "loss": 0.06038856506347656, "step": 2404 }, { "epoch": 0.33512157737058457, "grad_norm": 1.3133604526519775, "learning_rate": 8.140034362388398e-06, "loss": 0.10853290557861328, "step": 2405 }, { "epoch": 0.33526092106179894, "grad_norm": 0.8666226863861084, "learning_rate": 8.13819994620485e-06, "loss": 0.0752105712890625, "step": 2406 }, { "epoch": 0.3354002647530133, "grad_norm": 0.5247369408607483, "learning_rate": 8.136364832784923e-06, "loss": 0.07808876037597656, "step": 2407 }, { "epoch": 0.3355396084442277, "grad_norm": 1.5295449495315552, "learning_rate": 8.134529022536332e-06, "loss": 0.11401844024658203, "step": 2408 }, { "epoch": 0.3356789521354421, "grad_norm": 2.0500810146331787, "learning_rate": 8.132692515866959e-06, "loss": 0.07606697082519531, "step": 2409 }, { "epoch": 0.33581829582665645, "grad_norm": 1.1816741228103638, "learning_rate": 8.130855313184824e-06, "loss": 0.08957862854003906, "step": 2410 }, { "epoch": 0.33595763951787083, "grad_norm": 0.7637178301811218, "learning_rate": 8.129017414898121e-06, "loss": 0.07096147537231445, "step": 2411 }, { "epoch": 0.3360969832090852, "grad_norm": 0.8508808016777039, "learning_rate": 8.127178821415183e-06, "loss": 0.0853729248046875, "step": 2412 }, { "epoch": 0.3362363269002996, "grad_norm": 0.6166628003120422, "learning_rate": 8.125339533144507e-06, "loss": 0.08512210845947266, "step": 2413 }, { "epoch": 0.33637567059151396, "grad_norm": 0.8284225463867188, "learning_rate": 8.123499550494737e-06, "loss": 0.06224203109741211, "step": 2414 }, { "epoch": 0.33651501428272834, "grad_norm": 0.6630139350891113, "learning_rate": 8.12165887387468e-06, "loss": 0.08673858642578125, "step": 2415 }, { "epoch": 0.3366543579739427, "grad_norm": 0.6992603540420532, "learning_rate": 8.11981750369329e-06, "loss": 0.07621192932128906, "step": 2416 }, { "epoch": 0.3367937016651571, "grad_norm": 0.5528787970542908, "learning_rate": 8.117975440359677e-06, "loss": 0.07060384750366211, "step": 2417 }, { "epoch": 0.3369330453563715, "grad_norm": 0.7501078248023987, "learning_rate": 8.116132684283104e-06, "loss": 0.08551597595214844, "step": 2418 }, { "epoch": 0.33707238904758585, "grad_norm": 1.023911476135254, "learning_rate": 8.114289235872993e-06, "loss": 0.07639217376708984, "step": 2419 }, { "epoch": 0.33721173273880023, "grad_norm": 0.8780986070632935, "learning_rate": 8.112445095538915e-06, "loss": 0.09820413589477539, "step": 2420 }, { "epoch": 0.3373510764300146, "grad_norm": 0.8668622374534607, "learning_rate": 8.110600263690592e-06, "loss": 0.06812000274658203, "step": 2421 }, { "epoch": 0.337490420121229, "grad_norm": 0.7151896357536316, "learning_rate": 8.10875474073791e-06, "loss": 0.08232879638671875, "step": 2422 }, { "epoch": 0.3376297638124434, "grad_norm": 0.7096531391143799, "learning_rate": 8.106908527090895e-06, "loss": 0.10668754577636719, "step": 2423 }, { "epoch": 0.3377691075036578, "grad_norm": 0.5994778275489807, "learning_rate": 8.10506162315974e-06, "loss": 0.06497573852539062, "step": 2424 }, { "epoch": 0.3379084511948722, "grad_norm": 2.775935173034668, "learning_rate": 8.103214029354783e-06, "loss": 0.12358283996582031, "step": 2425 }, { "epoch": 0.33804779488608655, "grad_norm": 1.0953580141067505, "learning_rate": 8.101365746086514e-06, "loss": 0.08455848693847656, "step": 2426 }, { "epoch": 0.3381871385773009, "grad_norm": 0.958086371421814, "learning_rate": 8.099516773765581e-06, "loss": 0.08588218688964844, "step": 2427 }, { "epoch": 0.3383264822685153, "grad_norm": 0.8771002888679504, "learning_rate": 8.097667112802784e-06, "loss": 0.07766246795654297, "step": 2428 }, { "epoch": 0.3384658259597297, "grad_norm": 0.6823744177818298, "learning_rate": 8.095816763609077e-06, "loss": 0.07571887969970703, "step": 2429 }, { "epoch": 0.33860516965094406, "grad_norm": 1.1742933988571167, "learning_rate": 8.093965726595565e-06, "loss": 0.08587265014648438, "step": 2430 }, { "epoch": 0.33874451334215844, "grad_norm": 1.2936736345291138, "learning_rate": 8.092114002173503e-06, "loss": 0.09670257568359375, "step": 2431 }, { "epoch": 0.3388838570333728, "grad_norm": 0.7744086980819702, "learning_rate": 8.090261590754304e-06, "loss": 0.11195182800292969, "step": 2432 }, { "epoch": 0.3390232007245872, "grad_norm": 0.789297342300415, "learning_rate": 8.088408492749534e-06, "loss": 0.09445667266845703, "step": 2433 }, { "epoch": 0.33916254441580157, "grad_norm": 1.1086280345916748, "learning_rate": 8.086554708570901e-06, "loss": 0.08029699325561523, "step": 2434 }, { "epoch": 0.33930188810701595, "grad_norm": 1.362607479095459, "learning_rate": 8.084700238630283e-06, "loss": 0.09674358367919922, "step": 2435 }, { "epoch": 0.3394412317982303, "grad_norm": 0.9426361322402954, "learning_rate": 8.082845083339698e-06, "loss": 0.11686897277832031, "step": 2436 }, { "epoch": 0.3395805754894447, "grad_norm": 1.08428156375885, "learning_rate": 8.080989243111315e-06, "loss": 0.07917118072509766, "step": 2437 }, { "epoch": 0.3397199191806591, "grad_norm": 0.6650174260139465, "learning_rate": 8.079132718357465e-06, "loss": 0.0731649398803711, "step": 2438 }, { "epoch": 0.33985926287187346, "grad_norm": 1.4139176607131958, "learning_rate": 8.07727550949062e-06, "loss": 0.0783548355102539, "step": 2439 }, { "epoch": 0.33999860656308784, "grad_norm": 0.4257599115371704, "learning_rate": 8.075417616923413e-06, "loss": 0.05915260314941406, "step": 2440 }, { "epoch": 0.3401379502543022, "grad_norm": 1.2096762657165527, "learning_rate": 8.073559041068626e-06, "loss": 0.06562328338623047, "step": 2441 }, { "epoch": 0.3402772939455166, "grad_norm": 0.4745026230812073, "learning_rate": 8.071699782339188e-06, "loss": 0.061469078063964844, "step": 2442 }, { "epoch": 0.340416637636731, "grad_norm": 0.9319677948951721, "learning_rate": 8.06983984114819e-06, "loss": 0.09056854248046875, "step": 2443 }, { "epoch": 0.3405559813279454, "grad_norm": 1.1444536447525024, "learning_rate": 8.067979217908864e-06, "loss": 0.12309074401855469, "step": 2444 }, { "epoch": 0.3406953250191598, "grad_norm": 0.7643681764602661, "learning_rate": 8.066117913034597e-06, "loss": 0.08051872253417969, "step": 2445 }, { "epoch": 0.34083466871037416, "grad_norm": 1.4196667671203613, "learning_rate": 8.06425592693893e-06, "loss": 0.0954122543334961, "step": 2446 }, { "epoch": 0.34097401240158853, "grad_norm": 1.0112167596817017, "learning_rate": 8.062393260035557e-06, "loss": 0.10174846649169922, "step": 2447 }, { "epoch": 0.3411133560928029, "grad_norm": 1.0480726957321167, "learning_rate": 8.060529912738316e-06, "loss": 0.0880887508392334, "step": 2448 }, { "epoch": 0.3412526997840173, "grad_norm": 1.8875120878219604, "learning_rate": 8.058665885461201e-06, "loss": 0.10004520416259766, "step": 2449 }, { "epoch": 0.34139204347523167, "grad_norm": 1.1535167694091797, "learning_rate": 8.056801178618357e-06, "loss": 0.08121395111083984, "step": 2450 }, { "epoch": 0.34153138716644604, "grad_norm": 1.148619294166565, "learning_rate": 8.05493579262408e-06, "loss": 0.07886028289794922, "step": 2451 }, { "epoch": 0.3416707308576604, "grad_norm": 1.2839406728744507, "learning_rate": 8.053069727892813e-06, "loss": 0.09816932678222656, "step": 2452 }, { "epoch": 0.3418100745488748, "grad_norm": 0.7431702613830566, "learning_rate": 8.051202984839157e-06, "loss": 0.08443880081176758, "step": 2453 }, { "epoch": 0.3419494182400892, "grad_norm": 0.4935437738895416, "learning_rate": 8.049335563877858e-06, "loss": 0.07974624633789062, "step": 2454 }, { "epoch": 0.34208876193130355, "grad_norm": 1.0616700649261475, "learning_rate": 8.047467465423813e-06, "loss": 0.07525348663330078, "step": 2455 }, { "epoch": 0.34222810562251793, "grad_norm": 1.30780029296875, "learning_rate": 8.045598689892072e-06, "loss": 0.10790634155273438, "step": 2456 }, { "epoch": 0.3423674493137323, "grad_norm": 0.8720300793647766, "learning_rate": 8.043729237697835e-06, "loss": 0.08790206909179688, "step": 2457 }, { "epoch": 0.3425067930049467, "grad_norm": 0.768800675868988, "learning_rate": 8.041859109256452e-06, "loss": 0.06554794311523438, "step": 2458 }, { "epoch": 0.34264613669616106, "grad_norm": 0.9551355838775635, "learning_rate": 8.03998830498342e-06, "loss": 0.10021495819091797, "step": 2459 }, { "epoch": 0.34278548038737544, "grad_norm": 0.7474813461303711, "learning_rate": 8.038116825294393e-06, "loss": 0.09720611572265625, "step": 2460 }, { "epoch": 0.3429248240785898, "grad_norm": 1.0555129051208496, "learning_rate": 8.036244670605166e-06, "loss": 0.0929107666015625, "step": 2461 }, { "epoch": 0.3430641677698042, "grad_norm": 1.661611795425415, "learning_rate": 8.034371841331693e-06, "loss": 0.09596824645996094, "step": 2462 }, { "epoch": 0.34320351146101863, "grad_norm": 1.0374996662139893, "learning_rate": 8.032498337890073e-06, "loss": 0.08242225646972656, "step": 2463 }, { "epoch": 0.343342855152233, "grad_norm": 1.010939121246338, "learning_rate": 8.030624160696554e-06, "loss": 0.10797882080078125, "step": 2464 }, { "epoch": 0.3434821988434474, "grad_norm": 0.7874815464019775, "learning_rate": 8.02874931016754e-06, "loss": 0.07332658767700195, "step": 2465 }, { "epoch": 0.34362154253466176, "grad_norm": 1.2881945371627808, "learning_rate": 8.026873786719574e-06, "loss": 0.08231925964355469, "step": 2466 }, { "epoch": 0.34376088622587614, "grad_norm": 1.2741271257400513, "learning_rate": 8.024997590769359e-06, "loss": 0.08589935302734375, "step": 2467 }, { "epoch": 0.3439002299170905, "grad_norm": 0.8923823237419128, "learning_rate": 8.02312072273374e-06, "loss": 0.11910247802734375, "step": 2468 }, { "epoch": 0.3440395736083049, "grad_norm": 0.8138046264648438, "learning_rate": 8.021243183029715e-06, "loss": 0.08091115951538086, "step": 2469 }, { "epoch": 0.34417891729951927, "grad_norm": 1.0381330251693726, "learning_rate": 8.019364972074432e-06, "loss": 0.08838272094726562, "step": 2470 }, { "epoch": 0.34431826099073365, "grad_norm": 0.6821053624153137, "learning_rate": 8.017486090285185e-06, "loss": 0.08275413513183594, "step": 2471 }, { "epoch": 0.344457604681948, "grad_norm": 0.6055604815483093, "learning_rate": 8.01560653807942e-06, "loss": 0.08212900161743164, "step": 2472 }, { "epoch": 0.3445969483731624, "grad_norm": 0.6600486040115356, "learning_rate": 8.013726315874729e-06, "loss": 0.07773685455322266, "step": 2473 }, { "epoch": 0.3447362920643768, "grad_norm": 0.7903696894645691, "learning_rate": 8.011845424088856e-06, "loss": 0.07867813110351562, "step": 2474 }, { "epoch": 0.34487563575559116, "grad_norm": 0.7427158951759338, "learning_rate": 8.009963863139689e-06, "loss": 0.08192634582519531, "step": 2475 }, { "epoch": 0.34501497944680554, "grad_norm": 1.282131314277649, "learning_rate": 8.008081633445272e-06, "loss": 0.11011600494384766, "step": 2476 }, { "epoch": 0.3451543231380199, "grad_norm": 0.648571252822876, "learning_rate": 8.00619873542379e-06, "loss": 0.06974506378173828, "step": 2477 }, { "epoch": 0.3452936668292343, "grad_norm": 1.3761569261550903, "learning_rate": 8.004315169493586e-06, "loss": 0.0838479995727539, "step": 2478 }, { "epoch": 0.34543301052044867, "grad_norm": 0.8598304390907288, "learning_rate": 8.002430936073137e-06, "loss": 0.07538986206054688, "step": 2479 }, { "epoch": 0.34557235421166305, "grad_norm": 1.0300288200378418, "learning_rate": 8.000546035581083e-06, "loss": 0.08543777465820312, "step": 2480 }, { "epoch": 0.3457116979028774, "grad_norm": 0.7735865116119385, "learning_rate": 7.998660468436202e-06, "loss": 0.07118606567382812, "step": 2481 }, { "epoch": 0.3458510415940918, "grad_norm": 0.9387574791908264, "learning_rate": 7.996774235057425e-06, "loss": 0.09685420989990234, "step": 2482 }, { "epoch": 0.34599038528530623, "grad_norm": 0.5813558101654053, "learning_rate": 7.994887335863832e-06, "loss": 0.07659053802490234, "step": 2483 }, { "epoch": 0.3461297289765206, "grad_norm": 0.5882776975631714, "learning_rate": 7.992999771274646e-06, "loss": 0.07764244079589844, "step": 2484 }, { "epoch": 0.346269072667735, "grad_norm": 0.6490259766578674, "learning_rate": 7.991111541709244e-06, "loss": 0.0723409652709961, "step": 2485 }, { "epoch": 0.34640841635894937, "grad_norm": 0.7375901341438293, "learning_rate": 7.989222647587146e-06, "loss": 0.08004283905029297, "step": 2486 }, { "epoch": 0.34654776005016374, "grad_norm": 0.7985485196113586, "learning_rate": 7.987333089328018e-06, "loss": 0.09004783630371094, "step": 2487 }, { "epoch": 0.3466871037413781, "grad_norm": 0.582326352596283, "learning_rate": 7.985442867351682e-06, "loss": 0.06791305541992188, "step": 2488 }, { "epoch": 0.3468264474325925, "grad_norm": 0.5133151412010193, "learning_rate": 7.983551982078097e-06, "loss": 0.0757894515991211, "step": 2489 }, { "epoch": 0.3469657911238069, "grad_norm": 0.7241908311843872, "learning_rate": 7.98166043392738e-06, "loss": 0.09418296813964844, "step": 2490 }, { "epoch": 0.34710513481502125, "grad_norm": 0.6397019028663635, "learning_rate": 7.979768223319786e-06, "loss": 0.084381103515625, "step": 2491 }, { "epoch": 0.34724447850623563, "grad_norm": 0.5223606824874878, "learning_rate": 7.977875350675721e-06, "loss": 0.07660388946533203, "step": 2492 }, { "epoch": 0.34738382219745, "grad_norm": 0.8995939493179321, "learning_rate": 7.975981816415741e-06, "loss": 0.08357429504394531, "step": 2493 }, { "epoch": 0.3475231658886644, "grad_norm": 1.1446863412857056, "learning_rate": 7.974087620960543e-06, "loss": 0.09307003021240234, "step": 2494 }, { "epoch": 0.34766250957987876, "grad_norm": 1.4055012464523315, "learning_rate": 7.972192764730975e-06, "loss": 0.13277244567871094, "step": 2495 }, { "epoch": 0.34780185327109314, "grad_norm": 0.5682347416877747, "learning_rate": 7.970297248148033e-06, "loss": 0.0909566879272461, "step": 2496 }, { "epoch": 0.3479411969623075, "grad_norm": 0.6006300449371338, "learning_rate": 7.968401071632854e-06, "loss": 0.06154632568359375, "step": 2497 }, { "epoch": 0.3480805406535219, "grad_norm": 0.9628617763519287, "learning_rate": 7.966504235606726e-06, "loss": 0.08425045013427734, "step": 2498 }, { "epoch": 0.3482198843447363, "grad_norm": 1.077836036682129, "learning_rate": 7.964606740491085e-06, "loss": 0.08373546600341797, "step": 2499 }, { "epoch": 0.34835922803595065, "grad_norm": 1.1104822158813477, "learning_rate": 7.962708586707508e-06, "loss": 0.07819747924804688, "step": 2500 }, { "epoch": 0.34849857172716503, "grad_norm": 1.0296963453292847, "learning_rate": 7.960809774677722e-06, "loss": 0.09218406677246094, "step": 2501 }, { "epoch": 0.3486379154183794, "grad_norm": 0.9830087423324585, "learning_rate": 7.958910304823603e-06, "loss": 0.08407783508300781, "step": 2502 }, { "epoch": 0.34877725910959384, "grad_norm": 0.7738969922065735, "learning_rate": 7.957010177567167e-06, "loss": 0.08727550506591797, "step": 2503 }, { "epoch": 0.3489166028008082, "grad_norm": 0.8567898869514465, "learning_rate": 7.955109393330577e-06, "loss": 0.0779256820678711, "step": 2504 }, { "epoch": 0.3490559464920226, "grad_norm": 0.9638645648956299, "learning_rate": 7.953207952536147e-06, "loss": 0.07408428192138672, "step": 2505 }, { "epoch": 0.349195290183237, "grad_norm": 1.1865112781524658, "learning_rate": 7.951305855606333e-06, "loss": 0.07045650482177734, "step": 2506 }, { "epoch": 0.34933463387445135, "grad_norm": 1.4023674726486206, "learning_rate": 7.949403102963738e-06, "loss": 0.1055135726928711, "step": 2507 }, { "epoch": 0.3494739775656657, "grad_norm": 1.0862840414047241, "learning_rate": 7.947499695031108e-06, "loss": 0.0840444564819336, "step": 2508 }, { "epoch": 0.3496133212568801, "grad_norm": 0.7001329660415649, "learning_rate": 7.94559563223134e-06, "loss": 0.07542610168457031, "step": 2509 }, { "epoch": 0.3497526649480945, "grad_norm": 0.812516450881958, "learning_rate": 7.943690914987472e-06, "loss": 0.08437538146972656, "step": 2510 }, { "epoch": 0.34989200863930886, "grad_norm": 0.9956689476966858, "learning_rate": 7.941785543722686e-06, "loss": 0.0952301025390625, "step": 2511 }, { "epoch": 0.35003135233052324, "grad_norm": 1.1694018840789795, "learning_rate": 7.939879518860316e-06, "loss": 0.08660221099853516, "step": 2512 }, { "epoch": 0.3501706960217376, "grad_norm": 0.8363291621208191, "learning_rate": 7.937972840823836e-06, "loss": 0.08429527282714844, "step": 2513 }, { "epoch": 0.350310039712952, "grad_norm": 0.8796363472938538, "learning_rate": 7.936065510036863e-06, "loss": 0.12857437133789062, "step": 2514 }, { "epoch": 0.35044938340416637, "grad_norm": 1.6067878007888794, "learning_rate": 7.934157526923167e-06, "loss": 0.08567285537719727, "step": 2515 }, { "epoch": 0.35058872709538075, "grad_norm": 1.2975683212280273, "learning_rate": 7.932248891906657e-06, "loss": 0.07257843017578125, "step": 2516 }, { "epoch": 0.3507280707865951, "grad_norm": 0.7814555764198303, "learning_rate": 7.930339605411387e-06, "loss": 0.0926976203918457, "step": 2517 }, { "epoch": 0.3508674144778095, "grad_norm": 1.6810593605041504, "learning_rate": 7.92842966786156e-06, "loss": 0.11494636535644531, "step": 2518 }, { "epoch": 0.3510067581690239, "grad_norm": 0.9727789759635925, "learning_rate": 7.926519079681514e-06, "loss": 0.09048652648925781, "step": 2519 }, { "epoch": 0.35114610186023826, "grad_norm": 1.1215541362762451, "learning_rate": 7.924607841295744e-06, "loss": 0.11396408081054688, "step": 2520 }, { "epoch": 0.35128544555145264, "grad_norm": 0.9242516756057739, "learning_rate": 7.92269595312888e-06, "loss": 0.06469058990478516, "step": 2521 }, { "epoch": 0.351424789242667, "grad_norm": 0.8391236066818237, "learning_rate": 7.920783415605703e-06, "loss": 0.061382293701171875, "step": 2522 }, { "epoch": 0.35156413293388145, "grad_norm": 0.7093157768249512, "learning_rate": 7.918870229151134e-06, "loss": 0.10103988647460938, "step": 2523 }, { "epoch": 0.3517034766250958, "grad_norm": 1.1511173248291016, "learning_rate": 7.916956394190238e-06, "loss": 0.07048988342285156, "step": 2524 }, { "epoch": 0.3518428203163102, "grad_norm": 0.4693710207939148, "learning_rate": 7.915041911148229e-06, "loss": 0.07709789276123047, "step": 2525 }, { "epoch": 0.3519821640075246, "grad_norm": 1.1878539323806763, "learning_rate": 7.913126780450455e-06, "loss": 0.08587455749511719, "step": 2526 }, { "epoch": 0.35212150769873896, "grad_norm": 1.0335111618041992, "learning_rate": 7.911211002522422e-06, "loss": 0.08645343780517578, "step": 2527 }, { "epoch": 0.35226085138995333, "grad_norm": 0.8037947416305542, "learning_rate": 7.909294577789765e-06, "loss": 0.07410192489624023, "step": 2528 }, { "epoch": 0.3524001950811677, "grad_norm": 0.4797542095184326, "learning_rate": 7.907377506678274e-06, "loss": 0.04849720001220703, "step": 2529 }, { "epoch": 0.3525395387723821, "grad_norm": 0.7911386489868164, "learning_rate": 7.905459789613878e-06, "loss": 0.08145904541015625, "step": 2530 }, { "epoch": 0.35267888246359647, "grad_norm": 0.5809268355369568, "learning_rate": 7.90354142702265e-06, "loss": 0.0785074234008789, "step": 2531 }, { "epoch": 0.35281822615481084, "grad_norm": 0.9537519216537476, "learning_rate": 7.901622419330805e-06, "loss": 0.090240478515625, "step": 2532 }, { "epoch": 0.3529575698460252, "grad_norm": 1.1549042463302612, "learning_rate": 7.899702766964705e-06, "loss": 0.08337974548339844, "step": 2533 }, { "epoch": 0.3530969135372396, "grad_norm": 0.841750979423523, "learning_rate": 7.89778247035085e-06, "loss": 0.08504104614257812, "step": 2534 }, { "epoch": 0.353236257228454, "grad_norm": 0.6705127954483032, "learning_rate": 7.895861529915889e-06, "loss": 0.08661651611328125, "step": 2535 }, { "epoch": 0.35337560091966835, "grad_norm": 1.0051122903823853, "learning_rate": 7.893939946086609e-06, "loss": 0.07691669464111328, "step": 2536 }, { "epoch": 0.35351494461088273, "grad_norm": 1.1454713344573975, "learning_rate": 7.892017719289941e-06, "loss": 0.08671951293945312, "step": 2537 }, { "epoch": 0.3536542883020971, "grad_norm": 1.1867554187774658, "learning_rate": 7.890094849952964e-06, "loss": 0.06505680084228516, "step": 2538 }, { "epoch": 0.3537936319933115, "grad_norm": 1.5588940382003784, "learning_rate": 7.888171338502893e-06, "loss": 0.08378982543945312, "step": 2539 }, { "epoch": 0.35393297568452586, "grad_norm": 1.2296957969665527, "learning_rate": 7.886247185367088e-06, "loss": 0.08979129791259766, "step": 2540 }, { "epoch": 0.35407231937574024, "grad_norm": 0.9566522240638733, "learning_rate": 7.884322390973053e-06, "loss": 0.07467937469482422, "step": 2541 }, { "epoch": 0.3542116630669546, "grad_norm": 0.6592894792556763, "learning_rate": 7.882396955748432e-06, "loss": 0.08335208892822266, "step": 2542 }, { "epoch": 0.35435100675816905, "grad_norm": 0.4936702251434326, "learning_rate": 7.880470880121015e-06, "loss": 0.06434249877929688, "step": 2543 }, { "epoch": 0.35449035044938343, "grad_norm": 0.537987232208252, "learning_rate": 7.878544164518731e-06, "loss": 0.0763092041015625, "step": 2544 }, { "epoch": 0.3546296941405978, "grad_norm": 0.4606279134750366, "learning_rate": 7.87661680936965e-06, "loss": 0.06121253967285156, "step": 2545 }, { "epoch": 0.3547690378318122, "grad_norm": 0.7453613877296448, "learning_rate": 7.87468881510199e-06, "loss": 0.08784675598144531, "step": 2546 }, { "epoch": 0.35490838152302656, "grad_norm": 0.6051658987998962, "learning_rate": 7.872760182144104e-06, "loss": 0.09263420104980469, "step": 2547 }, { "epoch": 0.35504772521424094, "grad_norm": 0.4451640248298645, "learning_rate": 7.870830910924491e-06, "loss": 0.06343364715576172, "step": 2548 }, { "epoch": 0.3551870689054553, "grad_norm": 0.5799259543418884, "learning_rate": 7.868901001871797e-06, "loss": 0.07011127471923828, "step": 2549 }, { "epoch": 0.3553264125966697, "grad_norm": 0.7521007657051086, "learning_rate": 7.866970455414793e-06, "loss": 0.09176445007324219, "step": 2550 }, { "epoch": 0.35546575628788407, "grad_norm": 0.5633769631385803, "learning_rate": 7.86503927198241e-06, "loss": 0.06924629211425781, "step": 2551 }, { "epoch": 0.35560509997909845, "grad_norm": 1.165006399154663, "learning_rate": 7.863107452003711e-06, "loss": 0.09539127349853516, "step": 2552 }, { "epoch": 0.3557444436703128, "grad_norm": 0.8779934644699097, "learning_rate": 7.861174995907901e-06, "loss": 0.10828590393066406, "step": 2553 }, { "epoch": 0.3558837873615272, "grad_norm": 1.1772582530975342, "learning_rate": 7.85924190412433e-06, "loss": 0.0894317626953125, "step": 2554 }, { "epoch": 0.3560231310527416, "grad_norm": 1.2454321384429932, "learning_rate": 7.857308177082484e-06, "loss": 0.12717008590698242, "step": 2555 }, { "epoch": 0.35616247474395596, "grad_norm": 0.8996168375015259, "learning_rate": 7.855373815211995e-06, "loss": 0.07836723327636719, "step": 2556 }, { "epoch": 0.35630181843517034, "grad_norm": 0.6191667914390564, "learning_rate": 7.853438818942633e-06, "loss": 0.07089900970458984, "step": 2557 }, { "epoch": 0.3564411621263847, "grad_norm": 0.7614480257034302, "learning_rate": 7.851503188704312e-06, "loss": 0.09478569030761719, "step": 2558 }, { "epoch": 0.3565805058175991, "grad_norm": 0.9282375574111938, "learning_rate": 7.849566924927082e-06, "loss": 0.12229156494140625, "step": 2559 }, { "epoch": 0.35671984950881347, "grad_norm": 0.7000117301940918, "learning_rate": 7.84763002804114e-06, "loss": 0.09119415283203125, "step": 2560 }, { "epoch": 0.35685919320002785, "grad_norm": 0.5494346618652344, "learning_rate": 7.845692498476816e-06, "loss": 0.07916545867919922, "step": 2561 }, { "epoch": 0.3569985368912422, "grad_norm": 0.7083926796913147, "learning_rate": 7.843754336664589e-06, "loss": 0.0933675765991211, "step": 2562 }, { "epoch": 0.3571378805824566, "grad_norm": 0.5070476531982422, "learning_rate": 7.84181554303507e-06, "loss": 0.07337188720703125, "step": 2563 }, { "epoch": 0.35727722427367103, "grad_norm": 0.875178873538971, "learning_rate": 7.839876118019019e-06, "loss": 0.0921640396118164, "step": 2564 }, { "epoch": 0.3574165679648854, "grad_norm": 0.5979111194610596, "learning_rate": 7.837936062047329e-06, "loss": 0.07002830505371094, "step": 2565 }, { "epoch": 0.3575559116560998, "grad_norm": 0.781689465045929, "learning_rate": 7.835995375551038e-06, "loss": 0.08608055114746094, "step": 2566 }, { "epoch": 0.35769525534731417, "grad_norm": 0.8437834978103638, "learning_rate": 7.83405405896132e-06, "loss": 0.08032703399658203, "step": 2567 }, { "epoch": 0.35783459903852854, "grad_norm": 0.7197310924530029, "learning_rate": 7.832112112709496e-06, "loss": 0.06568717956542969, "step": 2568 }, { "epoch": 0.3579739427297429, "grad_norm": 0.4903898239135742, "learning_rate": 7.830169537227015e-06, "loss": 0.05640268325805664, "step": 2569 }, { "epoch": 0.3581132864209573, "grad_norm": 0.9672321081161499, "learning_rate": 7.828226332945479e-06, "loss": 0.09278488159179688, "step": 2570 }, { "epoch": 0.3582526301121717, "grad_norm": 0.7727029323577881, "learning_rate": 7.82628250029662e-06, "loss": 0.11037349700927734, "step": 2571 }, { "epoch": 0.35839197380338605, "grad_norm": 0.8429627418518066, "learning_rate": 7.824338039712316e-06, "loss": 0.09538459777832031, "step": 2572 }, { "epoch": 0.35853131749460043, "grad_norm": 0.6073665022850037, "learning_rate": 7.82239295162458e-06, "loss": 0.08740425109863281, "step": 2573 }, { "epoch": 0.3586706611858148, "grad_norm": 0.9243742823600769, "learning_rate": 7.820447236465565e-06, "loss": 0.10733604431152344, "step": 2574 }, { "epoch": 0.3588100048770292, "grad_norm": 0.9180453419685364, "learning_rate": 7.818500894667566e-06, "loss": 0.10819053649902344, "step": 2575 }, { "epoch": 0.35894934856824356, "grad_norm": 0.6912559270858765, "learning_rate": 7.816553926663018e-06, "loss": 0.08607292175292969, "step": 2576 }, { "epoch": 0.35908869225945794, "grad_norm": 0.5758349895477295, "learning_rate": 7.81460633288449e-06, "loss": 0.06936120986938477, "step": 2577 }, { "epoch": 0.3592280359506723, "grad_norm": 0.7979291677474976, "learning_rate": 7.812658113764691e-06, "loss": 0.09093761444091797, "step": 2578 }, { "epoch": 0.3593673796418867, "grad_norm": 1.162279486656189, "learning_rate": 7.810709269736476e-06, "loss": 0.09502983093261719, "step": 2579 }, { "epoch": 0.3595067233331011, "grad_norm": 1.0829483270645142, "learning_rate": 7.808759801232829e-06, "loss": 0.09488677978515625, "step": 2580 }, { "epoch": 0.35964606702431545, "grad_norm": 0.8542974591255188, "learning_rate": 7.80680970868688e-06, "loss": 0.06791973114013672, "step": 2581 }, { "epoch": 0.35978541071552983, "grad_norm": 0.7154315114021301, "learning_rate": 7.804858992531893e-06, "loss": 0.09444379806518555, "step": 2582 }, { "epoch": 0.3599247544067442, "grad_norm": 0.8000752329826355, "learning_rate": 7.802907653201275e-06, "loss": 0.07003021240234375, "step": 2583 }, { "epoch": 0.36006409809795864, "grad_norm": 0.9094294309616089, "learning_rate": 7.800955691128568e-06, "loss": 0.07892227172851562, "step": 2584 }, { "epoch": 0.360203441789173, "grad_norm": 0.7771764397621155, "learning_rate": 7.799003106747453e-06, "loss": 0.1177215576171875, "step": 2585 }, { "epoch": 0.3603427854803874, "grad_norm": 0.9721798896789551, "learning_rate": 7.79704990049175e-06, "loss": 0.09323501586914062, "step": 2586 }, { "epoch": 0.3604821291716018, "grad_norm": 0.6632961630821228, "learning_rate": 7.795096072795418e-06, "loss": 0.07337570190429688, "step": 2587 }, { "epoch": 0.36062147286281615, "grad_norm": 0.9232608079910278, "learning_rate": 7.793141624092551e-06, "loss": 0.07319355010986328, "step": 2588 }, { "epoch": 0.36076081655403053, "grad_norm": 1.1907747983932495, "learning_rate": 7.791186554817383e-06, "loss": 0.07949638366699219, "step": 2589 }, { "epoch": 0.3609001602452449, "grad_norm": 0.711012065410614, "learning_rate": 7.789230865404287e-06, "loss": 0.06953048706054688, "step": 2590 }, { "epoch": 0.3610395039364593, "grad_norm": 0.871390163898468, "learning_rate": 7.787274556287771e-06, "loss": 0.07203292846679688, "step": 2591 }, { "epoch": 0.36117884762767366, "grad_norm": 1.0389418601989746, "learning_rate": 7.785317627902484e-06, "loss": 0.07558155059814453, "step": 2592 }, { "epoch": 0.36131819131888804, "grad_norm": 0.6727098822593689, "learning_rate": 7.783360080683212e-06, "loss": 0.07115936279296875, "step": 2593 }, { "epoch": 0.3614575350101024, "grad_norm": 1.090455412864685, "learning_rate": 7.781401915064873e-06, "loss": 0.09476470947265625, "step": 2594 }, { "epoch": 0.3615968787013168, "grad_norm": 0.4747145175933838, "learning_rate": 7.779443131482529e-06, "loss": 0.07231616973876953, "step": 2595 }, { "epoch": 0.36173622239253117, "grad_norm": 0.7010016441345215, "learning_rate": 7.777483730371375e-06, "loss": 0.08092784881591797, "step": 2596 }, { "epoch": 0.36187556608374555, "grad_norm": 0.972288191318512, "learning_rate": 7.77552371216675e-06, "loss": 0.08857536315917969, "step": 2597 }, { "epoch": 0.3620149097749599, "grad_norm": 0.6682993769645691, "learning_rate": 7.773563077304123e-06, "loss": 0.08491134643554688, "step": 2598 }, { "epoch": 0.3621542534661743, "grad_norm": 0.6804406642913818, "learning_rate": 7.7716018262191e-06, "loss": 0.06784296035766602, "step": 2599 }, { "epoch": 0.3622935971573887, "grad_norm": 1.5142929553985596, "learning_rate": 7.769639959347428e-06, "loss": 0.08536720275878906, "step": 2600 }, { "epoch": 0.36243294084860306, "grad_norm": 0.9806750416755676, "learning_rate": 7.767677477124988e-06, "loss": 0.09057331085205078, "step": 2601 }, { "epoch": 0.36257228453981744, "grad_norm": 1.958040475845337, "learning_rate": 7.765714379987804e-06, "loss": 0.11027669906616211, "step": 2602 }, { "epoch": 0.3627116282310318, "grad_norm": 1.0045607089996338, "learning_rate": 7.763750668372023e-06, "loss": 0.10410881042480469, "step": 2603 }, { "epoch": 0.36285097192224625, "grad_norm": 0.9200315475463867, "learning_rate": 7.761786342713941e-06, "loss": 0.09335041046142578, "step": 2604 }, { "epoch": 0.3629903156134606, "grad_norm": 1.7376667261123657, "learning_rate": 7.75982140344999e-06, "loss": 0.10590934753417969, "step": 2605 }, { "epoch": 0.363129659304675, "grad_norm": 0.958050549030304, "learning_rate": 7.757855851016727e-06, "loss": 0.083984375, "step": 2606 }, { "epoch": 0.3632690029958894, "grad_norm": 0.825197696685791, "learning_rate": 7.755889685850858e-06, "loss": 0.08127212524414062, "step": 2607 }, { "epoch": 0.36340834668710376, "grad_norm": 1.0657991170883179, "learning_rate": 7.75392290838922e-06, "loss": 0.09208393096923828, "step": 2608 }, { "epoch": 0.36354769037831813, "grad_norm": 1.0911575555801392, "learning_rate": 7.751955519068783e-06, "loss": 0.08900260925292969, "step": 2609 }, { "epoch": 0.3636870340695325, "grad_norm": 0.6347920894622803, "learning_rate": 7.74998751832666e-06, "loss": 0.07616424560546875, "step": 2610 }, { "epoch": 0.3638263777607469, "grad_norm": 0.5840832591056824, "learning_rate": 7.748018906600092e-06, "loss": 0.08516597747802734, "step": 2611 }, { "epoch": 0.36396572145196127, "grad_norm": 1.1382755041122437, "learning_rate": 7.746049684326462e-06, "loss": 0.09422492980957031, "step": 2612 }, { "epoch": 0.36410506514317564, "grad_norm": 1.6290960311889648, "learning_rate": 7.744079851943286e-06, "loss": 0.09244632720947266, "step": 2613 }, { "epoch": 0.36424440883439, "grad_norm": 1.1712534427642822, "learning_rate": 7.742109409888213e-06, "loss": 0.082977294921875, "step": 2614 }, { "epoch": 0.3643837525256044, "grad_norm": 0.6151729226112366, "learning_rate": 7.740138358599035e-06, "loss": 0.0731058120727539, "step": 2615 }, { "epoch": 0.3645230962168188, "grad_norm": 0.5480495095252991, "learning_rate": 7.73816669851367e-06, "loss": 0.0842132568359375, "step": 2616 }, { "epoch": 0.36466243990803315, "grad_norm": 0.7947592735290527, "learning_rate": 7.73619443007018e-06, "loss": 0.07993125915527344, "step": 2617 }, { "epoch": 0.36480178359924753, "grad_norm": 0.5267719626426697, "learning_rate": 7.734221553706756e-06, "loss": 0.07902908325195312, "step": 2618 }, { "epoch": 0.3649411272904619, "grad_norm": 0.6616519689559937, "learning_rate": 7.732248069861726e-06, "loss": 0.07404708862304688, "step": 2619 }, { "epoch": 0.3650804709816763, "grad_norm": 0.7887855172157288, "learning_rate": 7.730273978973552e-06, "loss": 0.07187175750732422, "step": 2620 }, { "epoch": 0.36521981467289066, "grad_norm": 0.7277547717094421, "learning_rate": 7.728299281480833e-06, "loss": 0.0738992691040039, "step": 2621 }, { "epoch": 0.36535915836410504, "grad_norm": 1.0431654453277588, "learning_rate": 7.726323977822304e-06, "loss": 0.10507392883300781, "step": 2622 }, { "epoch": 0.3654985020553194, "grad_norm": 0.6217294931411743, "learning_rate": 7.72434806843683e-06, "loss": 0.056260108947753906, "step": 2623 }, { "epoch": 0.36563784574653385, "grad_norm": 0.508542537689209, "learning_rate": 7.72237155376341e-06, "loss": 0.07157158851623535, "step": 2624 }, { "epoch": 0.36577718943774823, "grad_norm": 0.9044026136398315, "learning_rate": 7.720394434241185e-06, "loss": 0.09566879272460938, "step": 2625 }, { "epoch": 0.3659165331289626, "grad_norm": 0.9133670926094055, "learning_rate": 7.718416710309425e-06, "loss": 0.07425785064697266, "step": 2626 }, { "epoch": 0.366055876820177, "grad_norm": 1.2255603075027466, "learning_rate": 7.716438382407534e-06, "loss": 0.10577392578125, "step": 2627 }, { "epoch": 0.36619522051139136, "grad_norm": 1.0323132276535034, "learning_rate": 7.714459450975052e-06, "loss": 0.10939216613769531, "step": 2628 }, { "epoch": 0.36633456420260574, "grad_norm": 1.0379787683486938, "learning_rate": 7.712479916451651e-06, "loss": 0.08617973327636719, "step": 2629 }, { "epoch": 0.3664739078938201, "grad_norm": 1.4132089614868164, "learning_rate": 7.710499779277141e-06, "loss": 0.08183479309082031, "step": 2630 }, { "epoch": 0.3666132515850345, "grad_norm": 0.5982921123504639, "learning_rate": 7.708519039891462e-06, "loss": 0.07595634460449219, "step": 2631 }, { "epoch": 0.36675259527624887, "grad_norm": 0.7340589761734009, "learning_rate": 7.70653769873469e-06, "loss": 0.07286643981933594, "step": 2632 }, { "epoch": 0.36689193896746325, "grad_norm": 1.5082430839538574, "learning_rate": 7.70455575624703e-06, "loss": 0.09918975830078125, "step": 2633 }, { "epoch": 0.3670312826586776, "grad_norm": 1.7792028188705444, "learning_rate": 7.702573212868827e-06, "loss": 0.10655856132507324, "step": 2634 }, { "epoch": 0.367170626349892, "grad_norm": 0.5507242679595947, "learning_rate": 7.70059006904056e-06, "loss": 0.05953407287597656, "step": 2635 }, { "epoch": 0.3673099700411064, "grad_norm": 0.7908042669296265, "learning_rate": 7.698606325202832e-06, "loss": 0.08454227447509766, "step": 2636 }, { "epoch": 0.36744931373232076, "grad_norm": 0.7829980254173279, "learning_rate": 7.69662198179639e-06, "loss": 0.08136940002441406, "step": 2637 }, { "epoch": 0.36758865742353514, "grad_norm": 1.7653625011444092, "learning_rate": 7.694637039262109e-06, "loss": 0.09466171264648438, "step": 2638 }, { "epoch": 0.3677280011147495, "grad_norm": 0.646449863910675, "learning_rate": 7.692651498040996e-06, "loss": 0.06605052947998047, "step": 2639 }, { "epoch": 0.3678673448059639, "grad_norm": 0.8971694707870483, "learning_rate": 7.690665358574197e-06, "loss": 0.09752464294433594, "step": 2640 }, { "epoch": 0.36800668849717827, "grad_norm": 0.8899959921836853, "learning_rate": 7.688678621302981e-06, "loss": 0.07327079772949219, "step": 2641 }, { "epoch": 0.36814603218839265, "grad_norm": 0.8870139122009277, "learning_rate": 7.686691286668761e-06, "loss": 0.08171749114990234, "step": 2642 }, { "epoch": 0.368285375879607, "grad_norm": 0.9316130876541138, "learning_rate": 7.684703355113074e-06, "loss": 0.10041332244873047, "step": 2643 }, { "epoch": 0.36842471957082146, "grad_norm": 0.5469236373901367, "learning_rate": 7.682714827077595e-06, "loss": 0.06499195098876953, "step": 2644 }, { "epoch": 0.36856406326203583, "grad_norm": 0.7529701590538025, "learning_rate": 7.68072570300413e-06, "loss": 0.07437896728515625, "step": 2645 }, { "epoch": 0.3687034069532502, "grad_norm": 0.9068670868873596, "learning_rate": 7.678735983334615e-06, "loss": 0.09282732009887695, "step": 2646 }, { "epoch": 0.3688427506444646, "grad_norm": 0.6586835384368896, "learning_rate": 7.676745668511121e-06, "loss": 0.082489013671875, "step": 2647 }, { "epoch": 0.36898209433567897, "grad_norm": 0.6249326467514038, "learning_rate": 7.67475475897585e-06, "loss": 0.08157920837402344, "step": 2648 }, { "epoch": 0.36912143802689334, "grad_norm": 0.4753190577030182, "learning_rate": 7.672763255171138e-06, "loss": 0.0839071273803711, "step": 2649 }, { "epoch": 0.3692607817181077, "grad_norm": 0.7253779172897339, "learning_rate": 7.67077115753945e-06, "loss": 0.06676435470581055, "step": 2650 }, { "epoch": 0.3694001254093221, "grad_norm": 0.9307167530059814, "learning_rate": 7.668778466523386e-06, "loss": 0.0856637954711914, "step": 2651 }, { "epoch": 0.3695394691005365, "grad_norm": 0.8212217688560486, "learning_rate": 7.666785182565676e-06, "loss": 0.06630897521972656, "step": 2652 }, { "epoch": 0.36967881279175085, "grad_norm": 0.9174131155014038, "learning_rate": 7.664791306109183e-06, "loss": 0.081817626953125, "step": 2653 }, { "epoch": 0.36981815648296523, "grad_norm": 1.0756802558898926, "learning_rate": 7.6627968375969e-06, "loss": 0.09341621398925781, "step": 2654 }, { "epoch": 0.3699575001741796, "grad_norm": 1.33078932762146, "learning_rate": 7.660801777471951e-06, "loss": 0.11439132690429688, "step": 2655 }, { "epoch": 0.370096843865394, "grad_norm": 0.7479738593101501, "learning_rate": 7.658806126177596e-06, "loss": 0.08093929290771484, "step": 2656 }, { "epoch": 0.37023618755660836, "grad_norm": 0.8458172678947449, "learning_rate": 7.65680988415722e-06, "loss": 0.09115791320800781, "step": 2657 }, { "epoch": 0.37037553124782274, "grad_norm": 0.4499773383140564, "learning_rate": 7.654813051854345e-06, "loss": 0.05493879318237305, "step": 2658 }, { "epoch": 0.3705148749390371, "grad_norm": 0.8247842788696289, "learning_rate": 7.652815629712616e-06, "loss": 0.0848989486694336, "step": 2659 }, { "epoch": 0.3706542186302515, "grad_norm": 0.7024835348129272, "learning_rate": 7.650817618175824e-06, "loss": 0.09824562072753906, "step": 2660 }, { "epoch": 0.3707935623214659, "grad_norm": 0.8205183148384094, "learning_rate": 7.648819017687875e-06, "loss": 0.06380081176757812, "step": 2661 }, { "epoch": 0.37093290601268025, "grad_norm": 0.5720780491828918, "learning_rate": 7.646819828692813e-06, "loss": 0.0692129135131836, "step": 2662 }, { "epoch": 0.37107224970389463, "grad_norm": 0.9787342548370361, "learning_rate": 7.644820051634813e-06, "loss": 0.08259391784667969, "step": 2663 }, { "epoch": 0.37121159339510906, "grad_norm": 0.69764643907547, "learning_rate": 7.64281968695818e-06, "loss": 0.08504295349121094, "step": 2664 }, { "epoch": 0.37135093708632344, "grad_norm": 0.716813325881958, "learning_rate": 7.640818735107351e-06, "loss": 0.0761256217956543, "step": 2665 }, { "epoch": 0.3714902807775378, "grad_norm": 1.0837507247924805, "learning_rate": 7.638817196526887e-06, "loss": 0.10441398620605469, "step": 2666 }, { "epoch": 0.3716296244687522, "grad_norm": 0.6398321986198425, "learning_rate": 7.636815071661488e-06, "loss": 0.08826828002929688, "step": 2667 }, { "epoch": 0.3717689681599666, "grad_norm": 1.0568841695785522, "learning_rate": 7.634812360955982e-06, "loss": 0.07541704177856445, "step": 2668 }, { "epoch": 0.37190831185118095, "grad_norm": 1.3446956872940063, "learning_rate": 7.63280906485532e-06, "loss": 0.0881500244140625, "step": 2669 }, { "epoch": 0.37204765554239533, "grad_norm": 0.8110485076904297, "learning_rate": 7.630805183804593e-06, "loss": 0.07161903381347656, "step": 2670 }, { "epoch": 0.3721869992336097, "grad_norm": 1.23555588722229, "learning_rate": 7.628800718249017e-06, "loss": 0.08330631256103516, "step": 2671 }, { "epoch": 0.3723263429248241, "grad_norm": 0.6492530703544617, "learning_rate": 7.626795668633938e-06, "loss": 0.05550861358642578, "step": 2672 }, { "epoch": 0.37246568661603846, "grad_norm": 0.8002157211303711, "learning_rate": 7.624790035404831e-06, "loss": 0.0768594741821289, "step": 2673 }, { "epoch": 0.37260503030725284, "grad_norm": 0.7821376919746399, "learning_rate": 7.622783819007305e-06, "loss": 0.07555484771728516, "step": 2674 }, { "epoch": 0.3727443739984672, "grad_norm": 1.0111761093139648, "learning_rate": 7.620777019887091e-06, "loss": 0.08247852325439453, "step": 2675 }, { "epoch": 0.3728837176896816, "grad_norm": 1.3666675090789795, "learning_rate": 7.6187696384900585e-06, "loss": 0.07376766204833984, "step": 2676 }, { "epoch": 0.37302306138089597, "grad_norm": 1.9225456714630127, "learning_rate": 7.616761675262199e-06, "loss": 0.10454177856445312, "step": 2677 }, { "epoch": 0.37316240507211035, "grad_norm": 1.5450276136398315, "learning_rate": 7.614753130649638e-06, "loss": 0.1150970458984375, "step": 2678 }, { "epoch": 0.3733017487633247, "grad_norm": 0.7999786734580994, "learning_rate": 7.612744005098625e-06, "loss": 0.07184314727783203, "step": 2679 }, { "epoch": 0.3734410924545391, "grad_norm": 0.9141650199890137, "learning_rate": 7.6107342990555466e-06, "loss": 0.06713008880615234, "step": 2680 }, { "epoch": 0.3735804361457535, "grad_norm": 0.6972759962081909, "learning_rate": 7.60872401296691e-06, "loss": 0.05791759490966797, "step": 2681 }, { "epoch": 0.37371977983696786, "grad_norm": 1.0369212627410889, "learning_rate": 7.606713147279356e-06, "loss": 0.08782386779785156, "step": 2682 }, { "epoch": 0.37385912352818224, "grad_norm": 1.6610957384109497, "learning_rate": 7.604701702439652e-06, "loss": 0.09601402282714844, "step": 2683 }, { "epoch": 0.37399846721939667, "grad_norm": 0.5688138604164124, "learning_rate": 7.602689678894697e-06, "loss": 0.07945728302001953, "step": 2684 }, { "epoch": 0.37413781091061105, "grad_norm": 0.6434688568115234, "learning_rate": 7.6006770770915165e-06, "loss": 0.06956291198730469, "step": 2685 }, { "epoch": 0.3742771546018254, "grad_norm": 0.847429633140564, "learning_rate": 7.598663897477263e-06, "loss": 0.07684803009033203, "step": 2686 }, { "epoch": 0.3744164982930398, "grad_norm": 1.1769245862960815, "learning_rate": 7.59665014049922e-06, "loss": 0.0970144271850586, "step": 2687 }, { "epoch": 0.3745558419842542, "grad_norm": 0.8741790056228638, "learning_rate": 7.594635806604797e-06, "loss": 0.08122825622558594, "step": 2688 }, { "epoch": 0.37469518567546856, "grad_norm": 1.4301233291625977, "learning_rate": 7.592620896241536e-06, "loss": 0.07085943222045898, "step": 2689 }, { "epoch": 0.37483452936668293, "grad_norm": 0.4564513564109802, "learning_rate": 7.590605409857103e-06, "loss": 0.07400989532470703, "step": 2690 }, { "epoch": 0.3749738730578973, "grad_norm": 0.8931390047073364, "learning_rate": 7.58858934789929e-06, "loss": 0.08571338653564453, "step": 2691 }, { "epoch": 0.3751132167491117, "grad_norm": 0.6073525547981262, "learning_rate": 7.586572710816025e-06, "loss": 0.09908485412597656, "step": 2692 }, { "epoch": 0.37525256044032607, "grad_norm": 0.8095736503601074, "learning_rate": 7.584555499055355e-06, "loss": 0.0841531753540039, "step": 2693 }, { "epoch": 0.37539190413154044, "grad_norm": 0.9932839870452881, "learning_rate": 7.58253771306546e-06, "loss": 0.08294439315795898, "step": 2694 }, { "epoch": 0.3755312478227548, "grad_norm": 1.146530032157898, "learning_rate": 7.5805193532946445e-06, "loss": 0.07583212852478027, "step": 2695 }, { "epoch": 0.3756705915139692, "grad_norm": 2.1950857639312744, "learning_rate": 7.578500420191344e-06, "loss": 0.10927486419677734, "step": 2696 }, { "epoch": 0.3758099352051836, "grad_norm": 0.5196940302848816, "learning_rate": 7.576480914204118e-06, "loss": 0.0759420394897461, "step": 2697 }, { "epoch": 0.37594927889639795, "grad_norm": 0.603842556476593, "learning_rate": 7.574460835781654e-06, "loss": 0.07901334762573242, "step": 2698 }, { "epoch": 0.37608862258761233, "grad_norm": 0.7488144636154175, "learning_rate": 7.572440185372769e-06, "loss": 0.06744098663330078, "step": 2699 }, { "epoch": 0.3762279662788267, "grad_norm": 0.8422919511795044, "learning_rate": 7.570418963426405e-06, "loss": 0.08395957946777344, "step": 2700 }, { "epoch": 0.3763673099700411, "grad_norm": 0.8178014159202576, "learning_rate": 7.568397170391631e-06, "loss": 0.10261154174804688, "step": 2701 }, { "epoch": 0.37650665366125546, "grad_norm": 0.7565035820007324, "learning_rate": 7.566374806717642e-06, "loss": 0.07626914978027344, "step": 2702 }, { "epoch": 0.37664599735246984, "grad_norm": 0.9089927077293396, "learning_rate": 7.564351872853763e-06, "loss": 0.08075523376464844, "step": 2703 }, { "epoch": 0.3767853410436843, "grad_norm": 0.7533407211303711, "learning_rate": 7.562328369249443e-06, "loss": 0.07968616485595703, "step": 2704 }, { "epoch": 0.37692468473489865, "grad_norm": 0.6005935072898865, "learning_rate": 7.560304296354259e-06, "loss": 0.08162689208984375, "step": 2705 }, { "epoch": 0.37706402842611303, "grad_norm": 0.9276357889175415, "learning_rate": 7.5582796546179125e-06, "loss": 0.08451175689697266, "step": 2706 }, { "epoch": 0.3772033721173274, "grad_norm": 0.5791804194450378, "learning_rate": 7.556254444490232e-06, "loss": 0.07740020751953125, "step": 2707 }, { "epoch": 0.3773427158085418, "grad_norm": 0.5267724990844727, "learning_rate": 7.554228666421176e-06, "loss": 0.08624982833862305, "step": 2708 }, { "epoch": 0.37748205949975616, "grad_norm": 0.6022740006446838, "learning_rate": 7.552202320860823e-06, "loss": 0.07076835632324219, "step": 2709 }, { "epoch": 0.37762140319097054, "grad_norm": 1.2527458667755127, "learning_rate": 7.550175408259383e-06, "loss": 0.10514545440673828, "step": 2710 }, { "epoch": 0.3777607468821849, "grad_norm": 0.730991005897522, "learning_rate": 7.548147929067189e-06, "loss": 0.08952522277832031, "step": 2711 }, { "epoch": 0.3779000905733993, "grad_norm": 0.550774335861206, "learning_rate": 7.546119883734699e-06, "loss": 0.07566452026367188, "step": 2712 }, { "epoch": 0.37803943426461367, "grad_norm": 0.6575709581375122, "learning_rate": 7.544091272712501e-06, "loss": 0.08932304382324219, "step": 2713 }, { "epoch": 0.37817877795582805, "grad_norm": 0.6478431224822998, "learning_rate": 7.542062096451306e-06, "loss": 0.0907449722290039, "step": 2714 }, { "epoch": 0.3783181216470424, "grad_norm": 1.5081934928894043, "learning_rate": 7.540032355401948e-06, "loss": 0.07513856887817383, "step": 2715 }, { "epoch": 0.3784574653382568, "grad_norm": 0.9497724771499634, "learning_rate": 7.53800205001539e-06, "loss": 0.08305740356445312, "step": 2716 }, { "epoch": 0.3785968090294712, "grad_norm": 0.8349165320396423, "learning_rate": 7.53597118074272e-06, "loss": 0.07631397247314453, "step": 2717 }, { "epoch": 0.37873615272068556, "grad_norm": 0.6747779250144958, "learning_rate": 7.5339397480351525e-06, "loss": 0.09174346923828125, "step": 2718 }, { "epoch": 0.37887549641189994, "grad_norm": 0.8024663329124451, "learning_rate": 7.531907752344023e-06, "loss": 0.0903635025024414, "step": 2719 }, { "epoch": 0.3790148401031143, "grad_norm": 1.028855562210083, "learning_rate": 7.529875194120795e-06, "loss": 0.09843635559082031, "step": 2720 }, { "epoch": 0.3791541837943287, "grad_norm": 0.6499218940734863, "learning_rate": 7.527842073817056e-06, "loss": 0.07778310775756836, "step": 2721 }, { "epoch": 0.37929352748554307, "grad_norm": 0.7268052101135254, "learning_rate": 7.525808391884521e-06, "loss": 0.06620502471923828, "step": 2722 }, { "epoch": 0.37943287117675745, "grad_norm": 0.7501247525215149, "learning_rate": 7.523774148775027e-06, "loss": 0.07212972640991211, "step": 2723 }, { "epoch": 0.3795722148679719, "grad_norm": 0.9379137754440308, "learning_rate": 7.521739344940535e-06, "loss": 0.06747722625732422, "step": 2724 }, { "epoch": 0.37971155855918626, "grad_norm": 0.6225761771202087, "learning_rate": 7.519703980833133e-06, "loss": 0.06524419784545898, "step": 2725 }, { "epoch": 0.37985090225040063, "grad_norm": 0.6423172354698181, "learning_rate": 7.517668056905033e-06, "loss": 0.07418060302734375, "step": 2726 }, { "epoch": 0.379990245941615, "grad_norm": 0.8856378793716431, "learning_rate": 7.515631573608568e-06, "loss": 0.08090591430664062, "step": 2727 }, { "epoch": 0.3801295896328294, "grad_norm": 0.9612736105918884, "learning_rate": 7.513594531396202e-06, "loss": 0.07844257354736328, "step": 2728 }, { "epoch": 0.38026893332404377, "grad_norm": 0.9308280944824219, "learning_rate": 7.511556930720517e-06, "loss": 0.07761478424072266, "step": 2729 }, { "epoch": 0.38040827701525814, "grad_norm": 0.6985786557197571, "learning_rate": 7.5095187720342224e-06, "loss": 0.07812714576721191, "step": 2730 }, { "epoch": 0.3805476207064725, "grad_norm": 1.2959858179092407, "learning_rate": 7.50748005579015e-06, "loss": 0.09183311462402344, "step": 2731 }, { "epoch": 0.3806869643976869, "grad_norm": 0.581131100654602, "learning_rate": 7.505440782441256e-06, "loss": 0.08038043975830078, "step": 2732 }, { "epoch": 0.3808263080889013, "grad_norm": 1.0076380968093872, "learning_rate": 7.503400952440618e-06, "loss": 0.10664081573486328, "step": 2733 }, { "epoch": 0.38096565178011566, "grad_norm": 1.1080387830734253, "learning_rate": 7.501360566241444e-06, "loss": 0.0878000259399414, "step": 2734 }, { "epoch": 0.38110499547133003, "grad_norm": 0.8145520687103271, "learning_rate": 7.499319624297059e-06, "loss": 0.08355522155761719, "step": 2735 }, { "epoch": 0.3812443391625444, "grad_norm": 1.026914358139038, "learning_rate": 7.497278127060914e-06, "loss": 0.0764780044555664, "step": 2736 }, { "epoch": 0.3813836828537588, "grad_norm": 0.7853649854660034, "learning_rate": 7.4952360749865825e-06, "loss": 0.09726715087890625, "step": 2737 }, { "epoch": 0.38152302654497317, "grad_norm": 1.0847467184066772, "learning_rate": 7.493193468527764e-06, "loss": 0.10290050506591797, "step": 2738 }, { "epoch": 0.38166237023618754, "grad_norm": 0.9819028973579407, "learning_rate": 7.491150308138275e-06, "loss": 0.1021575927734375, "step": 2739 }, { "epoch": 0.3818017139274019, "grad_norm": 0.8928560614585876, "learning_rate": 7.489106594272063e-06, "loss": 0.08910274505615234, "step": 2740 }, { "epoch": 0.3819410576186163, "grad_norm": 0.4917358160018921, "learning_rate": 7.487062327383192e-06, "loss": 0.07389354705810547, "step": 2741 }, { "epoch": 0.3820804013098307, "grad_norm": 0.8668930530548096, "learning_rate": 7.485017507925853e-06, "loss": 0.09579849243164062, "step": 2742 }, { "epoch": 0.38221974500104505, "grad_norm": 0.5070192217826843, "learning_rate": 7.482972136354359e-06, "loss": 0.07442665100097656, "step": 2743 }, { "epoch": 0.3823590886922595, "grad_norm": 0.6972740888595581, "learning_rate": 7.480926213123142e-06, "loss": 0.08638763427734375, "step": 2744 }, { "epoch": 0.38249843238347386, "grad_norm": 0.5299133658409119, "learning_rate": 7.4788797386867596e-06, "loss": 0.07789230346679688, "step": 2745 }, { "epoch": 0.38263777607468824, "grad_norm": 0.645393431186676, "learning_rate": 7.476832713499896e-06, "loss": 0.09000778198242188, "step": 2746 }, { "epoch": 0.3827771197659026, "grad_norm": 0.9788236021995544, "learning_rate": 7.474785138017349e-06, "loss": 0.09709453582763672, "step": 2747 }, { "epoch": 0.382916463457117, "grad_norm": 1.0893936157226562, "learning_rate": 7.472737012694045e-06, "loss": 0.09432601928710938, "step": 2748 }, { "epoch": 0.3830558071483314, "grad_norm": 1.0883512496948242, "learning_rate": 7.470688337985029e-06, "loss": 0.0628662109375, "step": 2749 }, { "epoch": 0.38319515083954575, "grad_norm": 0.6415817737579346, "learning_rate": 7.468639114345473e-06, "loss": 0.08015060424804688, "step": 2750 }, { "epoch": 0.38333449453076013, "grad_norm": 0.8888212442398071, "learning_rate": 7.466589342230664e-06, "loss": 0.08739662170410156, "step": 2751 }, { "epoch": 0.3834738382219745, "grad_norm": 0.8971450924873352, "learning_rate": 7.464539022096018e-06, "loss": 0.06837868690490723, "step": 2752 }, { "epoch": 0.3836131819131889, "grad_norm": 0.9321552515029907, "learning_rate": 7.462488154397067e-06, "loss": 0.09991455078125, "step": 2753 }, { "epoch": 0.38375252560440326, "grad_norm": 0.9326894879341125, "learning_rate": 7.460436739589467e-06, "loss": 0.10342788696289062, "step": 2754 }, { "epoch": 0.38389186929561764, "grad_norm": 0.5734491944313049, "learning_rate": 7.458384778128997e-06, "loss": 0.06654834747314453, "step": 2755 }, { "epoch": 0.384031212986832, "grad_norm": 1.0574592351913452, "learning_rate": 7.4563322704715556e-06, "loss": 0.10016727447509766, "step": 2756 }, { "epoch": 0.3841705566780464, "grad_norm": 0.9514192938804626, "learning_rate": 7.45427921707316e-06, "loss": 0.06857824325561523, "step": 2757 }, { "epoch": 0.38430990036926077, "grad_norm": 1.535798192024231, "learning_rate": 7.452225618389959e-06, "loss": 0.11680030822753906, "step": 2758 }, { "epoch": 0.38444924406047515, "grad_norm": 0.47457030415534973, "learning_rate": 7.450171474878207e-06, "loss": 0.06452131271362305, "step": 2759 }, { "epoch": 0.3845885877516895, "grad_norm": 1.2165168523788452, "learning_rate": 7.4481167869942934e-06, "loss": 0.0758657455444336, "step": 2760 }, { "epoch": 0.3847279314429039, "grad_norm": 0.529148519039154, "learning_rate": 7.446061555194721e-06, "loss": 0.06499862670898438, "step": 2761 }, { "epoch": 0.3848672751341183, "grad_norm": 0.7944236993789673, "learning_rate": 7.4440057799361155e-06, "loss": 0.0699615478515625, "step": 2762 }, { "epoch": 0.38500661882533266, "grad_norm": 0.5478975176811218, "learning_rate": 7.441949461675223e-06, "loss": 0.06954383850097656, "step": 2763 }, { "epoch": 0.38514596251654704, "grad_norm": 0.8544939160346985, "learning_rate": 7.439892600868911e-06, "loss": 0.09129619598388672, "step": 2764 }, { "epoch": 0.38528530620776147, "grad_norm": 0.9792113304138184, "learning_rate": 7.437835197974167e-06, "loss": 0.07068443298339844, "step": 2765 }, { "epoch": 0.38542464989897585, "grad_norm": 1.081852912902832, "learning_rate": 7.435777253448099e-06, "loss": 0.10139274597167969, "step": 2766 }, { "epoch": 0.3855639935901902, "grad_norm": 0.5184458494186401, "learning_rate": 7.433718767747934e-06, "loss": 0.07453346252441406, "step": 2767 }, { "epoch": 0.3857033372814046, "grad_norm": 0.9029498100280762, "learning_rate": 7.431659741331022e-06, "loss": 0.06912803649902344, "step": 2768 }, { "epoch": 0.385842680972619, "grad_norm": 1.5757396221160889, "learning_rate": 7.429600174654832e-06, "loss": 0.08682823181152344, "step": 2769 }, { "epoch": 0.38598202466383336, "grad_norm": 0.7857512831687927, "learning_rate": 7.427540068176951e-06, "loss": 0.07526683807373047, "step": 2770 }, { "epoch": 0.38612136835504773, "grad_norm": 0.6008813381195068, "learning_rate": 7.4254794223550885e-06, "loss": 0.08019351959228516, "step": 2771 }, { "epoch": 0.3862607120462621, "grad_norm": 0.6175569295883179, "learning_rate": 7.423418237647073e-06, "loss": 0.08988666534423828, "step": 2772 }, { "epoch": 0.3864000557374765, "grad_norm": 0.5725687742233276, "learning_rate": 7.421356514510853e-06, "loss": 0.0775289535522461, "step": 2773 }, { "epoch": 0.38653939942869087, "grad_norm": 0.7775299549102783, "learning_rate": 7.419294253404497e-06, "loss": 0.0818033218383789, "step": 2774 }, { "epoch": 0.38667874311990524, "grad_norm": 0.7272475361824036, "learning_rate": 7.417231454786189e-06, "loss": 0.07677221298217773, "step": 2775 }, { "epoch": 0.3868180868111196, "grad_norm": 1.0698126554489136, "learning_rate": 7.41516811911424e-06, "loss": 0.08776473999023438, "step": 2776 }, { "epoch": 0.386957430502334, "grad_norm": 1.2633225917816162, "learning_rate": 7.4131042468470725e-06, "loss": 0.10316848754882812, "step": 2777 }, { "epoch": 0.3870967741935484, "grad_norm": 0.9609826803207397, "learning_rate": 7.411039838443234e-06, "loss": 0.09290027618408203, "step": 2778 }, { "epoch": 0.38723611788476275, "grad_norm": 1.1015483140945435, "learning_rate": 7.4089748943613895e-06, "loss": 0.08405876159667969, "step": 2779 }, { "epoch": 0.38737546157597713, "grad_norm": 1.28415048122406, "learning_rate": 7.406909415060321e-06, "loss": 0.09618663787841797, "step": 2780 }, { "epoch": 0.3875148052671915, "grad_norm": 1.035758376121521, "learning_rate": 7.404843400998931e-06, "loss": 0.08970069885253906, "step": 2781 }, { "epoch": 0.3876541489584059, "grad_norm": 0.7850913405418396, "learning_rate": 7.4027768526362395e-06, "loss": 0.08628177642822266, "step": 2782 }, { "epoch": 0.38779349264962026, "grad_norm": 1.2251321077346802, "learning_rate": 7.4007097704313894e-06, "loss": 0.0946044921875, "step": 2783 }, { "epoch": 0.38793283634083464, "grad_norm": 0.9090900421142578, "learning_rate": 7.398642154843637e-06, "loss": 0.09199094772338867, "step": 2784 }, { "epoch": 0.3880721800320491, "grad_norm": 0.9956454038619995, "learning_rate": 7.39657400633236e-06, "loss": 0.07219982147216797, "step": 2785 }, { "epoch": 0.38821152372326345, "grad_norm": 0.7334786653518677, "learning_rate": 7.394505325357053e-06, "loss": 0.07517147064208984, "step": 2786 }, { "epoch": 0.38835086741447783, "grad_norm": 0.8552923202514648, "learning_rate": 7.392436112377331e-06, "loss": 0.07552385330200195, "step": 2787 }, { "epoch": 0.3884902111056922, "grad_norm": 0.9910157918930054, "learning_rate": 7.390366367852923e-06, "loss": 0.09176921844482422, "step": 2788 }, { "epoch": 0.3886295547969066, "grad_norm": 0.5950527787208557, "learning_rate": 7.388296092243683e-06, "loss": 0.06326866149902344, "step": 2789 }, { "epoch": 0.38876889848812096, "grad_norm": 0.6134446263313293, "learning_rate": 7.386225286009576e-06, "loss": 0.06852436065673828, "step": 2790 }, { "epoch": 0.38890824217933534, "grad_norm": 0.670769989490509, "learning_rate": 7.384153949610689e-06, "loss": 0.07479667663574219, "step": 2791 }, { "epoch": 0.3890475858705497, "grad_norm": 0.9890813827514648, "learning_rate": 7.382082083507226e-06, "loss": 0.07194042205810547, "step": 2792 }, { "epoch": 0.3891869295617641, "grad_norm": 0.7290378212928772, "learning_rate": 7.380009688159507e-06, "loss": 0.09028148651123047, "step": 2793 }, { "epoch": 0.38932627325297847, "grad_norm": 1.041959524154663, "learning_rate": 7.377936764027973e-06, "loss": 0.09438228607177734, "step": 2794 }, { "epoch": 0.38946561694419285, "grad_norm": 0.8684142231941223, "learning_rate": 7.375863311573179e-06, "loss": 0.07942628860473633, "step": 2795 }, { "epoch": 0.3896049606354072, "grad_norm": 0.6791969537734985, "learning_rate": 7.373789331255799e-06, "loss": 0.09012222290039062, "step": 2796 }, { "epoch": 0.3897443043266216, "grad_norm": 1.1632301807403564, "learning_rate": 7.371714823536624e-06, "loss": 0.11510658264160156, "step": 2797 }, { "epoch": 0.389883648017836, "grad_norm": 0.636650025844574, "learning_rate": 7.369639788876561e-06, "loss": 0.08714008331298828, "step": 2798 }, { "epoch": 0.39002299170905036, "grad_norm": 1.0342156887054443, "learning_rate": 7.367564227736639e-06, "loss": 0.10584545135498047, "step": 2799 }, { "epoch": 0.39016233540026474, "grad_norm": 1.0222145318984985, "learning_rate": 7.365488140577997e-06, "loss": 0.06946659088134766, "step": 2800 }, { "epoch": 0.3903016790914791, "grad_norm": 0.964540958404541, "learning_rate": 7.3634115278618955e-06, "loss": 0.12349700927734375, "step": 2801 }, { "epoch": 0.3904410227826935, "grad_norm": 0.6944901347160339, "learning_rate": 7.36133439004971e-06, "loss": 0.10066413879394531, "step": 2802 }, { "epoch": 0.39058036647390787, "grad_norm": 0.7348858118057251, "learning_rate": 7.3592567276029336e-06, "loss": 0.08729743957519531, "step": 2803 }, { "epoch": 0.39071971016512225, "grad_norm": 0.5194405317306519, "learning_rate": 7.357178540983174e-06, "loss": 0.05084419250488281, "step": 2804 }, { "epoch": 0.3908590538563367, "grad_norm": 0.5796899199485779, "learning_rate": 7.355099830652159e-06, "loss": 0.088623046875, "step": 2805 }, { "epoch": 0.39099839754755106, "grad_norm": 0.9852120280265808, "learning_rate": 7.353020597071729e-06, "loss": 0.0766897201538086, "step": 2806 }, { "epoch": 0.39113774123876544, "grad_norm": 0.7509356737136841, "learning_rate": 7.350940840703842e-06, "loss": 0.08759307861328125, "step": 2807 }, { "epoch": 0.3912770849299798, "grad_norm": 0.7986595034599304, "learning_rate": 7.348860562010574e-06, "loss": 0.08228397369384766, "step": 2808 }, { "epoch": 0.3914164286211942, "grad_norm": 0.9932718873023987, "learning_rate": 7.346779761454113e-06, "loss": 0.10608673095703125, "step": 2809 }, { "epoch": 0.39155577231240857, "grad_norm": 0.8298692107200623, "learning_rate": 7.3446984394967705e-06, "loss": 0.10663509368896484, "step": 2810 }, { "epoch": 0.39169511600362295, "grad_norm": 1.0466594696044922, "learning_rate": 7.342616596600961e-06, "loss": 0.08639717102050781, "step": 2811 }, { "epoch": 0.3918344596948373, "grad_norm": 0.7752171158790588, "learning_rate": 7.3405342332292286e-06, "loss": 0.07578086853027344, "step": 2812 }, { "epoch": 0.3919738033860517, "grad_norm": 0.6784119606018066, "learning_rate": 7.338451349844225e-06, "loss": 0.0902566909790039, "step": 2813 }, { "epoch": 0.3921131470772661, "grad_norm": 1.0478945970535278, "learning_rate": 7.336367946908718e-06, "loss": 0.09564018249511719, "step": 2814 }, { "epoch": 0.39225249076848046, "grad_norm": 0.9346991777420044, "learning_rate": 7.334284024885595e-06, "loss": 0.08895492553710938, "step": 2815 }, { "epoch": 0.39239183445969483, "grad_norm": 0.9669021368026733, "learning_rate": 7.332199584237854e-06, "loss": 0.08196866512298584, "step": 2816 }, { "epoch": 0.3925311781509092, "grad_norm": 0.8052101135253906, "learning_rate": 7.330114625428609e-06, "loss": 0.07987403869628906, "step": 2817 }, { "epoch": 0.3926705218421236, "grad_norm": 0.34036266803741455, "learning_rate": 7.328029148921093e-06, "loss": 0.057369232177734375, "step": 2818 }, { "epoch": 0.39280986553333797, "grad_norm": 0.7884669303894043, "learning_rate": 7.32594315517865e-06, "loss": 0.07317161560058594, "step": 2819 }, { "epoch": 0.39294920922455234, "grad_norm": 1.5128206014633179, "learning_rate": 7.32385664466474e-06, "loss": 0.07973480224609375, "step": 2820 }, { "epoch": 0.3930885529157667, "grad_norm": 0.5513744354248047, "learning_rate": 7.321769617842937e-06, "loss": 0.0779123306274414, "step": 2821 }, { "epoch": 0.3932278966069811, "grad_norm": 1.0559961795806885, "learning_rate": 7.319682075176932e-06, "loss": 0.098602294921875, "step": 2822 }, { "epoch": 0.3933672402981955, "grad_norm": 0.8573762774467468, "learning_rate": 7.317594017130529e-06, "loss": 0.07772636413574219, "step": 2823 }, { "epoch": 0.39350658398940985, "grad_norm": 0.617211103439331, "learning_rate": 7.3155054441676485e-06, "loss": 0.08471202850341797, "step": 2824 }, { "epoch": 0.3936459276806243, "grad_norm": 0.8028154969215393, "learning_rate": 7.313416356752321e-06, "loss": 0.08857536315917969, "step": 2825 }, { "epoch": 0.39378527137183866, "grad_norm": 0.8223022222518921, "learning_rate": 7.311326755348697e-06, "loss": 0.10283851623535156, "step": 2826 }, { "epoch": 0.39392461506305304, "grad_norm": 1.2349662780761719, "learning_rate": 7.309236640421033e-06, "loss": 0.1151266098022461, "step": 2827 }, { "epoch": 0.3940639587542674, "grad_norm": 0.6316396594047546, "learning_rate": 7.30714601243371e-06, "loss": 0.08570575714111328, "step": 2828 }, { "epoch": 0.3942033024454818, "grad_norm": 0.9403184652328491, "learning_rate": 7.305054871851217e-06, "loss": 0.1033792495727539, "step": 2829 }, { "epoch": 0.3943426461366962, "grad_norm": 0.6871681213378906, "learning_rate": 7.302963219138156e-06, "loss": 0.07940864562988281, "step": 2830 }, { "epoch": 0.39448198982791055, "grad_norm": 1.8351576328277588, "learning_rate": 7.3008710547592465e-06, "loss": 0.09828472137451172, "step": 2831 }, { "epoch": 0.39462133351912493, "grad_norm": 1.0234453678131104, "learning_rate": 7.298778379179317e-06, "loss": 0.08290839195251465, "step": 2832 }, { "epoch": 0.3947606772103393, "grad_norm": 0.9448971152305603, "learning_rate": 7.296685192863313e-06, "loss": 0.08782577514648438, "step": 2833 }, { "epoch": 0.3949000209015537, "grad_norm": 0.6927390694618225, "learning_rate": 7.2945914962762954e-06, "loss": 0.07578277587890625, "step": 2834 }, { "epoch": 0.39503936459276806, "grad_norm": 0.8261826038360596, "learning_rate": 7.292497289883432e-06, "loss": 0.113006591796875, "step": 2835 }, { "epoch": 0.39517870828398244, "grad_norm": 0.6606326103210449, "learning_rate": 7.29040257415001e-06, "loss": 0.07235050201416016, "step": 2836 }, { "epoch": 0.3953180519751968, "grad_norm": 0.7285338044166565, "learning_rate": 7.288307349541427e-06, "loss": 0.07117843627929688, "step": 2837 }, { "epoch": 0.3954573956664112, "grad_norm": 0.5171446204185486, "learning_rate": 7.286211616523193e-06, "loss": 0.06658542156219482, "step": 2838 }, { "epoch": 0.39559673935762557, "grad_norm": 1.147541880607605, "learning_rate": 7.284115375560934e-06, "loss": 0.09811973571777344, "step": 2839 }, { "epoch": 0.39573608304883995, "grad_norm": 0.7682148218154907, "learning_rate": 7.282018627120386e-06, "loss": 0.0848684310913086, "step": 2840 }, { "epoch": 0.3958754267400543, "grad_norm": 0.43570077419281006, "learning_rate": 7.279921371667397e-06, "loss": 0.07753276824951172, "step": 2841 }, { "epoch": 0.3960147704312687, "grad_norm": 0.6376878619194031, "learning_rate": 7.2778236096679325e-06, "loss": 0.08602714538574219, "step": 2842 }, { "epoch": 0.3961541141224831, "grad_norm": 0.9179841876029968, "learning_rate": 7.275725341588064e-06, "loss": 0.0825033187866211, "step": 2843 }, { "epoch": 0.39629345781369746, "grad_norm": 0.4115816652774811, "learning_rate": 7.27362656789398e-06, "loss": 0.057407379150390625, "step": 2844 }, { "epoch": 0.3964328015049119, "grad_norm": 0.666006326675415, "learning_rate": 7.2715272890519815e-06, "loss": 0.08115673065185547, "step": 2845 }, { "epoch": 0.39657214519612627, "grad_norm": 1.1457102298736572, "learning_rate": 7.2694275055284795e-06, "loss": 0.09910869598388672, "step": 2846 }, { "epoch": 0.39671148888734065, "grad_norm": 1.7105985879898071, "learning_rate": 7.267327217789998e-06, "loss": 0.08642959594726562, "step": 2847 }, { "epoch": 0.396850832578555, "grad_norm": 1.0132523775100708, "learning_rate": 7.26522642630317e-06, "loss": 0.09010887145996094, "step": 2848 }, { "epoch": 0.3969901762697694, "grad_norm": 0.7185067534446716, "learning_rate": 7.263125131534749e-06, "loss": 0.06831741333007812, "step": 2849 }, { "epoch": 0.3971295199609838, "grad_norm": 0.5798638463020325, "learning_rate": 7.26102333395159e-06, "loss": 0.07654094696044922, "step": 2850 }, { "epoch": 0.39726886365219816, "grad_norm": 1.018784999847412, "learning_rate": 7.2589210340206675e-06, "loss": 0.08168315887451172, "step": 2851 }, { "epoch": 0.39740820734341253, "grad_norm": 0.8911297917366028, "learning_rate": 7.256818232209062e-06, "loss": 0.08052968978881836, "step": 2852 }, { "epoch": 0.3975475510346269, "grad_norm": 0.686527669429779, "learning_rate": 7.25471492898397e-06, "loss": 0.07777833938598633, "step": 2853 }, { "epoch": 0.3976868947258413, "grad_norm": 1.694633960723877, "learning_rate": 7.2526111248126976e-06, "loss": 0.11373138427734375, "step": 2854 }, { "epoch": 0.39782623841705567, "grad_norm": 1.2494364976882935, "learning_rate": 7.250506820162661e-06, "loss": 0.10649299621582031, "step": 2855 }, { "epoch": 0.39796558210827004, "grad_norm": 0.8733842968940735, "learning_rate": 7.248402015501388e-06, "loss": 0.07135581970214844, "step": 2856 }, { "epoch": 0.3981049257994844, "grad_norm": 0.6652790307998657, "learning_rate": 7.246296711296519e-06, "loss": 0.08431053161621094, "step": 2857 }, { "epoch": 0.3982442694906988, "grad_norm": 0.7042795419692993, "learning_rate": 7.244190908015805e-06, "loss": 0.07955360412597656, "step": 2858 }, { "epoch": 0.3983836131819132, "grad_norm": 0.8792843222618103, "learning_rate": 7.2420846061271065e-06, "loss": 0.09083318710327148, "step": 2859 }, { "epoch": 0.39852295687312755, "grad_norm": 0.6880455613136292, "learning_rate": 7.239977806098398e-06, "loss": 0.07503509521484375, "step": 2860 }, { "epoch": 0.39866230056434193, "grad_norm": 0.8478845953941345, "learning_rate": 7.237870508397757e-06, "loss": 0.08970403671264648, "step": 2861 }, { "epoch": 0.3988016442555563, "grad_norm": 1.6059517860412598, "learning_rate": 7.235762713493384e-06, "loss": 0.08007240295410156, "step": 2862 }, { "epoch": 0.3989409879467707, "grad_norm": 0.6371945142745972, "learning_rate": 7.2336544218535776e-06, "loss": 0.07369804382324219, "step": 2863 }, { "epoch": 0.39908033163798506, "grad_norm": 1.0147968530654907, "learning_rate": 7.231545633946755e-06, "loss": 0.08084487915039062, "step": 2864 }, { "epoch": 0.3992196753291995, "grad_norm": 1.0438312292099, "learning_rate": 7.229436350241439e-06, "loss": 0.10889053344726562, "step": 2865 }, { "epoch": 0.3993590190204139, "grad_norm": 0.5380706787109375, "learning_rate": 7.2273265712062646e-06, "loss": 0.06363296508789062, "step": 2866 }, { "epoch": 0.39949836271162825, "grad_norm": 0.6571918725967407, "learning_rate": 7.225216297309977e-06, "loss": 0.08739709854125977, "step": 2867 }, { "epoch": 0.39963770640284263, "grad_norm": 0.8394675850868225, "learning_rate": 7.22310552902143e-06, "loss": 0.07884025573730469, "step": 2868 }, { "epoch": 0.399777050094057, "grad_norm": 0.7377246022224426, "learning_rate": 7.220994266809591e-06, "loss": 0.07726669311523438, "step": 2869 }, { "epoch": 0.3999163937852714, "grad_norm": 0.8560518622398376, "learning_rate": 7.21888251114353e-06, "loss": 0.07344341278076172, "step": 2870 }, { "epoch": 0.40005573747648576, "grad_norm": 1.1576476097106934, "learning_rate": 7.2167702624924345e-06, "loss": 0.08598136901855469, "step": 2871 }, { "epoch": 0.40019508116770014, "grad_norm": 0.979694664478302, "learning_rate": 7.2146575213255945e-06, "loss": 0.10407066345214844, "step": 2872 }, { "epoch": 0.4003344248589145, "grad_norm": 0.5646626949310303, "learning_rate": 7.212544288112415e-06, "loss": 0.07322883605957031, "step": 2873 }, { "epoch": 0.4004737685501289, "grad_norm": 0.4962877631187439, "learning_rate": 7.21043056332241e-06, "loss": 0.05945014953613281, "step": 2874 }, { "epoch": 0.40061311224134327, "grad_norm": 1.5520098209381104, "learning_rate": 7.208316347425197e-06, "loss": 0.10656118392944336, "step": 2875 }, { "epoch": 0.40075245593255765, "grad_norm": 0.7060416340827942, "learning_rate": 7.206201640890509e-06, "loss": 0.10355377197265625, "step": 2876 }, { "epoch": 0.400891799623772, "grad_norm": 0.6195724606513977, "learning_rate": 7.204086444188184e-06, "loss": 0.07505989074707031, "step": 2877 }, { "epoch": 0.4010311433149864, "grad_norm": 0.8177638053894043, "learning_rate": 7.201970757788172e-06, "loss": 0.09859275817871094, "step": 2878 }, { "epoch": 0.4011704870062008, "grad_norm": 1.6752854585647583, "learning_rate": 7.199854582160529e-06, "loss": 0.10119438171386719, "step": 2879 }, { "epoch": 0.40130983069741516, "grad_norm": 0.7273966670036316, "learning_rate": 7.197737917775422e-06, "loss": 0.09013175964355469, "step": 2880 }, { "epoch": 0.40144917438862954, "grad_norm": 1.016904354095459, "learning_rate": 7.1956207651031254e-06, "loss": 0.08185482025146484, "step": 2881 }, { "epoch": 0.4015885180798439, "grad_norm": 0.922187864780426, "learning_rate": 7.193503124614021e-06, "loss": 0.07807064056396484, "step": 2882 }, { "epoch": 0.4017278617710583, "grad_norm": 2.7683863639831543, "learning_rate": 7.191384996778601e-06, "loss": 0.0895376205444336, "step": 2883 }, { "epoch": 0.40186720546227267, "grad_norm": 1.3689806461334229, "learning_rate": 7.189266382067464e-06, "loss": 0.08032608032226562, "step": 2884 }, { "epoch": 0.4020065491534871, "grad_norm": 0.7559650540351868, "learning_rate": 7.1871472809513185e-06, "loss": 0.08051776885986328, "step": 2885 }, { "epoch": 0.4021458928447015, "grad_norm": 0.7179968357086182, "learning_rate": 7.185027693900982e-06, "loss": 0.08294868469238281, "step": 2886 }, { "epoch": 0.40228523653591586, "grad_norm": 1.1403402090072632, "learning_rate": 7.182907621387376e-06, "loss": 0.10456657409667969, "step": 2887 }, { "epoch": 0.40242458022713024, "grad_norm": 0.5153565406799316, "learning_rate": 7.180787063881534e-06, "loss": 0.07101917266845703, "step": 2888 }, { "epoch": 0.4025639239183446, "grad_norm": 0.8924327492713928, "learning_rate": 7.178666021854593e-06, "loss": 0.09192276000976562, "step": 2889 }, { "epoch": 0.402703267609559, "grad_norm": 0.8250053524971008, "learning_rate": 7.176544495777804e-06, "loss": 0.06955146789550781, "step": 2890 }, { "epoch": 0.40284261130077337, "grad_norm": 0.9076849818229675, "learning_rate": 7.174422486122517e-06, "loss": 0.08390998840332031, "step": 2891 }, { "epoch": 0.40298195499198775, "grad_norm": 0.762511134147644, "learning_rate": 7.1722999933602e-06, "loss": 0.08265972137451172, "step": 2892 }, { "epoch": 0.4031212986832021, "grad_norm": 0.4421687722206116, "learning_rate": 7.170177017962415e-06, "loss": 0.08400535583496094, "step": 2893 }, { "epoch": 0.4032606423744165, "grad_norm": 1.6000268459320068, "learning_rate": 7.168053560400845e-06, "loss": 0.08922290802001953, "step": 2894 }, { "epoch": 0.4033999860656309, "grad_norm": 2.4322116374969482, "learning_rate": 7.16592962114727e-06, "loss": 0.10983085632324219, "step": 2895 }, { "epoch": 0.40353932975684526, "grad_norm": 1.4648855924606323, "learning_rate": 7.163805200673584e-06, "loss": 0.07956409454345703, "step": 2896 }, { "epoch": 0.40367867344805963, "grad_norm": 1.3530224561691284, "learning_rate": 7.161680299451782e-06, "loss": 0.08460712432861328, "step": 2897 }, { "epoch": 0.403818017139274, "grad_norm": 0.6969216465950012, "learning_rate": 7.159554917953968e-06, "loss": 0.0608980655670166, "step": 2898 }, { "epoch": 0.4039573608304884, "grad_norm": 0.9117463827133179, "learning_rate": 7.157429056652357e-06, "loss": 0.0901336669921875, "step": 2899 }, { "epoch": 0.40409670452170277, "grad_norm": 1.003940224647522, "learning_rate": 7.155302716019263e-06, "loss": 0.0661325454711914, "step": 2900 }, { "epoch": 0.40423604821291714, "grad_norm": 1.224851131439209, "learning_rate": 7.153175896527112e-06, "loss": 0.08529138565063477, "step": 2901 }, { "epoch": 0.4043753919041315, "grad_norm": 0.6025007963180542, "learning_rate": 7.151048598648436e-06, "loss": 0.07238626480102539, "step": 2902 }, { "epoch": 0.4045147355953459, "grad_norm": 0.9489473104476929, "learning_rate": 7.148920822855869e-06, "loss": 0.08904552459716797, "step": 2903 }, { "epoch": 0.4046540792865603, "grad_norm": 0.7262398600578308, "learning_rate": 7.146792569622157e-06, "loss": 0.0700235366821289, "step": 2904 }, { "epoch": 0.4047934229777747, "grad_norm": 1.7267400026321411, "learning_rate": 7.144663839420147e-06, "loss": 0.11648750305175781, "step": 2905 }, { "epoch": 0.4049327666689891, "grad_norm": 1.2650266885757446, "learning_rate": 7.142534632722797e-06, "loss": 0.07982587814331055, "step": 2906 }, { "epoch": 0.40507211036020346, "grad_norm": 1.1125967502593994, "learning_rate": 7.140404950003164e-06, "loss": 0.09127569198608398, "step": 2907 }, { "epoch": 0.40521145405141784, "grad_norm": 1.092225432395935, "learning_rate": 7.138274791734421e-06, "loss": 0.10886573791503906, "step": 2908 }, { "epoch": 0.4053507977426322, "grad_norm": 0.7038055062294006, "learning_rate": 7.136144158389834e-06, "loss": 0.07749748229980469, "step": 2909 }, { "epoch": 0.4054901414338466, "grad_norm": 0.6368604302406311, "learning_rate": 7.134013050442785e-06, "loss": 0.08406639099121094, "step": 2910 }, { "epoch": 0.405629485125061, "grad_norm": 1.2061935663223267, "learning_rate": 7.1318814683667555e-06, "loss": 0.09695243835449219, "step": 2911 }, { "epoch": 0.40576882881627535, "grad_norm": 0.7270941734313965, "learning_rate": 7.129749412635337e-06, "loss": 0.07147502899169922, "step": 2912 }, { "epoch": 0.40590817250748973, "grad_norm": 0.6747002005577087, "learning_rate": 7.1276168837222215e-06, "loss": 0.07126951217651367, "step": 2913 }, { "epoch": 0.4060475161987041, "grad_norm": 0.6686989665031433, "learning_rate": 7.125483882101208e-06, "loss": 0.08022308349609375, "step": 2914 }, { "epoch": 0.4061868598899185, "grad_norm": 1.1464180946350098, "learning_rate": 7.123350408246203e-06, "loss": 0.07517623901367188, "step": 2915 }, { "epoch": 0.40632620358113286, "grad_norm": 0.767557680606842, "learning_rate": 7.121216462631213e-06, "loss": 0.08438491821289062, "step": 2916 }, { "epoch": 0.40646554727234724, "grad_norm": 0.4132266044616699, "learning_rate": 7.1190820457303535e-06, "loss": 0.057333946228027344, "step": 2917 }, { "epoch": 0.4066048909635616, "grad_norm": 1.143153190612793, "learning_rate": 7.116947158017842e-06, "loss": 0.095733642578125, "step": 2918 }, { "epoch": 0.406744234654776, "grad_norm": 0.810802161693573, "learning_rate": 7.114811799968005e-06, "loss": 0.09471893310546875, "step": 2919 }, { "epoch": 0.40688357834599037, "grad_norm": 1.0680266618728638, "learning_rate": 7.1126759720552665e-06, "loss": 0.08979511260986328, "step": 2920 }, { "epoch": 0.40702292203720475, "grad_norm": 0.4784191846847534, "learning_rate": 7.11053967475416e-06, "loss": 0.06760025024414062, "step": 2921 }, { "epoch": 0.4071622657284191, "grad_norm": 0.7625582814216614, "learning_rate": 7.108402908539323e-06, "loss": 0.09368896484375, "step": 2922 }, { "epoch": 0.4073016094196335, "grad_norm": 0.8210481405258179, "learning_rate": 7.106265673885494e-06, "loss": 0.07815980911254883, "step": 2923 }, { "epoch": 0.4074409531108479, "grad_norm": 0.8943662047386169, "learning_rate": 7.104127971267521e-06, "loss": 0.06377410888671875, "step": 2924 }, { "epoch": 0.4075802968020623, "grad_norm": 0.7365792989730835, "learning_rate": 7.10198980116035e-06, "loss": 0.07792186737060547, "step": 2925 }, { "epoch": 0.4077196404932767, "grad_norm": 0.9166756272315979, "learning_rate": 7.099851164039035e-06, "loss": 0.08002758026123047, "step": 2926 }, { "epoch": 0.40785898418449107, "grad_norm": 0.839526891708374, "learning_rate": 7.0977120603787296e-06, "loss": 0.07058525085449219, "step": 2927 }, { "epoch": 0.40799832787570545, "grad_norm": 1.7279688119888306, "learning_rate": 7.095572490654698e-06, "loss": 0.11830663681030273, "step": 2928 }, { "epoch": 0.4081376715669198, "grad_norm": 0.6445662975311279, "learning_rate": 7.0934324553423015e-06, "loss": 0.08794498443603516, "step": 2929 }, { "epoch": 0.4082770152581342, "grad_norm": 1.216645359992981, "learning_rate": 7.091291954917007e-06, "loss": 0.08664798736572266, "step": 2930 }, { "epoch": 0.4084163589493486, "grad_norm": 0.7548661231994629, "learning_rate": 7.089150989854385e-06, "loss": 0.10715293884277344, "step": 2931 }, { "epoch": 0.40855570264056296, "grad_norm": 0.7478958368301392, "learning_rate": 7.0870095606301095e-06, "loss": 0.07270050048828125, "step": 2932 }, { "epoch": 0.40869504633177733, "grad_norm": 0.7594417929649353, "learning_rate": 7.084867667719957e-06, "loss": 0.08423900604248047, "step": 2933 }, { "epoch": 0.4088343900229917, "grad_norm": 0.7561121582984924, "learning_rate": 7.082725311599808e-06, "loss": 0.09062767028808594, "step": 2934 }, { "epoch": 0.4089737337142061, "grad_norm": 0.5890040397644043, "learning_rate": 7.080582492745642e-06, "loss": 0.08283615112304688, "step": 2935 }, { "epoch": 0.40911307740542047, "grad_norm": 1.1457308530807495, "learning_rate": 7.0784392116335475e-06, "loss": 0.1073904037475586, "step": 2936 }, { "epoch": 0.40925242109663484, "grad_norm": 1.9744524955749512, "learning_rate": 7.076295468739711e-06, "loss": 0.10753345489501953, "step": 2937 }, { "epoch": 0.4093917647878492, "grad_norm": 1.0914238691329956, "learning_rate": 7.074151264540425e-06, "loss": 0.10615921020507812, "step": 2938 }, { "epoch": 0.4095311084790636, "grad_norm": 0.7086724638938904, "learning_rate": 7.0720065995120815e-06, "loss": 0.0875701904296875, "step": 2939 }, { "epoch": 0.409670452170278, "grad_norm": 0.7909852862358093, "learning_rate": 7.069861474131176e-06, "loss": 0.08224010467529297, "step": 2940 }, { "epoch": 0.40980979586149235, "grad_norm": 0.6491298079490662, "learning_rate": 7.067715888874307e-06, "loss": 0.07279014587402344, "step": 2941 }, { "epoch": 0.40994913955270673, "grad_norm": 0.93088698387146, "learning_rate": 7.065569844218175e-06, "loss": 0.08783674240112305, "step": 2942 }, { "epoch": 0.4100884832439211, "grad_norm": 1.0579005479812622, "learning_rate": 7.0634233406395806e-06, "loss": 0.06551599502563477, "step": 2943 }, { "epoch": 0.4102278269351355, "grad_norm": 1.5717254877090454, "learning_rate": 7.061276378615428e-06, "loss": 0.09845447540283203, "step": 2944 }, { "epoch": 0.4103671706263499, "grad_norm": 0.9126725792884827, "learning_rate": 7.059128958622725e-06, "loss": 0.09381484985351562, "step": 2945 }, { "epoch": 0.4105065143175643, "grad_norm": 1.3309962749481201, "learning_rate": 7.056981081138578e-06, "loss": 0.10369110107421875, "step": 2946 }, { "epoch": 0.4106458580087787, "grad_norm": 0.828926682472229, "learning_rate": 7.054832746640196e-06, "loss": 0.10263442993164062, "step": 2947 }, { "epoch": 0.41078520169999305, "grad_norm": 1.2519925832748413, "learning_rate": 7.05268395560489e-06, "loss": 0.11266613006591797, "step": 2948 }, { "epoch": 0.41092454539120743, "grad_norm": 0.5156674385070801, "learning_rate": 7.050534708510073e-06, "loss": 0.0794076919555664, "step": 2949 }, { "epoch": 0.4110638890824218, "grad_norm": 0.5697126984596252, "learning_rate": 7.048385005833258e-06, "loss": 0.06454801559448242, "step": 2950 }, { "epoch": 0.4112032327736362, "grad_norm": 0.782764196395874, "learning_rate": 7.04623484805206e-06, "loss": 0.09380531311035156, "step": 2951 }, { "epoch": 0.41134257646485056, "grad_norm": 1.1182949542999268, "learning_rate": 7.044084235644196e-06, "loss": 0.08078432083129883, "step": 2952 }, { "epoch": 0.41148192015606494, "grad_norm": 0.9661069512367249, "learning_rate": 7.041933169087482e-06, "loss": 0.07649040222167969, "step": 2953 }, { "epoch": 0.4116212638472793, "grad_norm": 0.8319328427314758, "learning_rate": 7.039781648859836e-06, "loss": 0.10477924346923828, "step": 2954 }, { "epoch": 0.4117606075384937, "grad_norm": 0.5248733758926392, "learning_rate": 7.037629675439276e-06, "loss": 0.0673065185546875, "step": 2955 }, { "epoch": 0.4118999512297081, "grad_norm": 0.69386887550354, "learning_rate": 7.035477249303923e-06, "loss": 0.0913991928100586, "step": 2956 }, { "epoch": 0.41203929492092245, "grad_norm": 0.7113481163978577, "learning_rate": 7.033324370931993e-06, "loss": 0.08106422424316406, "step": 2957 }, { "epoch": 0.4121786386121368, "grad_norm": 0.5843032598495483, "learning_rate": 7.031171040801813e-06, "loss": 0.07419395446777344, "step": 2958 }, { "epoch": 0.4123179823033512, "grad_norm": 1.1238354444503784, "learning_rate": 7.029017259391797e-06, "loss": 0.08527803421020508, "step": 2959 }, { "epoch": 0.4124573259945656, "grad_norm": 1.5702756643295288, "learning_rate": 7.026863027180472e-06, "loss": 0.10592937469482422, "step": 2960 }, { "epoch": 0.41259666968577996, "grad_norm": 0.8105482459068298, "learning_rate": 7.024708344646455e-06, "loss": 0.08491039276123047, "step": 2961 }, { "epoch": 0.41273601337699434, "grad_norm": 1.3353573083877563, "learning_rate": 7.022553212268469e-06, "loss": 0.093475341796875, "step": 2962 }, { "epoch": 0.4128753570682087, "grad_norm": 1.1283047199249268, "learning_rate": 7.020397630525336e-06, "loss": 0.0767812728881836, "step": 2963 }, { "epoch": 0.4130147007594231, "grad_norm": 0.6139127612113953, "learning_rate": 7.018241599895974e-06, "loss": 0.0831766128540039, "step": 2964 }, { "epoch": 0.41315404445063747, "grad_norm": 0.7036682963371277, "learning_rate": 7.016085120859406e-06, "loss": 0.08994007110595703, "step": 2965 }, { "epoch": 0.4132933881418519, "grad_norm": 0.898779571056366, "learning_rate": 7.013928193894753e-06, "loss": 0.0676116943359375, "step": 2966 }, { "epoch": 0.4134327318330663, "grad_norm": 0.6939769983291626, "learning_rate": 7.011770819481234e-06, "loss": 0.08343505859375, "step": 2967 }, { "epoch": 0.41357207552428066, "grad_norm": 0.8995922803878784, "learning_rate": 7.0096129980981674e-06, "loss": 0.07615947723388672, "step": 2968 }, { "epoch": 0.41371141921549504, "grad_norm": 0.7341762185096741, "learning_rate": 7.0074547302249755e-06, "loss": 0.0814981460571289, "step": 2969 }, { "epoch": 0.4138507629067094, "grad_norm": 0.47043275833129883, "learning_rate": 7.005296016341171e-06, "loss": 0.06528186798095703, "step": 2970 }, { "epoch": 0.4139901065979238, "grad_norm": 0.6094847321510315, "learning_rate": 7.003136856926374e-06, "loss": 0.07262516021728516, "step": 2971 }, { "epoch": 0.41412945028913817, "grad_norm": 0.695948600769043, "learning_rate": 7.0009772524603e-06, "loss": 0.0785379409790039, "step": 2972 }, { "epoch": 0.41426879398035255, "grad_norm": 0.924669086933136, "learning_rate": 6.998817203422763e-06, "loss": 0.07497549057006836, "step": 2973 }, { "epoch": 0.4144081376715669, "grad_norm": 0.8592563271522522, "learning_rate": 6.996656710293679e-06, "loss": 0.08827590942382812, "step": 2974 }, { "epoch": 0.4145474813627813, "grad_norm": 1.1537262201309204, "learning_rate": 6.994495773553056e-06, "loss": 0.08371639251708984, "step": 2975 }, { "epoch": 0.4146868250539957, "grad_norm": 0.49985402822494507, "learning_rate": 6.992334393681008e-06, "loss": 0.07790851593017578, "step": 2976 }, { "epoch": 0.41482616874521006, "grad_norm": 0.47355151176452637, "learning_rate": 6.990172571157744e-06, "loss": 0.06524848937988281, "step": 2977 }, { "epoch": 0.41496551243642443, "grad_norm": 1.1466578245162964, "learning_rate": 6.988010306463571e-06, "loss": 0.10468101501464844, "step": 2978 }, { "epoch": 0.4151048561276388, "grad_norm": 0.8260097503662109, "learning_rate": 6.985847600078894e-06, "loss": 0.1029214859008789, "step": 2979 }, { "epoch": 0.4152441998188532, "grad_norm": 0.7682439684867859, "learning_rate": 6.98368445248422e-06, "loss": 0.09339261054992676, "step": 2980 }, { "epoch": 0.41538354351006757, "grad_norm": 0.6741361021995544, "learning_rate": 6.981520864160147e-06, "loss": 0.07949256896972656, "step": 2981 }, { "epoch": 0.41552288720128194, "grad_norm": 1.100008249282837, "learning_rate": 6.979356835587377e-06, "loss": 0.08202457427978516, "step": 2982 }, { "epoch": 0.4156622308924963, "grad_norm": 0.6036608219146729, "learning_rate": 6.977192367246709e-06, "loss": 0.07926177978515625, "step": 2983 }, { "epoch": 0.4158015745837107, "grad_norm": 0.9015666246414185, "learning_rate": 6.9750274596190344e-06, "loss": 0.07030296325683594, "step": 2984 }, { "epoch": 0.4159409182749251, "grad_norm": 0.6818307638168335, "learning_rate": 6.972862113185353e-06, "loss": 0.08598041534423828, "step": 2985 }, { "epoch": 0.4160802619661395, "grad_norm": 0.6697362065315247, "learning_rate": 6.970696328426749e-06, "loss": 0.08743572235107422, "step": 2986 }, { "epoch": 0.4162196056573539, "grad_norm": 0.6367471218109131, "learning_rate": 6.968530105824413e-06, "loss": 0.0792388916015625, "step": 2987 }, { "epoch": 0.41635894934856826, "grad_norm": 0.826650083065033, "learning_rate": 6.966363445859629e-06, "loss": 0.06305694580078125, "step": 2988 }, { "epoch": 0.41649829303978264, "grad_norm": 0.9692680835723877, "learning_rate": 6.96419634901378e-06, "loss": 0.08425426483154297, "step": 2989 }, { "epoch": 0.416637636730997, "grad_norm": 0.5286574959754944, "learning_rate": 6.962028815768347e-06, "loss": 0.07798576354980469, "step": 2990 }, { "epoch": 0.4167769804222114, "grad_norm": 0.5382439494132996, "learning_rate": 6.959860846604903e-06, "loss": 0.07293319702148438, "step": 2991 }, { "epoch": 0.4169163241134258, "grad_norm": 0.7994220852851868, "learning_rate": 6.957692442005126e-06, "loss": 0.08598709106445312, "step": 2992 }, { "epoch": 0.41705566780464015, "grad_norm": 0.8613064885139465, "learning_rate": 6.95552360245078e-06, "loss": 0.08604717254638672, "step": 2993 }, { "epoch": 0.41719501149585453, "grad_norm": 0.4583999216556549, "learning_rate": 6.953354328423737e-06, "loss": 0.061309814453125, "step": 2994 }, { "epoch": 0.4173343551870689, "grad_norm": 1.4307650327682495, "learning_rate": 6.951184620405958e-06, "loss": 0.08209896087646484, "step": 2995 }, { "epoch": 0.4174736988782833, "grad_norm": 1.8159009218215942, "learning_rate": 6.949014478879502e-06, "loss": 0.08646678924560547, "step": 2996 }, { "epoch": 0.41761304256949766, "grad_norm": 0.9370650053024292, "learning_rate": 6.946843904326527e-06, "loss": 0.09153175354003906, "step": 2997 }, { "epoch": 0.41775238626071204, "grad_norm": 0.6378462910652161, "learning_rate": 6.944672897229282e-06, "loss": 0.06887531280517578, "step": 2998 }, { "epoch": 0.4178917299519264, "grad_norm": 0.7347134351730347, "learning_rate": 6.942501458070117e-06, "loss": 0.07088470458984375, "step": 2999 }, { "epoch": 0.4180310736431408, "grad_norm": 0.5475161671638489, "learning_rate": 6.940329587331477e-06, "loss": 0.07719802856445312, "step": 3000 }, { "epoch": 0.41817041733435517, "grad_norm": 0.6322544813156128, "learning_rate": 6.938157285495901e-06, "loss": 0.068359375, "step": 3001 }, { "epoch": 0.41830976102556955, "grad_norm": 0.41088175773620605, "learning_rate": 6.935984553046025e-06, "loss": 0.060654640197753906, "step": 3002 }, { "epoch": 0.4184491047167839, "grad_norm": 0.6488860845565796, "learning_rate": 6.93381139046458e-06, "loss": 0.07336044311523438, "step": 3003 }, { "epoch": 0.4185884484079983, "grad_norm": 0.447714626789093, "learning_rate": 6.931637798234394e-06, "loss": 0.059012413024902344, "step": 3004 }, { "epoch": 0.4187277920992127, "grad_norm": 0.8562869429588318, "learning_rate": 6.929463776838389e-06, "loss": 0.08748722076416016, "step": 3005 }, { "epoch": 0.4188671357904271, "grad_norm": 0.6294808983802795, "learning_rate": 6.927289326759585e-06, "loss": 0.08676719665527344, "step": 3006 }, { "epoch": 0.4190064794816415, "grad_norm": 0.8793321847915649, "learning_rate": 6.925114448481089e-06, "loss": 0.08039188385009766, "step": 3007 }, { "epoch": 0.41914582317285587, "grad_norm": 1.3344112634658813, "learning_rate": 6.922939142486118e-06, "loss": 0.0869150161743164, "step": 3008 }, { "epoch": 0.41928516686407025, "grad_norm": 0.7149291634559631, "learning_rate": 6.9207634092579686e-06, "loss": 0.08502006530761719, "step": 3009 }, { "epoch": 0.4194245105552846, "grad_norm": 0.7433922290802002, "learning_rate": 6.9185872492800434e-06, "loss": 0.08507537841796875, "step": 3010 }, { "epoch": 0.419563854246499, "grad_norm": 0.5819532871246338, "learning_rate": 6.916410663035832e-06, "loss": 0.08195877075195312, "step": 3011 }, { "epoch": 0.4197031979377134, "grad_norm": 0.8373497128486633, "learning_rate": 6.9142336510089235e-06, "loss": 0.0800924301147461, "step": 3012 }, { "epoch": 0.41984254162892776, "grad_norm": 0.7701376676559448, "learning_rate": 6.912056213683001e-06, "loss": 0.07958793640136719, "step": 3013 }, { "epoch": 0.41998188532014213, "grad_norm": 0.6436020135879517, "learning_rate": 6.909878351541841e-06, "loss": 0.06303787231445312, "step": 3014 }, { "epoch": 0.4201212290113565, "grad_norm": 0.8520702123641968, "learning_rate": 6.907700065069315e-06, "loss": 0.08630657196044922, "step": 3015 }, { "epoch": 0.4202605727025709, "grad_norm": 1.040088415145874, "learning_rate": 6.905521354749387e-06, "loss": 0.08013629913330078, "step": 3016 }, { "epoch": 0.42039991639378527, "grad_norm": 0.70517498254776, "learning_rate": 6.90334222106612e-06, "loss": 0.06510734558105469, "step": 3017 }, { "epoch": 0.42053926008499964, "grad_norm": 0.6179289221763611, "learning_rate": 6.901162664503662e-06, "loss": 0.0879669189453125, "step": 3018 }, { "epoch": 0.420678603776214, "grad_norm": 0.5998061299324036, "learning_rate": 6.898982685546267e-06, "loss": 0.07944393157958984, "step": 3019 }, { "epoch": 0.4208179474674284, "grad_norm": 0.7825337648391724, "learning_rate": 6.896802284678273e-06, "loss": 0.09334754943847656, "step": 3020 }, { "epoch": 0.4209572911586428, "grad_norm": 0.5720078945159912, "learning_rate": 6.894621462384116e-06, "loss": 0.07258987426757812, "step": 3021 }, { "epoch": 0.42109663484985715, "grad_norm": 1.1510238647460938, "learning_rate": 6.8924402191483245e-06, "loss": 0.091644287109375, "step": 3022 }, { "epoch": 0.42123597854107153, "grad_norm": 0.8085497617721558, "learning_rate": 6.890258555455521e-06, "loss": 0.08062171936035156, "step": 3023 }, { "epoch": 0.4213753222322859, "grad_norm": 1.0582088232040405, "learning_rate": 6.888076471790423e-06, "loss": 0.12145042419433594, "step": 3024 }, { "epoch": 0.4215146659235003, "grad_norm": 1.1024531126022339, "learning_rate": 6.8858939686378376e-06, "loss": 0.10711002349853516, "step": 3025 }, { "epoch": 0.4216540096147147, "grad_norm": 0.8404805660247803, "learning_rate": 6.8837110464826685e-06, "loss": 0.0902872085571289, "step": 3026 }, { "epoch": 0.4217933533059291, "grad_norm": 1.0422812700271606, "learning_rate": 6.881527705809912e-06, "loss": 0.10052776336669922, "step": 3027 }, { "epoch": 0.4219326969971435, "grad_norm": 1.0814855098724365, "learning_rate": 6.879343947104653e-06, "loss": 0.09438705444335938, "step": 3028 }, { "epoch": 0.42207204068835785, "grad_norm": 0.6333077549934387, "learning_rate": 6.8771597708520766e-06, "loss": 0.0880126953125, "step": 3029 }, { "epoch": 0.42221138437957223, "grad_norm": 0.6474775075912476, "learning_rate": 6.874975177537455e-06, "loss": 0.07411456108093262, "step": 3030 }, { "epoch": 0.4223507280707866, "grad_norm": 1.0163593292236328, "learning_rate": 6.872790167646155e-06, "loss": 0.09297370910644531, "step": 3031 }, { "epoch": 0.422490071762001, "grad_norm": 0.6569063663482666, "learning_rate": 6.870604741663638e-06, "loss": 0.08534526824951172, "step": 3032 }, { "epoch": 0.42262941545321536, "grad_norm": 0.5298543572425842, "learning_rate": 6.868418900075452e-06, "loss": 0.06488037109375, "step": 3033 }, { "epoch": 0.42276875914442974, "grad_norm": 0.6381168961524963, "learning_rate": 6.866232643367243e-06, "loss": 0.07095718383789062, "step": 3034 }, { "epoch": 0.4229081028356441, "grad_norm": 0.3715987503528595, "learning_rate": 6.864045972024749e-06, "loss": 0.06310462951660156, "step": 3035 }, { "epoch": 0.4230474465268585, "grad_norm": 0.4678328335285187, "learning_rate": 6.861858886533796e-06, "loss": 0.06642675399780273, "step": 3036 }, { "epoch": 0.4231867902180729, "grad_norm": 0.7563794255256653, "learning_rate": 6.859671387380307e-06, "loss": 0.1059713363647461, "step": 3037 }, { "epoch": 0.42332613390928725, "grad_norm": 0.7171308398246765, "learning_rate": 6.85748347505029e-06, "loss": 0.07016372680664062, "step": 3038 }, { "epoch": 0.4234654776005016, "grad_norm": 0.5968902111053467, "learning_rate": 6.855295150029853e-06, "loss": 0.08395671844482422, "step": 3039 }, { "epoch": 0.423604821291716, "grad_norm": 0.5038372278213501, "learning_rate": 6.853106412805192e-06, "loss": 0.06415176391601562, "step": 3040 }, { "epoch": 0.4237441649829304, "grad_norm": 0.7977039813995361, "learning_rate": 6.850917263862591e-06, "loss": 0.08115243911743164, "step": 3041 }, { "epoch": 0.42388350867414476, "grad_norm": 1.0491735935211182, "learning_rate": 6.848727703688432e-06, "loss": 0.08902931213378906, "step": 3042 }, { "epoch": 0.42402285236535914, "grad_norm": 1.1796873807907104, "learning_rate": 6.846537732769185e-06, "loss": 0.09755754470825195, "step": 3043 }, { "epoch": 0.4241621960565735, "grad_norm": 0.8574014902114868, "learning_rate": 6.8443473515914105e-06, "loss": 0.09236717224121094, "step": 3044 }, { "epoch": 0.4243015397477879, "grad_norm": 0.7041814923286438, "learning_rate": 6.842156560641762e-06, "loss": 0.07693767547607422, "step": 3045 }, { "epoch": 0.4244408834390023, "grad_norm": 0.5137086510658264, "learning_rate": 6.839965360406983e-06, "loss": 0.07677936553955078, "step": 3046 }, { "epoch": 0.4245802271302167, "grad_norm": 0.6012682914733887, "learning_rate": 6.837773751373908e-06, "loss": 0.08092308044433594, "step": 3047 }, { "epoch": 0.4247195708214311, "grad_norm": 1.151564121246338, "learning_rate": 6.835581734029462e-06, "loss": 0.09309577941894531, "step": 3048 }, { "epoch": 0.42485891451264546, "grad_norm": 0.6360094547271729, "learning_rate": 6.833389308860662e-06, "loss": 0.06522083282470703, "step": 3049 }, { "epoch": 0.42499825820385984, "grad_norm": 0.6395101547241211, "learning_rate": 6.831196476354615e-06, "loss": 0.07694602012634277, "step": 3050 }, { "epoch": 0.4251376018950742, "grad_norm": 1.4679028987884521, "learning_rate": 6.829003236998517e-06, "loss": 0.09033393859863281, "step": 3051 }, { "epoch": 0.4252769455862886, "grad_norm": 0.7321866750717163, "learning_rate": 6.8268095912796574e-06, "loss": 0.0693960189819336, "step": 3052 }, { "epoch": 0.42541628927750297, "grad_norm": 0.49836263060569763, "learning_rate": 6.824615539685413e-06, "loss": 0.06913280487060547, "step": 3053 }, { "epoch": 0.42555563296871735, "grad_norm": 0.969355583190918, "learning_rate": 6.822421082703253e-06, "loss": 0.09618616104125977, "step": 3054 }, { "epoch": 0.4256949766599317, "grad_norm": 0.9440403580665588, "learning_rate": 6.820226220820733e-06, "loss": 0.07970237731933594, "step": 3055 }, { "epoch": 0.4258343203511461, "grad_norm": 0.9659737348556519, "learning_rate": 6.818030954525505e-06, "loss": 0.07614803314208984, "step": 3056 }, { "epoch": 0.4259736640423605, "grad_norm": 1.0719571113586426, "learning_rate": 6.815835284305304e-06, "loss": 0.08660316467285156, "step": 3057 }, { "epoch": 0.42611300773357486, "grad_norm": 0.587699830532074, "learning_rate": 6.8136392106479624e-06, "loss": 0.07800102233886719, "step": 3058 }, { "epoch": 0.42625235142478923, "grad_norm": 0.9106557369232178, "learning_rate": 6.81144273404139e-06, "loss": 0.07523536682128906, "step": 3059 }, { "epoch": 0.4263916951160036, "grad_norm": 0.7552955746650696, "learning_rate": 6.8092458549736e-06, "loss": 0.07344818115234375, "step": 3060 }, { "epoch": 0.426531038807218, "grad_norm": 0.931777834892273, "learning_rate": 6.807048573932687e-06, "loss": 0.08661270141601562, "step": 3061 }, { "epoch": 0.42667038249843237, "grad_norm": 0.806816041469574, "learning_rate": 6.8048508914068355e-06, "loss": 0.09136581420898438, "step": 3062 }, { "epoch": 0.42680972618964674, "grad_norm": 0.8426628112792969, "learning_rate": 6.802652807884322e-06, "loss": 0.09145069122314453, "step": 3063 }, { "epoch": 0.4269490698808611, "grad_norm": 0.7065972089767456, "learning_rate": 6.80045432385351e-06, "loss": 0.06927680969238281, "step": 3064 }, { "epoch": 0.4270884135720755, "grad_norm": 0.6251782178878784, "learning_rate": 6.798255439802852e-06, "loss": 0.07511711120605469, "step": 3065 }, { "epoch": 0.42722775726328993, "grad_norm": 0.6684771776199341, "learning_rate": 6.796056156220892e-06, "loss": 0.07032489776611328, "step": 3066 }, { "epoch": 0.4273671009545043, "grad_norm": 0.7632026076316833, "learning_rate": 6.793856473596256e-06, "loss": 0.08192920684814453, "step": 3067 }, { "epoch": 0.4275064446457187, "grad_norm": 0.7180680632591248, "learning_rate": 6.791656392417666e-06, "loss": 0.07861137390136719, "step": 3068 }, { "epoch": 0.42764578833693306, "grad_norm": 0.5027080774307251, "learning_rate": 6.789455913173933e-06, "loss": 0.053189754486083984, "step": 3069 }, { "epoch": 0.42778513202814744, "grad_norm": 0.9529245495796204, "learning_rate": 6.787255036353947e-06, "loss": 0.08438873291015625, "step": 3070 }, { "epoch": 0.4279244757193618, "grad_norm": 0.7170193791389465, "learning_rate": 6.785053762446696e-06, "loss": 0.08291196823120117, "step": 3071 }, { "epoch": 0.4280638194105762, "grad_norm": 0.6450795531272888, "learning_rate": 6.782852091941254e-06, "loss": 0.07283401489257812, "step": 3072 }, { "epoch": 0.4282031631017906, "grad_norm": 0.8589430451393127, "learning_rate": 6.780650025326778e-06, "loss": 0.088287353515625, "step": 3073 }, { "epoch": 0.42834250679300495, "grad_norm": 0.6338161826133728, "learning_rate": 6.778447563092523e-06, "loss": 0.07675552368164062, "step": 3074 }, { "epoch": 0.42848185048421933, "grad_norm": 0.586016058921814, "learning_rate": 6.776244705727818e-06, "loss": 0.09239006042480469, "step": 3075 }, { "epoch": 0.4286211941754337, "grad_norm": 0.7851543426513672, "learning_rate": 6.774041453722093e-06, "loss": 0.10338973999023438, "step": 3076 }, { "epoch": 0.4287605378666481, "grad_norm": 0.9215417504310608, "learning_rate": 6.771837807564861e-06, "loss": 0.08988380432128906, "step": 3077 }, { "epoch": 0.42889988155786246, "grad_norm": 0.6304561495780945, "learning_rate": 6.769633767745718e-06, "loss": 0.076141357421875, "step": 3078 }, { "epoch": 0.42903922524907684, "grad_norm": 0.6290774941444397, "learning_rate": 6.767429334754354e-06, "loss": 0.07617568969726562, "step": 3079 }, { "epoch": 0.4291785689402912, "grad_norm": 0.9014177322387695, "learning_rate": 6.7652245090805426e-06, "loss": 0.10122013092041016, "step": 3080 }, { "epoch": 0.4293179126315056, "grad_norm": 0.7429930567741394, "learning_rate": 6.763019291214146e-06, "loss": 0.09566307067871094, "step": 3081 }, { "epoch": 0.42945725632271997, "grad_norm": 0.7840132117271423, "learning_rate": 6.760813681645114e-06, "loss": 0.07908248901367188, "step": 3082 }, { "epoch": 0.42959660001393435, "grad_norm": 0.93851238489151, "learning_rate": 6.758607680863481e-06, "loss": 0.10204601287841797, "step": 3083 }, { "epoch": 0.4297359437051487, "grad_norm": 0.83607017993927, "learning_rate": 6.756401289359371e-06, "loss": 0.08829784393310547, "step": 3084 }, { "epoch": 0.4298752873963631, "grad_norm": 0.5937705039978027, "learning_rate": 6.754194507622995e-06, "loss": 0.07111597061157227, "step": 3085 }, { "epoch": 0.43001463108757754, "grad_norm": 0.8045179843902588, "learning_rate": 6.7519873361446475e-06, "loss": 0.0759735107421875, "step": 3086 }, { "epoch": 0.4301539747787919, "grad_norm": 0.8012644648551941, "learning_rate": 6.7497797754147134e-06, "loss": 0.07374000549316406, "step": 3087 }, { "epoch": 0.4302933184700063, "grad_norm": 0.6092119216918945, "learning_rate": 6.74757182592366e-06, "loss": 0.06532907485961914, "step": 3088 }, { "epoch": 0.43043266216122067, "grad_norm": 0.8954304456710815, "learning_rate": 6.7453634881620445e-06, "loss": 0.08477401733398438, "step": 3089 }, { "epoch": 0.43057200585243505, "grad_norm": 0.5707760453224182, "learning_rate": 6.743154762620511e-06, "loss": 0.06928253173828125, "step": 3090 }, { "epoch": 0.4307113495436494, "grad_norm": 0.9167419672012329, "learning_rate": 6.740945649789784e-06, "loss": 0.08063316345214844, "step": 3091 }, { "epoch": 0.4308506932348638, "grad_norm": 1.0337632894515991, "learning_rate": 6.738736150160681e-06, "loss": 0.11199283599853516, "step": 3092 }, { "epoch": 0.4309900369260782, "grad_norm": 0.6607641577720642, "learning_rate": 6.736526264224101e-06, "loss": 0.0762929916381836, "step": 3093 }, { "epoch": 0.43112938061729256, "grad_norm": 0.355338454246521, "learning_rate": 6.734315992471032e-06, "loss": 0.058083534240722656, "step": 3094 }, { "epoch": 0.43126872430850693, "grad_norm": 0.8652171492576599, "learning_rate": 6.7321053353925446e-06, "loss": 0.08064556121826172, "step": 3095 }, { "epoch": 0.4314080679997213, "grad_norm": 0.6818732023239136, "learning_rate": 6.729894293479795e-06, "loss": 0.08481979370117188, "step": 3096 }, { "epoch": 0.4315474116909357, "grad_norm": 0.7986383438110352, "learning_rate": 6.727682867224028e-06, "loss": 0.07662248611450195, "step": 3097 }, { "epoch": 0.43168675538215007, "grad_norm": 0.5852009057998657, "learning_rate": 6.725471057116573e-06, "loss": 0.08185291290283203, "step": 3098 }, { "epoch": 0.43182609907336444, "grad_norm": 0.9939224720001221, "learning_rate": 6.723258863648841e-06, "loss": 0.10525941848754883, "step": 3099 }, { "epoch": 0.4319654427645788, "grad_norm": 1.2233469486236572, "learning_rate": 6.72104628731233e-06, "loss": 0.09433650970458984, "step": 3100 }, { "epoch": 0.4321047864557932, "grad_norm": 0.8909695148468018, "learning_rate": 6.718833328598629e-06, "loss": 0.08532238006591797, "step": 3101 }, { "epoch": 0.4322441301470076, "grad_norm": 0.6537291407585144, "learning_rate": 6.716619987999404e-06, "loss": 0.08132457733154297, "step": 3102 }, { "epoch": 0.43238347383822195, "grad_norm": 0.6431054472923279, "learning_rate": 6.714406266006408e-06, "loss": 0.07623767852783203, "step": 3103 }, { "epoch": 0.43252281752943633, "grad_norm": 1.1952930688858032, "learning_rate": 6.712192163111481e-06, "loss": 0.10379600524902344, "step": 3104 }, { "epoch": 0.4326621612206507, "grad_norm": 0.8757791519165039, "learning_rate": 6.709977679806543e-06, "loss": 0.08118247985839844, "step": 3105 }, { "epoch": 0.43280150491186514, "grad_norm": 1.1639684438705444, "learning_rate": 6.707762816583608e-06, "loss": 0.09332275390625, "step": 3106 }, { "epoch": 0.4329408486030795, "grad_norm": 0.9047536849975586, "learning_rate": 6.705547573934759e-06, "loss": 0.0848383903503418, "step": 3107 }, { "epoch": 0.4330801922942939, "grad_norm": 0.9215735793113708, "learning_rate": 6.703331952352181e-06, "loss": 0.0946798324584961, "step": 3108 }, { "epoch": 0.4332195359855083, "grad_norm": 1.1571722030639648, "learning_rate": 6.70111595232813e-06, "loss": 0.10495948791503906, "step": 3109 }, { "epoch": 0.43335887967672265, "grad_norm": 0.867156982421875, "learning_rate": 6.6988995743549516e-06, "loss": 0.06875896453857422, "step": 3110 }, { "epoch": 0.43349822336793703, "grad_norm": 0.8638577461242676, "learning_rate": 6.696682818925074e-06, "loss": 0.0819244384765625, "step": 3111 }, { "epoch": 0.4336375670591514, "grad_norm": 1.0007705688476562, "learning_rate": 6.694465686531011e-06, "loss": 0.07719802856445312, "step": 3112 }, { "epoch": 0.4337769107503658, "grad_norm": 0.7456021904945374, "learning_rate": 6.692248177665357e-06, "loss": 0.08669471740722656, "step": 3113 }, { "epoch": 0.43391625444158016, "grad_norm": 0.6029819250106812, "learning_rate": 6.690030292820792e-06, "loss": 0.06412696838378906, "step": 3114 }, { "epoch": 0.43405559813279454, "grad_norm": 2.014794111251831, "learning_rate": 6.687812032490081e-06, "loss": 0.07940673828125, "step": 3115 }, { "epoch": 0.4341949418240089, "grad_norm": 1.3015047311782837, "learning_rate": 6.685593397166069e-06, "loss": 0.08350181579589844, "step": 3116 }, { "epoch": 0.4343342855152233, "grad_norm": 1.1551671028137207, "learning_rate": 6.683374387341688e-06, "loss": 0.07032299041748047, "step": 3117 }, { "epoch": 0.4344736292064377, "grad_norm": 0.7459808588027954, "learning_rate": 6.681155003509949e-06, "loss": 0.07114219665527344, "step": 3118 }, { "epoch": 0.43461297289765205, "grad_norm": 0.8245290517807007, "learning_rate": 6.67893524616395e-06, "loss": 0.09227752685546875, "step": 3119 }, { "epoch": 0.4347523165888664, "grad_norm": 1.1206821203231812, "learning_rate": 6.67671511579687e-06, "loss": 0.0857243537902832, "step": 3120 }, { "epoch": 0.4348916602800808, "grad_norm": 1.1534602642059326, "learning_rate": 6.67449461290197e-06, "loss": 0.08252954483032227, "step": 3121 }, { "epoch": 0.4350310039712952, "grad_norm": 0.706976592540741, "learning_rate": 6.6722737379726e-06, "loss": 0.05933856964111328, "step": 3122 }, { "epoch": 0.43517034766250956, "grad_norm": 1.2259262800216675, "learning_rate": 6.670052491502182e-06, "loss": 0.08867740631103516, "step": 3123 }, { "epoch": 0.43530969135372394, "grad_norm": 0.7474872469902039, "learning_rate": 6.667830873984228e-06, "loss": 0.09455585479736328, "step": 3124 }, { "epoch": 0.4354490350449383, "grad_norm": 1.1541780233383179, "learning_rate": 6.66560888591233e-06, "loss": 0.07726097106933594, "step": 3125 }, { "epoch": 0.43558837873615275, "grad_norm": 1.001387357711792, "learning_rate": 6.663386527780166e-06, "loss": 0.09088516235351562, "step": 3126 }, { "epoch": 0.4357277224273671, "grad_norm": 0.6135890483856201, "learning_rate": 6.66116380008149e-06, "loss": 0.08394050598144531, "step": 3127 }, { "epoch": 0.4358670661185815, "grad_norm": 1.5328364372253418, "learning_rate": 6.6589407033101435e-06, "loss": 0.12249946594238281, "step": 3128 }, { "epoch": 0.4360064098097959, "grad_norm": 0.8512507677078247, "learning_rate": 6.656717237960047e-06, "loss": 0.068634033203125, "step": 3129 }, { "epoch": 0.43614575350101026, "grad_norm": 0.9704003930091858, "learning_rate": 6.654493404525204e-06, "loss": 0.07561111450195312, "step": 3130 }, { "epoch": 0.43628509719222464, "grad_norm": 0.6745249629020691, "learning_rate": 6.652269203499699e-06, "loss": 0.08096694946289062, "step": 3131 }, { "epoch": 0.436424440883439, "grad_norm": 0.8905719518661499, "learning_rate": 6.650044635377698e-06, "loss": 0.08404040336608887, "step": 3132 }, { "epoch": 0.4365637845746534, "grad_norm": 1.2303454875946045, "learning_rate": 6.64781970065345e-06, "loss": 0.09824180603027344, "step": 3133 }, { "epoch": 0.43670312826586777, "grad_norm": 0.42483505606651306, "learning_rate": 6.645594399821286e-06, "loss": 0.06754779815673828, "step": 3134 }, { "epoch": 0.43684247195708215, "grad_norm": 0.6599994897842407, "learning_rate": 6.6433687333756165e-06, "loss": 0.06789731979370117, "step": 3135 }, { "epoch": 0.4369818156482965, "grad_norm": 0.5889401435852051, "learning_rate": 6.641142701810932e-06, "loss": 0.06152534484863281, "step": 3136 }, { "epoch": 0.4371211593395109, "grad_norm": 1.047994613647461, "learning_rate": 6.638916305621807e-06, "loss": 0.07145071029663086, "step": 3137 }, { "epoch": 0.4372605030307253, "grad_norm": 0.6909381747245789, "learning_rate": 6.636689545302898e-06, "loss": 0.0887155532836914, "step": 3138 }, { "epoch": 0.43739984672193966, "grad_norm": 0.7014615535736084, "learning_rate": 6.634462421348935e-06, "loss": 0.09127140045166016, "step": 3139 }, { "epoch": 0.43753919041315403, "grad_norm": 1.0635871887207031, "learning_rate": 6.63223493425474e-06, "loss": 0.10464763641357422, "step": 3140 }, { "epoch": 0.4376785341043684, "grad_norm": 0.8844901323318481, "learning_rate": 6.630007084515205e-06, "loss": 0.08255863189697266, "step": 3141 }, { "epoch": 0.4378178777955828, "grad_norm": 0.5250558853149414, "learning_rate": 6.627778872625311e-06, "loss": 0.06389379501342773, "step": 3142 }, { "epoch": 0.43795722148679717, "grad_norm": 1.0578190088272095, "learning_rate": 6.625550299080115e-06, "loss": 0.09218692779541016, "step": 3143 }, { "epoch": 0.43809656517801154, "grad_norm": 0.33362701535224915, "learning_rate": 6.6233213643747525e-06, "loss": 0.050011634826660156, "step": 3144 }, { "epoch": 0.4382359088692259, "grad_norm": 1.061120867729187, "learning_rate": 6.621092069004445e-06, "loss": 0.09653747081756592, "step": 3145 }, { "epoch": 0.4383752525604403, "grad_norm": 0.5856987237930298, "learning_rate": 6.618862413464491e-06, "loss": 0.06624448299407959, "step": 3146 }, { "epoch": 0.43851459625165473, "grad_norm": 0.7378026843070984, "learning_rate": 6.616632398250266e-06, "loss": 0.10072708129882812, "step": 3147 }, { "epoch": 0.4386539399428691, "grad_norm": 0.5301852822303772, "learning_rate": 6.614402023857231e-06, "loss": 0.0674891471862793, "step": 3148 }, { "epoch": 0.4387932836340835, "grad_norm": 0.7278788089752197, "learning_rate": 6.612171290780925e-06, "loss": 0.0648202896118164, "step": 3149 }, { "epoch": 0.43893262732529786, "grad_norm": 0.5476402640342712, "learning_rate": 6.6099401995169635e-06, "loss": 0.07857799530029297, "step": 3150 }, { "epoch": 0.43907197101651224, "grad_norm": 0.9637129306793213, "learning_rate": 6.607708750561046e-06, "loss": 0.08674335479736328, "step": 3151 }, { "epoch": 0.4392113147077266, "grad_norm": 0.7283115983009338, "learning_rate": 6.605476944408948e-06, "loss": 0.0765533447265625, "step": 3152 }, { "epoch": 0.439350658398941, "grad_norm": 1.0364658832550049, "learning_rate": 6.603244781556527e-06, "loss": 0.08865928649902344, "step": 3153 }, { "epoch": 0.4394900020901554, "grad_norm": 0.6467538475990295, "learning_rate": 6.601012262499718e-06, "loss": 0.07516908645629883, "step": 3154 }, { "epoch": 0.43962934578136975, "grad_norm": 1.6145538091659546, "learning_rate": 6.598779387734535e-06, "loss": 0.09865188598632812, "step": 3155 }, { "epoch": 0.43976868947258413, "grad_norm": 1.0144835710525513, "learning_rate": 6.596546157757075e-06, "loss": 0.08391857147216797, "step": 3156 }, { "epoch": 0.4399080331637985, "grad_norm": 0.8712371587753296, "learning_rate": 6.594312573063506e-06, "loss": 0.08791446685791016, "step": 3157 }, { "epoch": 0.4400473768550129, "grad_norm": 0.765942394733429, "learning_rate": 6.592078634150084e-06, "loss": 0.09020042419433594, "step": 3158 }, { "epoch": 0.44018672054622726, "grad_norm": 0.48719748854637146, "learning_rate": 6.589844341513137e-06, "loss": 0.0678548812866211, "step": 3159 }, { "epoch": 0.44032606423744164, "grad_norm": 0.5158800482749939, "learning_rate": 6.587609695649073e-06, "loss": 0.061443328857421875, "step": 3160 }, { "epoch": 0.440465407928656, "grad_norm": 1.5243297815322876, "learning_rate": 6.585374697054382e-06, "loss": 0.11088180541992188, "step": 3161 }, { "epoch": 0.4406047516198704, "grad_norm": 0.8292446732521057, "learning_rate": 6.583139346225627e-06, "loss": 0.06672191619873047, "step": 3162 }, { "epoch": 0.44074409531108477, "grad_norm": 0.9304513931274414, "learning_rate": 6.580903643659453e-06, "loss": 0.0765371322631836, "step": 3163 }, { "epoch": 0.44088343900229915, "grad_norm": 0.6835747361183167, "learning_rate": 6.578667589852583e-06, "loss": 0.08311843872070312, "step": 3164 }, { "epoch": 0.4410227826935135, "grad_norm": 0.6250916123390198, "learning_rate": 6.576431185301815e-06, "loss": 0.08348941802978516, "step": 3165 }, { "epoch": 0.4411621263847279, "grad_norm": 0.6463056206703186, "learning_rate": 6.574194430504027e-06, "loss": 0.0711662769317627, "step": 3166 }, { "epoch": 0.44130147007594234, "grad_norm": 1.2170354127883911, "learning_rate": 6.571957325956178e-06, "loss": 0.11347246170043945, "step": 3167 }, { "epoch": 0.4414408137671567, "grad_norm": 1.0239999294281006, "learning_rate": 6.569719872155299e-06, "loss": 0.08858966827392578, "step": 3168 }, { "epoch": 0.4415801574583711, "grad_norm": 0.7754956483840942, "learning_rate": 6.567482069598503e-06, "loss": 0.08350276947021484, "step": 3169 }, { "epoch": 0.44171950114958547, "grad_norm": 0.7317479252815247, "learning_rate": 6.565243918782975e-06, "loss": 0.09045982360839844, "step": 3170 }, { "epoch": 0.44185884484079985, "grad_norm": 0.8547337651252747, "learning_rate": 6.563005420205984e-06, "loss": 0.057132720947265625, "step": 3171 }, { "epoch": 0.4419981885320142, "grad_norm": 0.7175488471984863, "learning_rate": 6.560766574364874e-06, "loss": 0.07843017578125, "step": 3172 }, { "epoch": 0.4421375322232286, "grad_norm": 0.8411827087402344, "learning_rate": 6.558527381757063e-06, "loss": 0.1046609878540039, "step": 3173 }, { "epoch": 0.442276875914443, "grad_norm": 0.80977863073349, "learning_rate": 6.55628784288005e-06, "loss": 0.08076763153076172, "step": 3174 }, { "epoch": 0.44241621960565736, "grad_norm": 0.6906711459159851, "learning_rate": 6.5540479582314085e-06, "loss": 0.09616470336914062, "step": 3175 }, { "epoch": 0.44255556329687173, "grad_norm": 0.5969390273094177, "learning_rate": 6.55180772830879e-06, "loss": 0.06702899932861328, "step": 3176 }, { "epoch": 0.4426949069880861, "grad_norm": 0.9172763824462891, "learning_rate": 6.5495671536099235e-06, "loss": 0.0723886489868164, "step": 3177 }, { "epoch": 0.4428342506793005, "grad_norm": 0.788365364074707, "learning_rate": 6.5473262346326125e-06, "loss": 0.0795745849609375, "step": 3178 }, { "epoch": 0.44297359437051487, "grad_norm": 0.6686076521873474, "learning_rate": 6.545084971874738e-06, "loss": 0.08573055267333984, "step": 3179 }, { "epoch": 0.44311293806172924, "grad_norm": 0.9041802287101746, "learning_rate": 6.542843365834257e-06, "loss": 0.08597564697265625, "step": 3180 }, { "epoch": 0.4432522817529436, "grad_norm": 0.7440356612205505, "learning_rate": 6.540601417009205e-06, "loss": 0.06814932823181152, "step": 3181 }, { "epoch": 0.443391625444158, "grad_norm": 0.9466443657875061, "learning_rate": 6.538359125897691e-06, "loss": 0.09373664855957031, "step": 3182 }, { "epoch": 0.4435309691353724, "grad_norm": 0.9222822189331055, "learning_rate": 6.536116492997899e-06, "loss": 0.08848381042480469, "step": 3183 }, { "epoch": 0.44367031282658675, "grad_norm": 0.9628097414970398, "learning_rate": 6.5338735188080916e-06, "loss": 0.08937740325927734, "step": 3184 }, { "epoch": 0.44380965651780113, "grad_norm": 0.9724774956703186, "learning_rate": 6.53163020382661e-06, "loss": 0.09633445739746094, "step": 3185 }, { "epoch": 0.4439490002090155, "grad_norm": 1.2569280862808228, "learning_rate": 6.529386548551864e-06, "loss": 0.089813232421875, "step": 3186 }, { "epoch": 0.44408834390022994, "grad_norm": 0.49368175864219666, "learning_rate": 6.5271425534823415e-06, "loss": 0.06976699829101562, "step": 3187 }, { "epoch": 0.4442276875914443, "grad_norm": 0.5870642066001892, "learning_rate": 6.524898219116612e-06, "loss": 0.06862449645996094, "step": 3188 }, { "epoch": 0.4443670312826587, "grad_norm": 1.0896267890930176, "learning_rate": 6.522653545953309e-06, "loss": 0.08896446228027344, "step": 3189 }, { "epoch": 0.4445063749738731, "grad_norm": 0.5256476402282715, "learning_rate": 6.520408534491154e-06, "loss": 0.06798505783081055, "step": 3190 }, { "epoch": 0.44464571866508745, "grad_norm": 0.8971378803253174, "learning_rate": 6.518163185228932e-06, "loss": 0.08147239685058594, "step": 3191 }, { "epoch": 0.44478506235630183, "grad_norm": 1.510532259941101, "learning_rate": 6.515917498665511e-06, "loss": 0.11366081237792969, "step": 3192 }, { "epoch": 0.4449244060475162, "grad_norm": 0.47813957929611206, "learning_rate": 6.51367147529983e-06, "loss": 0.066802978515625, "step": 3193 }, { "epoch": 0.4450637497387306, "grad_norm": 0.4509992003440857, "learning_rate": 6.511425115630906e-06, "loss": 0.07012271881103516, "step": 3194 }, { "epoch": 0.44520309342994496, "grad_norm": 0.8649786114692688, "learning_rate": 6.509178420157828e-06, "loss": 0.08015060424804688, "step": 3195 }, { "epoch": 0.44534243712115934, "grad_norm": 1.1146634817123413, "learning_rate": 6.506931389379759e-06, "loss": 0.08667182922363281, "step": 3196 }, { "epoch": 0.4454817808123737, "grad_norm": 0.8310951590538025, "learning_rate": 6.50468402379594e-06, "loss": 0.09100151062011719, "step": 3197 }, { "epoch": 0.4456211245035881, "grad_norm": 1.2757904529571533, "learning_rate": 6.502436323905683e-06, "loss": 0.09211063385009766, "step": 3198 }, { "epoch": 0.4457604681948025, "grad_norm": 0.6856065392494202, "learning_rate": 6.500188290208377e-06, "loss": 0.0662393569946289, "step": 3199 }, { "epoch": 0.44589981188601685, "grad_norm": 0.6569310426712036, "learning_rate": 6.49793992320348e-06, "loss": 0.09133720397949219, "step": 3200 }, { "epoch": 0.44603915557723123, "grad_norm": 0.6968250274658203, "learning_rate": 6.495691223390534e-06, "loss": 0.07956695556640625, "step": 3201 }, { "epoch": 0.4461784992684456, "grad_norm": 0.6952348351478577, "learning_rate": 6.4934421912691445e-06, "loss": 0.08231163024902344, "step": 3202 }, { "epoch": 0.44631784295966, "grad_norm": 0.9116555452346802, "learning_rate": 6.4911928273389946e-06, "loss": 0.10492324829101562, "step": 3203 }, { "epoch": 0.44645718665087436, "grad_norm": 0.563914954662323, "learning_rate": 6.488943132099845e-06, "loss": 0.07014083862304688, "step": 3204 }, { "epoch": 0.44659653034208874, "grad_norm": 0.7503229975700378, "learning_rate": 6.486693106051523e-06, "loss": 0.09258174896240234, "step": 3205 }, { "epoch": 0.4467358740333031, "grad_norm": 0.9852012395858765, "learning_rate": 6.484442749693935e-06, "loss": 0.09184932708740234, "step": 3206 }, { "epoch": 0.44687521772451755, "grad_norm": 0.9626832604408264, "learning_rate": 6.482192063527058e-06, "loss": 0.08653736114501953, "step": 3207 }, { "epoch": 0.4470145614157319, "grad_norm": 0.7801849842071533, "learning_rate": 6.479941048050944e-06, "loss": 0.07761478424072266, "step": 3208 }, { "epoch": 0.4471539051069463, "grad_norm": 0.8393688797950745, "learning_rate": 6.477689703765717e-06, "loss": 0.09024429321289062, "step": 3209 }, { "epoch": 0.4472932487981607, "grad_norm": 0.3942955434322357, "learning_rate": 6.475438031171574e-06, "loss": 0.0605926513671875, "step": 3210 }, { "epoch": 0.44743259248937506, "grad_norm": 0.7342818379402161, "learning_rate": 6.4731860307687845e-06, "loss": 0.08615732192993164, "step": 3211 }, { "epoch": 0.44757193618058944, "grad_norm": 1.0939174890518188, "learning_rate": 6.470933703057693e-06, "loss": 0.07432842254638672, "step": 3212 }, { "epoch": 0.4477112798718038, "grad_norm": 0.7447007298469543, "learning_rate": 6.468681048538715e-06, "loss": 0.07808971405029297, "step": 3213 }, { "epoch": 0.4478506235630182, "grad_norm": 0.9489312767982483, "learning_rate": 6.4664280677123385e-06, "loss": 0.09468364715576172, "step": 3214 }, { "epoch": 0.44798996725423257, "grad_norm": 0.6443993449211121, "learning_rate": 6.464174761079124e-06, "loss": 0.07319068908691406, "step": 3215 }, { "epoch": 0.44812931094544695, "grad_norm": 1.0428485870361328, "learning_rate": 6.461921129139704e-06, "loss": 0.09198379516601562, "step": 3216 }, { "epoch": 0.4482686546366613, "grad_norm": 1.1714882850646973, "learning_rate": 6.459667172394788e-06, "loss": 0.08338165283203125, "step": 3217 }, { "epoch": 0.4484079983278757, "grad_norm": 1.1610190868377686, "learning_rate": 6.4574128913451495e-06, "loss": 0.11300849914550781, "step": 3218 }, { "epoch": 0.4485473420190901, "grad_norm": 0.8914393186569214, "learning_rate": 6.455158286491641e-06, "loss": 0.11247611045837402, "step": 3219 }, { "epoch": 0.44868668571030446, "grad_norm": 0.7387272119522095, "learning_rate": 6.452903358335182e-06, "loss": 0.08092689514160156, "step": 3220 }, { "epoch": 0.44882602940151883, "grad_norm": 1.0501575469970703, "learning_rate": 6.450648107376767e-06, "loss": 0.12460899353027344, "step": 3221 }, { "epoch": 0.4489653730927332, "grad_norm": 0.6101815700531006, "learning_rate": 6.4483925341174625e-06, "loss": 0.07233333587646484, "step": 3222 }, { "epoch": 0.4491047167839476, "grad_norm": 0.732722818851471, "learning_rate": 6.4461366390584025e-06, "loss": 0.09072494506835938, "step": 3223 }, { "epoch": 0.44924406047516197, "grad_norm": 0.674531102180481, "learning_rate": 6.443880422700799e-06, "loss": 0.07727527618408203, "step": 3224 }, { "epoch": 0.44938340416637634, "grad_norm": 0.6564273834228516, "learning_rate": 6.441623885545929e-06, "loss": 0.0755620002746582, "step": 3225 }, { "epoch": 0.4495227478575907, "grad_norm": 0.8014233708381653, "learning_rate": 6.439367028095145e-06, "loss": 0.09254741668701172, "step": 3226 }, { "epoch": 0.44966209154880515, "grad_norm": 1.1761164665222168, "learning_rate": 6.437109850849868e-06, "loss": 0.07997393608093262, "step": 3227 }, { "epoch": 0.44980143524001953, "grad_norm": 0.9296741485595703, "learning_rate": 6.434852354311592e-06, "loss": 0.09597492218017578, "step": 3228 }, { "epoch": 0.4499407789312339, "grad_norm": 1.2652137279510498, "learning_rate": 6.432594538981881e-06, "loss": 0.10281085968017578, "step": 3229 }, { "epoch": 0.4500801226224483, "grad_norm": 0.8931143283843994, "learning_rate": 6.430336405362371e-06, "loss": 0.08812606334686279, "step": 3230 }, { "epoch": 0.45021946631366266, "grad_norm": 0.6597676277160645, "learning_rate": 6.428077953954766e-06, "loss": 0.08074283599853516, "step": 3231 }, { "epoch": 0.45035881000487704, "grad_norm": 0.6475532054901123, "learning_rate": 6.425819185260842e-06, "loss": 0.079833984375, "step": 3232 }, { "epoch": 0.4504981536960914, "grad_norm": 0.8151196241378784, "learning_rate": 6.42356009978245e-06, "loss": 0.10744285583496094, "step": 3233 }, { "epoch": 0.4506374973873058, "grad_norm": 1.0131478309631348, "learning_rate": 6.421300698021502e-06, "loss": 0.09044873714447021, "step": 3234 }, { "epoch": 0.4507768410785202, "grad_norm": 0.7562086582183838, "learning_rate": 6.419040980479989e-06, "loss": 0.07764053344726562, "step": 3235 }, { "epoch": 0.45091618476973455, "grad_norm": 0.9144704937934875, "learning_rate": 6.416780947659967e-06, "loss": 0.09696006774902344, "step": 3236 }, { "epoch": 0.45105552846094893, "grad_norm": 1.3476197719573975, "learning_rate": 6.4145206000635626e-06, "loss": 0.11435580253601074, "step": 3237 }, { "epoch": 0.4511948721521633, "grad_norm": 0.5028970837593079, "learning_rate": 6.412259938192978e-06, "loss": 0.06454753875732422, "step": 3238 }, { "epoch": 0.4513342158433777, "grad_norm": 0.7938106060028076, "learning_rate": 6.4099989625504756e-06, "loss": 0.07999897003173828, "step": 3239 }, { "epoch": 0.45147355953459206, "grad_norm": 0.9143985509872437, "learning_rate": 6.4077376736383954e-06, "loss": 0.07123279571533203, "step": 3240 }, { "epoch": 0.45161290322580644, "grad_norm": 1.0348296165466309, "learning_rate": 6.405476071959142e-06, "loss": 0.10293769836425781, "step": 3241 }, { "epoch": 0.4517522469170208, "grad_norm": 0.7105141878128052, "learning_rate": 6.403214158015194e-06, "loss": 0.07900619506835938, "step": 3242 }, { "epoch": 0.4518915906082352, "grad_norm": 0.6371899843215942, "learning_rate": 6.400951932309097e-06, "loss": 0.08238792419433594, "step": 3243 }, { "epoch": 0.45203093429944957, "grad_norm": 0.6188682317733765, "learning_rate": 6.3986893953434625e-06, "loss": 0.0823974609375, "step": 3244 }, { "epoch": 0.45217027799066395, "grad_norm": 0.6733136177062988, "learning_rate": 6.396426547620979e-06, "loss": 0.0872793197631836, "step": 3245 }, { "epoch": 0.4523096216818783, "grad_norm": 0.9141566157341003, "learning_rate": 6.394163389644397e-06, "loss": 0.08795881271362305, "step": 3246 }, { "epoch": 0.45244896537309276, "grad_norm": 1.5172628164291382, "learning_rate": 6.391899921916538e-06, "loss": 0.09424972534179688, "step": 3247 }, { "epoch": 0.45258830906430714, "grad_norm": 0.6518595814704895, "learning_rate": 6.389636144940294e-06, "loss": 0.08608818054199219, "step": 3248 }, { "epoch": 0.4527276527555215, "grad_norm": 1.0580182075500488, "learning_rate": 6.387372059218626e-06, "loss": 0.0809178352355957, "step": 3249 }, { "epoch": 0.4528669964467359, "grad_norm": 0.6692058444023132, "learning_rate": 6.38510766525456e-06, "loss": 0.098419189453125, "step": 3250 }, { "epoch": 0.45300634013795027, "grad_norm": 0.5338814854621887, "learning_rate": 6.382842963551193e-06, "loss": 0.07656431198120117, "step": 3251 }, { "epoch": 0.45314568382916465, "grad_norm": 1.0217829942703247, "learning_rate": 6.380577954611691e-06, "loss": 0.07451248168945312, "step": 3252 }, { "epoch": 0.453285027520379, "grad_norm": 0.8563360571861267, "learning_rate": 6.378312638939286e-06, "loss": 0.08616924285888672, "step": 3253 }, { "epoch": 0.4534243712115934, "grad_norm": 1.188259482383728, "learning_rate": 6.3760470170372815e-06, "loss": 0.10108089447021484, "step": 3254 }, { "epoch": 0.4535637149028078, "grad_norm": 0.9111030697822571, "learning_rate": 6.373781089409043e-06, "loss": 0.07026481628417969, "step": 3255 }, { "epoch": 0.45370305859402216, "grad_norm": 0.7716332077980042, "learning_rate": 6.371514856558013e-06, "loss": 0.08808565139770508, "step": 3256 }, { "epoch": 0.45384240228523653, "grad_norm": 0.8166860938072205, "learning_rate": 6.369248318987692e-06, "loss": 0.09227561950683594, "step": 3257 }, { "epoch": 0.4539817459764509, "grad_norm": 0.9272300004959106, "learning_rate": 6.3669814772016555e-06, "loss": 0.09005355834960938, "step": 3258 }, { "epoch": 0.4541210896676653, "grad_norm": 0.5125316381454468, "learning_rate": 6.3647143317035445e-06, "loss": 0.07188606262207031, "step": 3259 }, { "epoch": 0.45426043335887967, "grad_norm": 0.9676203727722168, "learning_rate": 6.362446882997064e-06, "loss": 0.09068489074707031, "step": 3260 }, { "epoch": 0.45439977705009404, "grad_norm": 0.8886604905128479, "learning_rate": 6.360179131585993e-06, "loss": 0.0771780014038086, "step": 3261 }, { "epoch": 0.4545391207413084, "grad_norm": 0.6583217978477478, "learning_rate": 6.357911077974173e-06, "loss": 0.08142852783203125, "step": 3262 }, { "epoch": 0.4546784644325228, "grad_norm": 1.002532958984375, "learning_rate": 6.355642722665512e-06, "loss": 0.0970926284790039, "step": 3263 }, { "epoch": 0.4548178081237372, "grad_norm": 1.2858072519302368, "learning_rate": 6.353374066163988e-06, "loss": 0.09421348571777344, "step": 3264 }, { "epoch": 0.45495715181495155, "grad_norm": 1.1540112495422363, "learning_rate": 6.351105108973644e-06, "loss": 0.11601829528808594, "step": 3265 }, { "epoch": 0.45509649550616593, "grad_norm": 0.6762357950210571, "learning_rate": 6.34883585159859e-06, "loss": 0.07519292831420898, "step": 3266 }, { "epoch": 0.45523583919738037, "grad_norm": 1.2170217037200928, "learning_rate": 6.346566294543008e-06, "loss": 0.09266471862792969, "step": 3267 }, { "epoch": 0.45537518288859474, "grad_norm": 0.7561399340629578, "learning_rate": 6.344296438311134e-06, "loss": 0.11195659637451172, "step": 3268 }, { "epoch": 0.4555145265798091, "grad_norm": 0.998395562171936, "learning_rate": 6.342026283407286e-06, "loss": 0.08588314056396484, "step": 3269 }, { "epoch": 0.4556538702710235, "grad_norm": 0.9363046288490295, "learning_rate": 6.339755830335834e-06, "loss": 0.07853221893310547, "step": 3270 }, { "epoch": 0.4557932139622379, "grad_norm": 0.8425816297531128, "learning_rate": 6.337485079601224e-06, "loss": 0.07196998596191406, "step": 3271 }, { "epoch": 0.45593255765345225, "grad_norm": 1.1619219779968262, "learning_rate": 6.335214031707966e-06, "loss": 0.0858907699584961, "step": 3272 }, { "epoch": 0.45607190134466663, "grad_norm": 1.0535908937454224, "learning_rate": 6.332942687160632e-06, "loss": 0.07656288146972656, "step": 3273 }, { "epoch": 0.456211245035881, "grad_norm": 0.9330799579620361, "learning_rate": 6.3306710464638645e-06, "loss": 0.09746360778808594, "step": 3274 }, { "epoch": 0.4563505887270954, "grad_norm": 0.8239259719848633, "learning_rate": 6.328399110122371e-06, "loss": 0.0832052230834961, "step": 3275 }, { "epoch": 0.45648993241830976, "grad_norm": 1.2387958765029907, "learning_rate": 6.3261268786409225e-06, "loss": 0.09212970733642578, "step": 3276 }, { "epoch": 0.45662927610952414, "grad_norm": 0.7287478446960449, "learning_rate": 6.323854352524359e-06, "loss": 0.06962347030639648, "step": 3277 }, { "epoch": 0.4567686198007385, "grad_norm": 1.2492824792861938, "learning_rate": 6.321581532277581e-06, "loss": 0.11446285247802734, "step": 3278 }, { "epoch": 0.4569079634919529, "grad_norm": 0.949626624584198, "learning_rate": 6.319308418405559e-06, "loss": 0.09436893463134766, "step": 3279 }, { "epoch": 0.4570473071831673, "grad_norm": 0.5809729099273682, "learning_rate": 6.317035011413327e-06, "loss": 0.0803070068359375, "step": 3280 }, { "epoch": 0.45718665087438165, "grad_norm": 0.8415589332580566, "learning_rate": 6.314761311805983e-06, "loss": 0.07648849487304688, "step": 3281 }, { "epoch": 0.45732599456559603, "grad_norm": 0.9414567351341248, "learning_rate": 6.312487320088693e-06, "loss": 0.0821075439453125, "step": 3282 }, { "epoch": 0.4574653382568104, "grad_norm": 0.45572301745414734, "learning_rate": 6.3102130367666855e-06, "loss": 0.07725948095321655, "step": 3283 }, { "epoch": 0.4576046819480248, "grad_norm": 1.7767776250839233, "learning_rate": 6.307938462345253e-06, "loss": 0.10376691818237305, "step": 3284 }, { "epoch": 0.45774402563923916, "grad_norm": 1.057389259338379, "learning_rate": 6.305663597329756e-06, "loss": 0.10577774047851562, "step": 3285 }, { "epoch": 0.45788336933045354, "grad_norm": 0.8335888981819153, "learning_rate": 6.303388442225616e-06, "loss": 0.0838003158569336, "step": 3286 }, { "epoch": 0.45802271302166797, "grad_norm": 0.8002011775970459, "learning_rate": 6.30111299753832e-06, "loss": 0.0675806999206543, "step": 3287 }, { "epoch": 0.45816205671288235, "grad_norm": 0.5453510880470276, "learning_rate": 6.298837263773423e-06, "loss": 0.06850910186767578, "step": 3288 }, { "epoch": 0.4583014004040967, "grad_norm": 0.8261687159538269, "learning_rate": 6.2965612414365365e-06, "loss": 0.0761880874633789, "step": 3289 }, { "epoch": 0.4584407440953111, "grad_norm": 0.5315942168235779, "learning_rate": 6.294284931033344e-06, "loss": 0.05529308319091797, "step": 3290 }, { "epoch": 0.4585800877865255, "grad_norm": 0.877212643623352, "learning_rate": 6.292008333069589e-06, "loss": 0.08572578430175781, "step": 3291 }, { "epoch": 0.45871943147773986, "grad_norm": 0.5112149715423584, "learning_rate": 6.289731448051079e-06, "loss": 0.08475875854492188, "step": 3292 }, { "epoch": 0.45885877516895424, "grad_norm": 0.5808886289596558, "learning_rate": 6.287454276483687e-06, "loss": 0.08190727233886719, "step": 3293 }, { "epoch": 0.4589981188601686, "grad_norm": 1.1299560070037842, "learning_rate": 6.2851768188733485e-06, "loss": 0.08832263946533203, "step": 3294 }, { "epoch": 0.459137462551383, "grad_norm": 0.9537436962127686, "learning_rate": 6.282899075726061e-06, "loss": 0.0770263671875, "step": 3295 }, { "epoch": 0.45927680624259737, "grad_norm": 0.8133154511451721, "learning_rate": 6.280621047547888e-06, "loss": 0.06903076171875, "step": 3296 }, { "epoch": 0.45941614993381175, "grad_norm": 1.0585408210754395, "learning_rate": 6.278342734844955e-06, "loss": 0.07964706420898438, "step": 3297 }, { "epoch": 0.4595554936250261, "grad_norm": 0.9481601715087891, "learning_rate": 6.276064138123453e-06, "loss": 0.08166980743408203, "step": 3298 }, { "epoch": 0.4596948373162405, "grad_norm": 1.2669744491577148, "learning_rate": 6.27378525788963e-06, "loss": 0.07554244995117188, "step": 3299 }, { "epoch": 0.4598341810074549, "grad_norm": 0.7668007612228394, "learning_rate": 6.271506094649804e-06, "loss": 0.08670330047607422, "step": 3300 }, { "epoch": 0.45997352469866926, "grad_norm": 0.7451852560043335, "learning_rate": 6.269226648910356e-06, "loss": 0.07477378845214844, "step": 3301 }, { "epoch": 0.46011286838988363, "grad_norm": 0.847869336605072, "learning_rate": 6.266946921177721e-06, "loss": 0.09167909622192383, "step": 3302 }, { "epoch": 0.460252212081098, "grad_norm": 0.8066619634628296, "learning_rate": 6.264666911958404e-06, "loss": 0.08500289916992188, "step": 3303 }, { "epoch": 0.4603915557723124, "grad_norm": 1.2283546924591064, "learning_rate": 6.262386621758975e-06, "loss": 0.06982994079589844, "step": 3304 }, { "epoch": 0.46053089946352677, "grad_norm": 0.516592800617218, "learning_rate": 6.2601060510860565e-06, "loss": 0.0805511474609375, "step": 3305 }, { "epoch": 0.46067024315474114, "grad_norm": 0.5933857560157776, "learning_rate": 6.2578252004463436e-06, "loss": 0.08106422424316406, "step": 3306 }, { "epoch": 0.4608095868459556, "grad_norm": 0.8289296627044678, "learning_rate": 6.255544070346588e-06, "loss": 0.07400798797607422, "step": 3307 }, { "epoch": 0.46094893053716995, "grad_norm": 0.7269400358200073, "learning_rate": 6.2532626612936035e-06, "loss": 0.06982231140136719, "step": 3308 }, { "epoch": 0.46108827422838433, "grad_norm": 1.633123517036438, "learning_rate": 6.250980973794268e-06, "loss": 0.0904855728149414, "step": 3309 }, { "epoch": 0.4612276179195987, "grad_norm": 0.9364359378814697, "learning_rate": 6.248699008355522e-06, "loss": 0.09533882141113281, "step": 3310 }, { "epoch": 0.4613669616108131, "grad_norm": 0.6603257656097412, "learning_rate": 6.2464167654843645e-06, "loss": 0.07065486907958984, "step": 3311 }, { "epoch": 0.46150630530202746, "grad_norm": 1.1709158420562744, "learning_rate": 6.2441342456878565e-06, "loss": 0.1007838249206543, "step": 3312 }, { "epoch": 0.46164564899324184, "grad_norm": 0.8179845809936523, "learning_rate": 6.2418514494731245e-06, "loss": 0.08216285705566406, "step": 3313 }, { "epoch": 0.4617849926844562, "grad_norm": 1.2842501401901245, "learning_rate": 6.239568377347352e-06, "loss": 0.10706424713134766, "step": 3314 }, { "epoch": 0.4619243363756706, "grad_norm": 0.7064462304115295, "learning_rate": 6.237285029817786e-06, "loss": 0.0770115852355957, "step": 3315 }, { "epoch": 0.462063680066885, "grad_norm": 0.8600456118583679, "learning_rate": 6.235001407391732e-06, "loss": 0.09558296203613281, "step": 3316 }, { "epoch": 0.46220302375809935, "grad_norm": 0.5451250076293945, "learning_rate": 6.232717510576563e-06, "loss": 0.06868147850036621, "step": 3317 }, { "epoch": 0.46234236744931373, "grad_norm": 0.7510248422622681, "learning_rate": 6.230433339879706e-06, "loss": 0.09190177917480469, "step": 3318 }, { "epoch": 0.4624817111405281, "grad_norm": 0.8609080910682678, "learning_rate": 6.228148895808652e-06, "loss": 0.07301044464111328, "step": 3319 }, { "epoch": 0.4626210548317425, "grad_norm": 0.866221010684967, "learning_rate": 6.225864178870954e-06, "loss": 0.07703018188476562, "step": 3320 }, { "epoch": 0.46276039852295686, "grad_norm": 0.36920395493507385, "learning_rate": 6.22357918957422e-06, "loss": 0.057471275329589844, "step": 3321 }, { "epoch": 0.46289974221417124, "grad_norm": 0.5740931630134583, "learning_rate": 6.221293928426128e-06, "loss": 0.0669851303100586, "step": 3322 }, { "epoch": 0.4630390859053856, "grad_norm": 1.4950076341629028, "learning_rate": 6.219008395934405e-06, "loss": 0.10284805297851562, "step": 3323 }, { "epoch": 0.4631784295966, "grad_norm": 1.0115419626235962, "learning_rate": 6.216722592606847e-06, "loss": 0.09128856658935547, "step": 3324 }, { "epoch": 0.46331777328781437, "grad_norm": 0.4287603497505188, "learning_rate": 6.214436518951308e-06, "loss": 0.051387786865234375, "step": 3325 }, { "epoch": 0.46345711697902875, "grad_norm": 1.0565606355667114, "learning_rate": 6.212150175475701e-06, "loss": 0.10251617431640625, "step": 3326 }, { "epoch": 0.4635964606702432, "grad_norm": 1.1443145275115967, "learning_rate": 6.209863562687998e-06, "loss": 0.0937490463256836, "step": 3327 }, { "epoch": 0.46373580436145756, "grad_norm": 0.6750577092170715, "learning_rate": 6.207576681096233e-06, "loss": 0.07229232788085938, "step": 3328 }, { "epoch": 0.46387514805267194, "grad_norm": 0.6121259927749634, "learning_rate": 6.2052895312085e-06, "loss": 0.08184337615966797, "step": 3329 }, { "epoch": 0.4640144917438863, "grad_norm": 1.095881462097168, "learning_rate": 6.203002113532949e-06, "loss": 0.07562923431396484, "step": 3330 }, { "epoch": 0.4641538354351007, "grad_norm": 0.6734304428100586, "learning_rate": 6.200714428577794e-06, "loss": 0.06491756439208984, "step": 3331 }, { "epoch": 0.46429317912631507, "grad_norm": 0.8930040597915649, "learning_rate": 6.198426476851305e-06, "loss": 0.09293174743652344, "step": 3332 }, { "epoch": 0.46443252281752945, "grad_norm": 0.4941350519657135, "learning_rate": 6.196138258861815e-06, "loss": 0.07073044776916504, "step": 3333 }, { "epoch": 0.4645718665087438, "grad_norm": 0.5370311141014099, "learning_rate": 6.193849775117709e-06, "loss": 0.06803703308105469, "step": 3334 }, { "epoch": 0.4647112101999582, "grad_norm": 0.8905772566795349, "learning_rate": 6.191561026127444e-06, "loss": 0.07647037506103516, "step": 3335 }, { "epoch": 0.4648505538911726, "grad_norm": 1.023231029510498, "learning_rate": 6.18927201239952e-06, "loss": 0.08244991302490234, "step": 3336 }, { "epoch": 0.46498989758238696, "grad_norm": 0.6351078748703003, "learning_rate": 6.186982734442505e-06, "loss": 0.0782022476196289, "step": 3337 }, { "epoch": 0.46512924127360133, "grad_norm": 1.1873165369033813, "learning_rate": 6.184693192765028e-06, "loss": 0.1031656265258789, "step": 3338 }, { "epoch": 0.4652685849648157, "grad_norm": 0.7924783825874329, "learning_rate": 6.1824033878757685e-06, "loss": 0.07457447052001953, "step": 3339 }, { "epoch": 0.4654079286560301, "grad_norm": 0.9650660753250122, "learning_rate": 6.180113320283473e-06, "loss": 0.08153629302978516, "step": 3340 }, { "epoch": 0.46554727234724447, "grad_norm": 1.1607952117919922, "learning_rate": 6.177822990496939e-06, "loss": 0.09709548950195312, "step": 3341 }, { "epoch": 0.46568661603845884, "grad_norm": 0.5309658050537109, "learning_rate": 6.175532399025027e-06, "loss": 0.06993484497070312, "step": 3342 }, { "epoch": 0.4658259597296732, "grad_norm": 1.0751588344573975, "learning_rate": 6.173241546376654e-06, "loss": 0.09267115592956543, "step": 3343 }, { "epoch": 0.4659653034208876, "grad_norm": 0.7542791962623596, "learning_rate": 6.170950433060795e-06, "loss": 0.11547183990478516, "step": 3344 }, { "epoch": 0.466104647112102, "grad_norm": 1.1445978879928589, "learning_rate": 6.168659059586483e-06, "loss": 0.08333778381347656, "step": 3345 }, { "epoch": 0.46624399080331635, "grad_norm": 0.6487346887588501, "learning_rate": 6.166367426462808e-06, "loss": 0.07491397857666016, "step": 3346 }, { "epoch": 0.46638333449453073, "grad_norm": 0.6706246137619019, "learning_rate": 6.16407553419892e-06, "loss": 0.06510639190673828, "step": 3347 }, { "epoch": 0.46652267818574517, "grad_norm": 0.7610967755317688, "learning_rate": 6.161783383304024e-06, "loss": 0.07655620574951172, "step": 3348 }, { "epoch": 0.46666202187695954, "grad_norm": 1.1121795177459717, "learning_rate": 6.159490974287386e-06, "loss": 0.08022236824035645, "step": 3349 }, { "epoch": 0.4668013655681739, "grad_norm": 0.6393246054649353, "learning_rate": 6.157198307658323e-06, "loss": 0.08123493194580078, "step": 3350 }, { "epoch": 0.4669407092593883, "grad_norm": 0.6022802591323853, "learning_rate": 6.154905383926218e-06, "loss": 0.07195186614990234, "step": 3351 }, { "epoch": 0.4670800529506027, "grad_norm": 0.7735450863838196, "learning_rate": 6.152612203600502e-06, "loss": 0.09092593193054199, "step": 3352 }, { "epoch": 0.46721939664181705, "grad_norm": 0.8540034294128418, "learning_rate": 6.150318767190668e-06, "loss": 0.06928110122680664, "step": 3353 }, { "epoch": 0.46735874033303143, "grad_norm": 1.0085697174072266, "learning_rate": 6.148025075206268e-06, "loss": 0.0736846923828125, "step": 3354 }, { "epoch": 0.4674980840242458, "grad_norm": 0.479869544506073, "learning_rate": 6.145731128156904e-06, "loss": 0.061593055725097656, "step": 3355 }, { "epoch": 0.4676374277154602, "grad_norm": 0.8109086751937866, "learning_rate": 6.143436926552242e-06, "loss": 0.08332443237304688, "step": 3356 }, { "epoch": 0.46777677140667456, "grad_norm": 1.072054386138916, "learning_rate": 6.141142470902001e-06, "loss": 0.0961904525756836, "step": 3357 }, { "epoch": 0.46791611509788894, "grad_norm": 0.5056020617485046, "learning_rate": 6.138847761715955e-06, "loss": 0.061977386474609375, "step": 3358 }, { "epoch": 0.4680554587891033, "grad_norm": 1.3511223793029785, "learning_rate": 6.1365527995039366e-06, "loss": 0.07057476043701172, "step": 3359 }, { "epoch": 0.4681948024803177, "grad_norm": 0.46909862756729126, "learning_rate": 6.134257584775833e-06, "loss": 0.056652069091796875, "step": 3360 }, { "epoch": 0.4683341461715321, "grad_norm": 0.6530526876449585, "learning_rate": 6.131962118041591e-06, "loss": 0.0819406509399414, "step": 3361 }, { "epoch": 0.46847348986274645, "grad_norm": 0.8296990394592285, "learning_rate": 6.129666399811209e-06, "loss": 0.06578445434570312, "step": 3362 }, { "epoch": 0.46861283355396083, "grad_norm": 0.675062894821167, "learning_rate": 6.127370430594745e-06, "loss": 0.08073997497558594, "step": 3363 }, { "epoch": 0.4687521772451752, "grad_norm": 0.9574986696243286, "learning_rate": 6.125074210902307e-06, "loss": 0.07664299011230469, "step": 3364 }, { "epoch": 0.4688915209363896, "grad_norm": 1.514140009880066, "learning_rate": 6.122777741244067e-06, "loss": 0.11628055572509766, "step": 3365 }, { "epoch": 0.46903086462760396, "grad_norm": 1.7903720140457153, "learning_rate": 6.120481022130245e-06, "loss": 0.10791492462158203, "step": 3366 }, { "epoch": 0.46917020831881834, "grad_norm": 0.8371031284332275, "learning_rate": 6.118184054071124e-06, "loss": 0.059051513671875, "step": 3367 }, { "epoch": 0.46930955201003277, "grad_norm": 0.6258707046508789, "learning_rate": 6.115886837577031e-06, "loss": 0.06320476531982422, "step": 3368 }, { "epoch": 0.46944889570124715, "grad_norm": 0.8364197611808777, "learning_rate": 6.113589373158361e-06, "loss": 0.09503650665283203, "step": 3369 }, { "epoch": 0.4695882393924615, "grad_norm": 1.5188344717025757, "learning_rate": 6.111291661325556e-06, "loss": 0.10358047485351562, "step": 3370 }, { "epoch": 0.4697275830836759, "grad_norm": 1.1232094764709473, "learning_rate": 6.108993702589114e-06, "loss": 0.0822906494140625, "step": 3371 }, { "epoch": 0.4698669267748903, "grad_norm": 1.0889519453048706, "learning_rate": 6.106695497459591e-06, "loss": 0.07745838165283203, "step": 3372 }, { "epoch": 0.47000627046610466, "grad_norm": 1.0316612720489502, "learning_rate": 6.104397046447593e-06, "loss": 0.09308338165283203, "step": 3373 }, { "epoch": 0.47014561415731904, "grad_norm": 0.7224655151367188, "learning_rate": 6.102098350063786e-06, "loss": 0.09270477294921875, "step": 3374 }, { "epoch": 0.4702849578485334, "grad_norm": 0.582480251789093, "learning_rate": 6.099799408818889e-06, "loss": 0.07000923156738281, "step": 3375 }, { "epoch": 0.4704243015397478, "grad_norm": 0.868527352809906, "learning_rate": 6.097500223223669e-06, "loss": 0.08721733093261719, "step": 3376 }, { "epoch": 0.47056364523096217, "grad_norm": 0.6190572381019592, "learning_rate": 6.095200793788958e-06, "loss": 0.07884788513183594, "step": 3377 }, { "epoch": 0.47070298892217655, "grad_norm": 0.8689056634902954, "learning_rate": 6.092901121025634e-06, "loss": 0.07786369323730469, "step": 3378 }, { "epoch": 0.4708423326133909, "grad_norm": 0.9562178254127502, "learning_rate": 6.090601205444632e-06, "loss": 0.09640073776245117, "step": 3379 }, { "epoch": 0.4709816763046053, "grad_norm": 1.2148950099945068, "learning_rate": 6.088301047556942e-06, "loss": 0.10394954681396484, "step": 3380 }, { "epoch": 0.4711210199958197, "grad_norm": 0.6699635982513428, "learning_rate": 6.086000647873604e-06, "loss": 0.08682107925415039, "step": 3381 }, { "epoch": 0.47126036368703406, "grad_norm": 0.4741453230381012, "learning_rate": 6.083700006905715e-06, "loss": 0.0746774673461914, "step": 3382 }, { "epoch": 0.47139970737824843, "grad_norm": 0.4030364155769348, "learning_rate": 6.081399125164429e-06, "loss": 0.06365489959716797, "step": 3383 }, { "epoch": 0.4715390510694628, "grad_norm": 0.7388176918029785, "learning_rate": 6.079098003160943e-06, "loss": 0.07451057434082031, "step": 3384 }, { "epoch": 0.4716783947606772, "grad_norm": 0.8065313100814819, "learning_rate": 6.076796641406518e-06, "loss": 0.08412361145019531, "step": 3385 }, { "epoch": 0.47181773845189157, "grad_norm": 0.810470461845398, "learning_rate": 6.074495040412465e-06, "loss": 0.08077812194824219, "step": 3386 }, { "epoch": 0.47195708214310594, "grad_norm": 0.7917141914367676, "learning_rate": 6.072193200690142e-06, "loss": 0.07651233673095703, "step": 3387 }, { "epoch": 0.4720964258343204, "grad_norm": 0.31863775849342346, "learning_rate": 6.069891122750971e-06, "loss": 0.06409263610839844, "step": 3388 }, { "epoch": 0.47223576952553475, "grad_norm": 0.8780884742736816, "learning_rate": 6.067588807106416e-06, "loss": 0.08466196060180664, "step": 3389 }, { "epoch": 0.47237511321674913, "grad_norm": 0.6824145913124084, "learning_rate": 6.0652862542680034e-06, "loss": 0.09561347961425781, "step": 3390 }, { "epoch": 0.4725144569079635, "grad_norm": 0.9962921738624573, "learning_rate": 6.062983464747305e-06, "loss": 0.10233259201049805, "step": 3391 }, { "epoch": 0.4726538005991779, "grad_norm": 0.6137648820877075, "learning_rate": 6.06068043905595e-06, "loss": 0.0645895004272461, "step": 3392 }, { "epoch": 0.47279314429039226, "grad_norm": 1.013767957687378, "learning_rate": 6.0583771777056166e-06, "loss": 0.09688377380371094, "step": 3393 }, { "epoch": 0.47293248798160664, "grad_norm": 1.0165290832519531, "learning_rate": 6.056073681208038e-06, "loss": 0.09412622451782227, "step": 3394 }, { "epoch": 0.473071831672821, "grad_norm": 1.1571611166000366, "learning_rate": 6.053769950074997e-06, "loss": 0.0878763198852539, "step": 3395 }, { "epoch": 0.4732111753640354, "grad_norm": 1.0308030843734741, "learning_rate": 6.051465984818332e-06, "loss": 0.07476884126663208, "step": 3396 }, { "epoch": 0.4733505190552498, "grad_norm": 0.5361518263816833, "learning_rate": 6.049161785949931e-06, "loss": 0.07967281341552734, "step": 3397 }, { "epoch": 0.47348986274646415, "grad_norm": 0.9236868023872375, "learning_rate": 6.046857353981732e-06, "loss": 0.07794857025146484, "step": 3398 }, { "epoch": 0.47362920643767853, "grad_norm": 1.8134102821350098, "learning_rate": 6.044552689425731e-06, "loss": 0.08526992797851562, "step": 3399 }, { "epoch": 0.4737685501288929, "grad_norm": 1.441249966621399, "learning_rate": 6.042247792793968e-06, "loss": 0.1308746337890625, "step": 3400 }, { "epoch": 0.4739078938201073, "grad_norm": 0.83821040391922, "learning_rate": 6.0399426645985424e-06, "loss": 0.07037901878356934, "step": 3401 }, { "epoch": 0.47404723751132166, "grad_norm": 0.9112629890441895, "learning_rate": 6.037637305351599e-06, "loss": 0.09149169921875, "step": 3402 }, { "epoch": 0.47418658120253604, "grad_norm": 1.325404405593872, "learning_rate": 6.035331715565333e-06, "loss": 0.11549758911132812, "step": 3403 }, { "epoch": 0.4743259248937504, "grad_norm": 1.1968685388565063, "learning_rate": 6.033025895752002e-06, "loss": 0.07752799987792969, "step": 3404 }, { "epoch": 0.4744652685849648, "grad_norm": 0.6443455815315247, "learning_rate": 6.030719846423897e-06, "loss": 0.06267833709716797, "step": 3405 }, { "epoch": 0.47460461227617917, "grad_norm": 0.748511016368866, "learning_rate": 6.028413568093375e-06, "loss": 0.07790851593017578, "step": 3406 }, { "epoch": 0.47474395596739355, "grad_norm": 0.7986204028129578, "learning_rate": 6.026107061272838e-06, "loss": 0.10392189025878906, "step": 3407 }, { "epoch": 0.474883299658608, "grad_norm": 1.1730632781982422, "learning_rate": 6.023800326474738e-06, "loss": 0.08237123489379883, "step": 3408 }, { "epoch": 0.47502264334982236, "grad_norm": 0.8474903106689453, "learning_rate": 6.0214933642115794e-06, "loss": 0.0797128677368164, "step": 3409 }, { "epoch": 0.47516198704103674, "grad_norm": 0.6710580587387085, "learning_rate": 6.019186174995916e-06, "loss": 0.0654449462890625, "step": 3410 }, { "epoch": 0.4753013307322511, "grad_norm": 0.9342487454414368, "learning_rate": 6.016878759340352e-06, "loss": 0.08857059478759766, "step": 3411 }, { "epoch": 0.4754406744234655, "grad_norm": 0.5827676653862, "learning_rate": 6.014571117757545e-06, "loss": 0.06158018112182617, "step": 3412 }, { "epoch": 0.47558001811467987, "grad_norm": 0.9323533773422241, "learning_rate": 6.012263250760199e-06, "loss": 0.08986091613769531, "step": 3413 }, { "epoch": 0.47571936180589425, "grad_norm": 0.7106996774673462, "learning_rate": 6.009955158861066e-06, "loss": 0.09559059143066406, "step": 3414 }, { "epoch": 0.4758587054971086, "grad_norm": 0.8881306052207947, "learning_rate": 6.007646842572959e-06, "loss": 0.07495880126953125, "step": 3415 }, { "epoch": 0.475998049188323, "grad_norm": 0.6235960125923157, "learning_rate": 6.005338302408724e-06, "loss": 0.08625125885009766, "step": 3416 }, { "epoch": 0.4761373928795374, "grad_norm": 0.5334309935569763, "learning_rate": 6.0030295388812736e-06, "loss": 0.07142353057861328, "step": 3417 }, { "epoch": 0.47627673657075176, "grad_norm": 0.5937414169311523, "learning_rate": 6.000720552503557e-06, "loss": 0.06139087677001953, "step": 3418 }, { "epoch": 0.47641608026196614, "grad_norm": 0.7738785147666931, "learning_rate": 5.998411343788582e-06, "loss": 0.09150886535644531, "step": 3419 }, { "epoch": 0.4765554239531805, "grad_norm": 0.7706207633018494, "learning_rate": 5.996101913249402e-06, "loss": 0.06314754486083984, "step": 3420 }, { "epoch": 0.4766947676443949, "grad_norm": 0.8953108191490173, "learning_rate": 5.993792261399115e-06, "loss": 0.06478404998779297, "step": 3421 }, { "epoch": 0.47683411133560927, "grad_norm": 1.230458378791809, "learning_rate": 5.991482388750878e-06, "loss": 0.08194351196289062, "step": 3422 }, { "epoch": 0.47697345502682365, "grad_norm": 0.5239139199256897, "learning_rate": 5.989172295817889e-06, "loss": 0.07602214813232422, "step": 3423 }, { "epoch": 0.477112798718038, "grad_norm": 0.9190511703491211, "learning_rate": 5.9868619831134e-06, "loss": 0.09912490844726562, "step": 3424 }, { "epoch": 0.4772521424092524, "grad_norm": 0.9032093286514282, "learning_rate": 5.984551451150709e-06, "loss": 0.07948064804077148, "step": 3425 }, { "epoch": 0.4773914861004668, "grad_norm": 0.907770037651062, "learning_rate": 5.9822407004431625e-06, "loss": 0.07836341857910156, "step": 3426 }, { "epoch": 0.47753082979168116, "grad_norm": 0.9245799779891968, "learning_rate": 5.979929731504158e-06, "loss": 0.08770942687988281, "step": 3427 }, { "epoch": 0.4776701734828956, "grad_norm": 1.1344232559204102, "learning_rate": 5.977618544847139e-06, "loss": 0.1250743865966797, "step": 3428 }, { "epoch": 0.47780951717410997, "grad_norm": 0.8148223161697388, "learning_rate": 5.975307140985599e-06, "loss": 0.07717323303222656, "step": 3429 }, { "epoch": 0.47794886086532434, "grad_norm": 0.7359947562217712, "learning_rate": 5.972995520433078e-06, "loss": 0.08038806915283203, "step": 3430 }, { "epoch": 0.4780882045565387, "grad_norm": 1.0257426500320435, "learning_rate": 5.970683683703168e-06, "loss": 0.08766746520996094, "step": 3431 }, { "epoch": 0.4782275482477531, "grad_norm": 0.7864608764648438, "learning_rate": 5.968371631309502e-06, "loss": 0.06010794639587402, "step": 3432 }, { "epoch": 0.4783668919389675, "grad_norm": 0.9604579210281372, "learning_rate": 5.966059363765771e-06, "loss": 0.10344886779785156, "step": 3433 }, { "epoch": 0.47850623563018185, "grad_norm": 0.6418907046318054, "learning_rate": 5.9637468815857016e-06, "loss": 0.07519054412841797, "step": 3434 }, { "epoch": 0.47864557932139623, "grad_norm": 0.7898691892623901, "learning_rate": 5.961434185283079e-06, "loss": 0.07262134552001953, "step": 3435 }, { "epoch": 0.4787849230126106, "grad_norm": 0.5419700145721436, "learning_rate": 5.959121275371732e-06, "loss": 0.07703304290771484, "step": 3436 }, { "epoch": 0.478924266703825, "grad_norm": 1.3564058542251587, "learning_rate": 5.956808152365532e-06, "loss": 0.1072378158569336, "step": 3437 }, { "epoch": 0.47906361039503936, "grad_norm": 0.7520043849945068, "learning_rate": 5.954494816778408e-06, "loss": 0.09064674377441406, "step": 3438 }, { "epoch": 0.47920295408625374, "grad_norm": 0.8316725492477417, "learning_rate": 5.952181269124324e-06, "loss": 0.08830404281616211, "step": 3439 }, { "epoch": 0.4793422977774681, "grad_norm": 0.6948453783988953, "learning_rate": 5.949867509917303e-06, "loss": 0.08402824401855469, "step": 3440 }, { "epoch": 0.4794816414686825, "grad_norm": 0.6614876389503479, "learning_rate": 5.9475535396714055e-06, "loss": 0.06978321075439453, "step": 3441 }, { "epoch": 0.4796209851598969, "grad_norm": 0.5902853608131409, "learning_rate": 5.945239358900746e-06, "loss": 0.07434272766113281, "step": 3442 }, { "epoch": 0.47976032885111125, "grad_norm": 0.5562872886657715, "learning_rate": 5.94292496811948e-06, "loss": 0.08730697631835938, "step": 3443 }, { "epoch": 0.47989967254232563, "grad_norm": 0.6372216939926147, "learning_rate": 5.940610367841815e-06, "loss": 0.07746505737304688, "step": 3444 }, { "epoch": 0.48003901623354, "grad_norm": 0.5318804979324341, "learning_rate": 5.938295558581999e-06, "loss": 0.07743644714355469, "step": 3445 }, { "epoch": 0.4801783599247544, "grad_norm": 0.6871753931045532, "learning_rate": 5.935980540854332e-06, "loss": 0.0958414077758789, "step": 3446 }, { "epoch": 0.48031770361596876, "grad_norm": 0.49021172523498535, "learning_rate": 5.933665315173158e-06, "loss": 0.07283878326416016, "step": 3447 }, { "epoch": 0.4804570473071832, "grad_norm": 0.5682189464569092, "learning_rate": 5.931349882052866e-06, "loss": 0.0699472427368164, "step": 3448 }, { "epoch": 0.48059639099839757, "grad_norm": 0.8088340759277344, "learning_rate": 5.929034242007895e-06, "loss": 0.08024311065673828, "step": 3449 }, { "epoch": 0.48073573468961195, "grad_norm": 1.0198564529418945, "learning_rate": 5.926718395552723e-06, "loss": 0.097015380859375, "step": 3450 }, { "epoch": 0.4808750783808263, "grad_norm": 0.6600379347801208, "learning_rate": 5.924402343201883e-06, "loss": 0.0814962387084961, "step": 3451 }, { "epoch": 0.4810144220720407, "grad_norm": 0.8202258944511414, "learning_rate": 5.922086085469947e-06, "loss": 0.08096623420715332, "step": 3452 }, { "epoch": 0.4811537657632551, "grad_norm": 0.5555921792984009, "learning_rate": 5.919769622871533e-06, "loss": 0.0682373046875, "step": 3453 }, { "epoch": 0.48129310945446946, "grad_norm": 0.740869402885437, "learning_rate": 5.917452955921309e-06, "loss": 0.07926082611083984, "step": 3454 }, { "epoch": 0.48143245314568384, "grad_norm": 0.6571289300918579, "learning_rate": 5.915136085133983e-06, "loss": 0.09933757781982422, "step": 3455 }, { "epoch": 0.4815717968368982, "grad_norm": 0.9599358439445496, "learning_rate": 5.9128190110243115e-06, "loss": 0.0907430648803711, "step": 3456 }, { "epoch": 0.4817111405281126, "grad_norm": 0.8019287586212158, "learning_rate": 5.910501734107097e-06, "loss": 0.07190990447998047, "step": 3457 }, { "epoch": 0.48185048421932697, "grad_norm": 0.910352885723114, "learning_rate": 5.908184254897183e-06, "loss": 0.07115411758422852, "step": 3458 }, { "epoch": 0.48198982791054135, "grad_norm": 1.4251089096069336, "learning_rate": 5.905866573909462e-06, "loss": 0.08917617797851562, "step": 3459 }, { "epoch": 0.4821291716017557, "grad_norm": 1.1927448511123657, "learning_rate": 5.9035486916588705e-06, "loss": 0.08968067169189453, "step": 3460 }, { "epoch": 0.4822685152929701, "grad_norm": 0.7811448574066162, "learning_rate": 5.901230608660386e-06, "loss": 0.08296489715576172, "step": 3461 }, { "epoch": 0.4824078589841845, "grad_norm": 0.7370181679725647, "learning_rate": 5.898912325429038e-06, "loss": 0.07703208923339844, "step": 3462 }, { "epoch": 0.48254720267539886, "grad_norm": 1.0244004726409912, "learning_rate": 5.896593842479893e-06, "loss": 0.07582759857177734, "step": 3463 }, { "epoch": 0.48268654636661323, "grad_norm": 0.6462571620941162, "learning_rate": 5.8942751603280645e-06, "loss": 0.07158946990966797, "step": 3464 }, { "epoch": 0.4828258900578276, "grad_norm": 0.9963232278823853, "learning_rate": 5.891956279488715e-06, "loss": 0.08001041412353516, "step": 3465 }, { "epoch": 0.482965233749042, "grad_norm": 0.9692730903625488, "learning_rate": 5.889637200477041e-06, "loss": 0.06473064422607422, "step": 3466 }, { "epoch": 0.48310457744025637, "grad_norm": 1.0472195148468018, "learning_rate": 5.887317923808294e-06, "loss": 0.07732582092285156, "step": 3467 }, { "epoch": 0.4832439211314708, "grad_norm": 1.457115650177002, "learning_rate": 5.88499844999776e-06, "loss": 0.082611083984375, "step": 3468 }, { "epoch": 0.4833832648226852, "grad_norm": 0.6808584928512573, "learning_rate": 5.882678779560776e-06, "loss": 0.08008670806884766, "step": 3469 }, { "epoch": 0.48352260851389955, "grad_norm": 0.8951553702354431, "learning_rate": 5.880358913012722e-06, "loss": 0.06865501403808594, "step": 3470 }, { "epoch": 0.48366195220511393, "grad_norm": 0.8473978638648987, "learning_rate": 5.878038850869012e-06, "loss": 0.08195304870605469, "step": 3471 }, { "epoch": 0.4838012958963283, "grad_norm": 0.49589771032333374, "learning_rate": 5.875718593645118e-06, "loss": 0.07587337493896484, "step": 3472 }, { "epoch": 0.4839406395875427, "grad_norm": 0.8471946120262146, "learning_rate": 5.873398141856545e-06, "loss": 0.07555341720581055, "step": 3473 }, { "epoch": 0.48407998327875706, "grad_norm": 0.9422582387924194, "learning_rate": 5.871077496018844e-06, "loss": 0.08048629760742188, "step": 3474 }, { "epoch": 0.48421932696997144, "grad_norm": 1.4099048376083374, "learning_rate": 5.868756656647611e-06, "loss": 0.09133672714233398, "step": 3475 }, { "epoch": 0.4843586706611858, "grad_norm": 0.9008955955505371, "learning_rate": 5.866435624258483e-06, "loss": 0.07822608947753906, "step": 3476 }, { "epoch": 0.4844980143524002, "grad_norm": 0.9698305726051331, "learning_rate": 5.86411439936714e-06, "loss": 0.07290458679199219, "step": 3477 }, { "epoch": 0.4846373580436146, "grad_norm": 0.5596779584884644, "learning_rate": 5.861792982489306e-06, "loss": 0.06368160247802734, "step": 3478 }, { "epoch": 0.48477670173482895, "grad_norm": 0.8555561304092407, "learning_rate": 5.8594713741407465e-06, "loss": 0.09761428833007812, "step": 3479 }, { "epoch": 0.48491604542604333, "grad_norm": 0.6629188656806946, "learning_rate": 5.857149574837269e-06, "loss": 0.08242416381835938, "step": 3480 }, { "epoch": 0.4850553891172577, "grad_norm": 0.7116796374320984, "learning_rate": 5.854827585094725e-06, "loss": 0.07989883422851562, "step": 3481 }, { "epoch": 0.4851947328084721, "grad_norm": 0.5386209487915039, "learning_rate": 5.852505405429007e-06, "loss": 0.07593631744384766, "step": 3482 }, { "epoch": 0.48533407649968646, "grad_norm": 1.1260857582092285, "learning_rate": 5.850183036356054e-06, "loss": 0.09751248359680176, "step": 3483 }, { "epoch": 0.48547342019090084, "grad_norm": 1.3889964818954468, "learning_rate": 5.847860478391838e-06, "loss": 0.08800840377807617, "step": 3484 }, { "epoch": 0.4856127638821152, "grad_norm": 0.597465991973877, "learning_rate": 5.845537732052381e-06, "loss": 0.06294012069702148, "step": 3485 }, { "epoch": 0.4857521075733296, "grad_norm": 1.6870262622833252, "learning_rate": 5.8432147978537444e-06, "loss": 0.13196265697479248, "step": 3486 }, { "epoch": 0.48589145126454397, "grad_norm": 0.5138251781463623, "learning_rate": 5.840891676312029e-06, "loss": 0.0711216926574707, "step": 3487 }, { "epoch": 0.4860307949557584, "grad_norm": 0.828618586063385, "learning_rate": 5.838568367943383e-06, "loss": 0.08843040466308594, "step": 3488 }, { "epoch": 0.4861701386469728, "grad_norm": 0.49217694997787476, "learning_rate": 5.836244873263989e-06, "loss": 0.0711669921875, "step": 3489 }, { "epoch": 0.48630948233818716, "grad_norm": 0.8672369718551636, "learning_rate": 5.8339211927900776e-06, "loss": 0.06709814071655273, "step": 3490 }, { "epoch": 0.48644882602940154, "grad_norm": 1.2229759693145752, "learning_rate": 5.831597327037914e-06, "loss": 0.08392333984375, "step": 3491 }, { "epoch": 0.4865881697206159, "grad_norm": 1.0695542097091675, "learning_rate": 5.829273276523811e-06, "loss": 0.09482765197753906, "step": 3492 }, { "epoch": 0.4867275134118303, "grad_norm": 0.4652194678783417, "learning_rate": 5.82694904176412e-06, "loss": 0.06836223602294922, "step": 3493 }, { "epoch": 0.48686685710304467, "grad_norm": 0.5297482013702393, "learning_rate": 5.82462462327523e-06, "loss": 0.0823974609375, "step": 3494 }, { "epoch": 0.48700620079425905, "grad_norm": 0.7982929348945618, "learning_rate": 5.822300021573574e-06, "loss": 0.09959793090820312, "step": 3495 }, { "epoch": 0.4871455444854734, "grad_norm": 0.6069077849388123, "learning_rate": 5.819975237175629e-06, "loss": 0.07579803466796875, "step": 3496 }, { "epoch": 0.4872848881766878, "grad_norm": 0.8157080411911011, "learning_rate": 5.817650270597906e-06, "loss": 0.07554817199707031, "step": 3497 }, { "epoch": 0.4874242318679022, "grad_norm": 0.6860123872756958, "learning_rate": 5.815325122356959e-06, "loss": 0.08489322662353516, "step": 3498 }, { "epoch": 0.48756357555911656, "grad_norm": 0.8265448808670044, "learning_rate": 5.8129997929693845e-06, "loss": 0.11000823974609375, "step": 3499 }, { "epoch": 0.48770291925033094, "grad_norm": 0.8009164333343506, "learning_rate": 5.810674282951817e-06, "loss": 0.07718038558959961, "step": 3500 }, { "epoch": 0.4878422629415453, "grad_norm": 0.8195143342018127, "learning_rate": 5.808348592820932e-06, "loss": 0.08242511749267578, "step": 3501 }, { "epoch": 0.4879816066327597, "grad_norm": 1.0702804327011108, "learning_rate": 5.806022723093445e-06, "loss": 0.08410382270812988, "step": 3502 }, { "epoch": 0.48812095032397407, "grad_norm": 0.8587530255317688, "learning_rate": 5.80369667428611e-06, "loss": 0.08263254165649414, "step": 3503 }, { "epoch": 0.48826029401518845, "grad_norm": 1.0881853103637695, "learning_rate": 5.801370446915724e-06, "loss": 0.09797286987304688, "step": 3504 }, { "epoch": 0.4883996377064028, "grad_norm": 0.5848087072372437, "learning_rate": 5.799044041499119e-06, "loss": 0.07250165939331055, "step": 3505 }, { "epoch": 0.4885389813976172, "grad_norm": 0.7225008606910706, "learning_rate": 5.7967174585531705e-06, "loss": 0.07655811309814453, "step": 3506 }, { "epoch": 0.4886783250888316, "grad_norm": 0.6262607574462891, "learning_rate": 5.794390698594793e-06, "loss": 0.08037185668945312, "step": 3507 }, { "epoch": 0.488817668780046, "grad_norm": 0.6754441261291504, "learning_rate": 5.792063762140938e-06, "loss": 0.08604717254638672, "step": 3508 }, { "epoch": 0.4889570124712604, "grad_norm": 1.1534570455551147, "learning_rate": 5.789736649708598e-06, "loss": 0.0861520767211914, "step": 3509 }, { "epoch": 0.48909635616247477, "grad_norm": 0.6722133159637451, "learning_rate": 5.787409361814805e-06, "loss": 0.08138275146484375, "step": 3510 }, { "epoch": 0.48923569985368914, "grad_norm": 0.5350961089134216, "learning_rate": 5.785081898976627e-06, "loss": 0.07335424423217773, "step": 3511 }, { "epoch": 0.4893750435449035, "grad_norm": 1.3198403120040894, "learning_rate": 5.782754261711177e-06, "loss": 0.08135032653808594, "step": 3512 }, { "epoch": 0.4895143872361179, "grad_norm": 0.7124833464622498, "learning_rate": 5.7804264505356e-06, "loss": 0.08828353881835938, "step": 3513 }, { "epoch": 0.4896537309273323, "grad_norm": 1.0621209144592285, "learning_rate": 5.778098465967082e-06, "loss": 0.103271484375, "step": 3514 }, { "epoch": 0.48979307461854665, "grad_norm": 0.6176009774208069, "learning_rate": 5.7757703085228515e-06, "loss": 0.07678794860839844, "step": 3515 }, { "epoch": 0.48993241830976103, "grad_norm": 0.6439688801765442, "learning_rate": 5.773441978720167e-06, "loss": 0.08615303039550781, "step": 3516 }, { "epoch": 0.4900717620009754, "grad_norm": 1.1433846950531006, "learning_rate": 5.771113477076335e-06, "loss": 0.08691120147705078, "step": 3517 }, { "epoch": 0.4902111056921898, "grad_norm": 1.1506943702697754, "learning_rate": 5.7687848041086905e-06, "loss": 0.10601806640625, "step": 3518 }, { "epoch": 0.49035044938340416, "grad_norm": 0.661958634853363, "learning_rate": 5.766455960334616e-06, "loss": 0.07408332824707031, "step": 3519 }, { "epoch": 0.49048979307461854, "grad_norm": 0.4860573410987854, "learning_rate": 5.764126946271526e-06, "loss": 0.07613515853881836, "step": 3520 }, { "epoch": 0.4906291367658329, "grad_norm": 0.5158430933952332, "learning_rate": 5.761797762436872e-06, "loss": 0.0694742202758789, "step": 3521 }, { "epoch": 0.4907684804570473, "grad_norm": 0.6360711455345154, "learning_rate": 5.759468409348149e-06, "loss": 0.07372236251831055, "step": 3522 }, { "epoch": 0.4909078241482617, "grad_norm": 0.6777043342590332, "learning_rate": 5.757138887522884e-06, "loss": 0.0712270736694336, "step": 3523 }, { "epoch": 0.49104716783947605, "grad_norm": 0.6573695540428162, "learning_rate": 5.754809197478644e-06, "loss": 0.07773017883300781, "step": 3524 }, { "epoch": 0.49118651153069043, "grad_norm": 0.7594519257545471, "learning_rate": 5.752479339733033e-06, "loss": 0.0896444320678711, "step": 3525 }, { "epoch": 0.4913258552219048, "grad_norm": 1.5808302164077759, "learning_rate": 5.750149314803691e-06, "loss": 0.11906814575195312, "step": 3526 }, { "epoch": 0.4914651989131192, "grad_norm": 0.7487438321113586, "learning_rate": 5.747819123208299e-06, "loss": 0.0933980941772461, "step": 3527 }, { "epoch": 0.4916045426043336, "grad_norm": 0.6502747535705566, "learning_rate": 5.7454887654645706e-06, "loss": 0.08931159973144531, "step": 3528 }, { "epoch": 0.491743886295548, "grad_norm": 0.8499768972396851, "learning_rate": 5.7431582420902576e-06, "loss": 0.09675312042236328, "step": 3529 }, { "epoch": 0.49188322998676237, "grad_norm": 0.9410906434059143, "learning_rate": 5.740827553603149e-06, "loss": 0.09079170227050781, "step": 3530 }, { "epoch": 0.49202257367797675, "grad_norm": 0.7863745093345642, "learning_rate": 5.738496700521073e-06, "loss": 0.0684347152709961, "step": 3531 }, { "epoch": 0.4921619173691911, "grad_norm": 0.8592617511749268, "learning_rate": 5.736165683361889e-06, "loss": 0.09101104736328125, "step": 3532 }, { "epoch": 0.4923012610604055, "grad_norm": 0.7186799049377441, "learning_rate": 5.7338345026434995e-06, "loss": 0.07245135307312012, "step": 3533 }, { "epoch": 0.4924406047516199, "grad_norm": 1.5798416137695312, "learning_rate": 5.731503158883835e-06, "loss": 0.08511066436767578, "step": 3534 }, { "epoch": 0.49257994844283426, "grad_norm": 1.4414888620376587, "learning_rate": 5.729171652600869e-06, "loss": 0.08130264282226562, "step": 3535 }, { "epoch": 0.49271929213404864, "grad_norm": 0.8313446640968323, "learning_rate": 5.726839984312611e-06, "loss": 0.07280731201171875, "step": 3536 }, { "epoch": 0.492858635825263, "grad_norm": 0.5674654841423035, "learning_rate": 5.724508154537101e-06, "loss": 0.07942008972167969, "step": 3537 }, { "epoch": 0.4929979795164774, "grad_norm": 0.75196373462677, "learning_rate": 5.72217616379242e-06, "loss": 0.08861327171325684, "step": 3538 }, { "epoch": 0.49313732320769177, "grad_norm": 0.6266509890556335, "learning_rate": 5.719844012596683e-06, "loss": 0.058701515197753906, "step": 3539 }, { "epoch": 0.49327666689890615, "grad_norm": 1.0612760782241821, "learning_rate": 5.7175117014680415e-06, "loss": 0.07415962219238281, "step": 3540 }, { "epoch": 0.4934160105901205, "grad_norm": 1.1420456171035767, "learning_rate": 5.71517923092468e-06, "loss": 0.10476875305175781, "step": 3541 }, { "epoch": 0.4935553542813349, "grad_norm": 0.7798614501953125, "learning_rate": 5.712846601484822e-06, "loss": 0.09690284729003906, "step": 3542 }, { "epoch": 0.4936946979725493, "grad_norm": 0.8596300482749939, "learning_rate": 5.710513813666722e-06, "loss": 0.07724475860595703, "step": 3543 }, { "epoch": 0.49383404166376366, "grad_norm": 0.6915950179100037, "learning_rate": 5.708180867988676e-06, "loss": 0.07648849487304688, "step": 3544 }, { "epoch": 0.49397338535497803, "grad_norm": 0.6603370904922485, "learning_rate": 5.705847764969008e-06, "loss": 0.07885360717773438, "step": 3545 }, { "epoch": 0.4941127290461924, "grad_norm": 1.8680099248886108, "learning_rate": 5.703514505126081e-06, "loss": 0.09045600891113281, "step": 3546 }, { "epoch": 0.4942520727374068, "grad_norm": 0.7377333045005798, "learning_rate": 5.701181088978295e-06, "loss": 0.08858394622802734, "step": 3547 }, { "epoch": 0.49439141642862117, "grad_norm": 1.057263731956482, "learning_rate": 5.698847517044076e-06, "loss": 0.08220481872558594, "step": 3548 }, { "epoch": 0.4945307601198356, "grad_norm": 0.7955649495124817, "learning_rate": 5.696513789841897e-06, "loss": 0.09579944610595703, "step": 3549 }, { "epoch": 0.49467010381105, "grad_norm": 0.5682924389839172, "learning_rate": 5.6941799078902525e-06, "loss": 0.07921314239501953, "step": 3550 }, { "epoch": 0.49480944750226435, "grad_norm": 0.6856989860534668, "learning_rate": 5.691845871707682e-06, "loss": 0.0860443115234375, "step": 3551 }, { "epoch": 0.49494879119347873, "grad_norm": 0.6800500750541687, "learning_rate": 5.689511681812755e-06, "loss": 0.07205963134765625, "step": 3552 }, { "epoch": 0.4950881348846931, "grad_norm": 0.5400657653808594, "learning_rate": 5.687177338724073e-06, "loss": 0.07067012786865234, "step": 3553 }, { "epoch": 0.4952274785759075, "grad_norm": 0.8582639098167419, "learning_rate": 5.684842842960276e-06, "loss": 0.1148824691772461, "step": 3554 }, { "epoch": 0.49536682226712186, "grad_norm": 1.2070355415344238, "learning_rate": 5.682508195040032e-06, "loss": 0.1041727066040039, "step": 3555 }, { "epoch": 0.49550616595833624, "grad_norm": 0.8366382718086243, "learning_rate": 5.68017339548205e-06, "loss": 0.1095123291015625, "step": 3556 }, { "epoch": 0.4956455096495506, "grad_norm": 0.8159394264221191, "learning_rate": 5.6778384448050694e-06, "loss": 0.08949089050292969, "step": 3557 }, { "epoch": 0.495784853340765, "grad_norm": 0.7925398349761963, "learning_rate": 5.675503343527861e-06, "loss": 0.086700439453125, "step": 3558 }, { "epoch": 0.4959241970319794, "grad_norm": 0.7031335830688477, "learning_rate": 5.673168092169231e-06, "loss": 0.08188438415527344, "step": 3559 }, { "epoch": 0.49606354072319375, "grad_norm": 0.5680030584335327, "learning_rate": 5.670832691248021e-06, "loss": 0.07277584075927734, "step": 3560 }, { "epoch": 0.49620288441440813, "grad_norm": 0.6056911945343018, "learning_rate": 5.668497141283101e-06, "loss": 0.06877851486206055, "step": 3561 }, { "epoch": 0.4963422281056225, "grad_norm": 0.6471631526947021, "learning_rate": 5.66616144279338e-06, "loss": 0.0796661376953125, "step": 3562 }, { "epoch": 0.4964815717968369, "grad_norm": 0.6201465129852295, "learning_rate": 5.663825596297794e-06, "loss": 0.08402347564697266, "step": 3563 }, { "epoch": 0.49662091548805126, "grad_norm": 0.6806635856628418, "learning_rate": 5.661489602315314e-06, "loss": 0.07931900024414062, "step": 3564 }, { "epoch": 0.49676025917926564, "grad_norm": 0.8219456076622009, "learning_rate": 5.6591534613649505e-06, "loss": 0.07762527465820312, "step": 3565 }, { "epoch": 0.49689960287048, "grad_norm": 0.7711730599403381, "learning_rate": 5.656817173965733e-06, "loss": 0.07668399810791016, "step": 3566 }, { "epoch": 0.4970389465616944, "grad_norm": 0.8155601024627686, "learning_rate": 5.6544807406367365e-06, "loss": 0.07084465026855469, "step": 3567 }, { "epoch": 0.49717829025290877, "grad_norm": 0.7485607862472534, "learning_rate": 5.6521441618970605e-06, "loss": 0.07846832275390625, "step": 3568 }, { "epoch": 0.4973176339441232, "grad_norm": 0.9580447673797607, "learning_rate": 5.649807438265842e-06, "loss": 0.08727073669433594, "step": 3569 }, { "epoch": 0.4974569776353376, "grad_norm": 1.2683817148208618, "learning_rate": 5.647470570262246e-06, "loss": 0.09495973587036133, "step": 3570 }, { "epoch": 0.49759632132655196, "grad_norm": 0.6032671332359314, "learning_rate": 5.64513355840547e-06, "loss": 0.0746469497680664, "step": 3571 }, { "epoch": 0.49773566501776634, "grad_norm": 0.9615435600280762, "learning_rate": 5.642796403214747e-06, "loss": 0.09450435638427734, "step": 3572 }, { "epoch": 0.4978750087089807, "grad_norm": 0.6072433590888977, "learning_rate": 5.640459105209337e-06, "loss": 0.06743240356445312, "step": 3573 }, { "epoch": 0.4980143524001951, "grad_norm": 1.0183181762695312, "learning_rate": 5.638121664908537e-06, "loss": 0.0873422622680664, "step": 3574 }, { "epoch": 0.49815369609140947, "grad_norm": 0.7229384183883667, "learning_rate": 5.635784082831671e-06, "loss": 0.06682014465332031, "step": 3575 }, { "epoch": 0.49829303978262385, "grad_norm": 0.8852227926254272, "learning_rate": 5.633446359498098e-06, "loss": 0.08186531066894531, "step": 3576 }, { "epoch": 0.4984323834738382, "grad_norm": 0.5782665610313416, "learning_rate": 5.6311084954272055e-06, "loss": 0.0823526382446289, "step": 3577 }, { "epoch": 0.4985717271650526, "grad_norm": 0.7265008687973022, "learning_rate": 5.628770491138414e-06, "loss": 0.07628536224365234, "step": 3578 }, { "epoch": 0.498711070856267, "grad_norm": 0.6156501770019531, "learning_rate": 5.626432347151173e-06, "loss": 0.09151458740234375, "step": 3579 }, { "epoch": 0.49885041454748136, "grad_norm": 0.6140867471694946, "learning_rate": 5.624094063984967e-06, "loss": 0.07618331909179688, "step": 3580 }, { "epoch": 0.49898975823869574, "grad_norm": 0.6916826367378235, "learning_rate": 5.621755642159309e-06, "loss": 0.07964229583740234, "step": 3581 }, { "epoch": 0.4991291019299101, "grad_norm": 0.6174061298370361, "learning_rate": 5.61941708219374e-06, "loss": 0.09716987609863281, "step": 3582 }, { "epoch": 0.4992684456211245, "grad_norm": 0.7096824049949646, "learning_rate": 5.617078384607839e-06, "loss": 0.08175945281982422, "step": 3583 }, { "epoch": 0.49940778931233887, "grad_norm": 1.1242097616195679, "learning_rate": 5.614739549921208e-06, "loss": 0.089874267578125, "step": 3584 }, { "epoch": 0.49954713300355325, "grad_norm": 0.7707458734512329, "learning_rate": 5.612400578653484e-06, "loss": 0.07760810852050781, "step": 3585 }, { "epoch": 0.4996864766947676, "grad_norm": 0.6905641555786133, "learning_rate": 5.610061471324335e-06, "loss": 0.08415722846984863, "step": 3586 }, { "epoch": 0.499825820385982, "grad_norm": 0.70585036277771, "learning_rate": 5.607722228453452e-06, "loss": 0.07423973083496094, "step": 3587 }, { "epoch": 0.4999651640771964, "grad_norm": 0.7734363079071045, "learning_rate": 5.605382850560565e-06, "loss": 0.07781219482421875, "step": 3588 }, { "epoch": 0.5001045077684108, "grad_norm": 0.9253653883934021, "learning_rate": 5.6030433381654305e-06, "loss": 0.08725929260253906, "step": 3589 }, { "epoch": 0.5002438514596251, "grad_norm": 0.5401427149772644, "learning_rate": 5.600703691787833e-06, "loss": 0.07517147064208984, "step": 3590 }, { "epoch": 0.5003831951508395, "grad_norm": 0.7218847870826721, "learning_rate": 5.598363911947591e-06, "loss": 0.0897064208984375, "step": 3591 }, { "epoch": 0.5005225388420539, "grad_norm": 0.6403679847717285, "learning_rate": 5.596023999164547e-06, "loss": 0.07123756408691406, "step": 3592 }, { "epoch": 0.5006618825332683, "grad_norm": 0.559283971786499, "learning_rate": 5.593683953958579e-06, "loss": 0.07702302932739258, "step": 3593 }, { "epoch": 0.5008012262244826, "grad_norm": 0.7185911536216736, "learning_rate": 5.591343776849591e-06, "loss": 0.07622146606445312, "step": 3594 }, { "epoch": 0.500940569915697, "grad_norm": 0.9698741436004639, "learning_rate": 5.5890034683575145e-06, "loss": 0.08816719055175781, "step": 3595 }, { "epoch": 0.5010799136069114, "grad_norm": 0.5029512047767639, "learning_rate": 5.586663029002314e-06, "loss": 0.07400369644165039, "step": 3596 }, { "epoch": 0.5012192572981258, "grad_norm": 0.7004791498184204, "learning_rate": 5.584322459303984e-06, "loss": 0.08357620239257812, "step": 3597 }, { "epoch": 0.5013586009893402, "grad_norm": 0.5796653628349304, "learning_rate": 5.581981759782543e-06, "loss": 0.06689620018005371, "step": 3598 }, { "epoch": 0.5014979446805546, "grad_norm": 0.7317281365394592, "learning_rate": 5.579640930958043e-06, "loss": 0.08185958862304688, "step": 3599 }, { "epoch": 0.501637288371769, "grad_norm": 0.6595704555511475, "learning_rate": 5.57729997335056e-06, "loss": 0.06934833526611328, "step": 3600 }, { "epoch": 0.5017766320629834, "grad_norm": 0.9110985994338989, "learning_rate": 5.5749588874802055e-06, "loss": 0.0981588363647461, "step": 3601 }, { "epoch": 0.5019159757541978, "grad_norm": 0.6668416261672974, "learning_rate": 5.572617673867111e-06, "loss": 0.08530235290527344, "step": 3602 }, { "epoch": 0.5020553194454122, "grad_norm": 0.8091809153556824, "learning_rate": 5.570276333031441e-06, "loss": 0.0636148452758789, "step": 3603 }, { "epoch": 0.5021946631366265, "grad_norm": 0.4654698073863983, "learning_rate": 5.567934865493392e-06, "loss": 0.06564998626708984, "step": 3604 }, { "epoch": 0.5023340068278409, "grad_norm": 1.272921085357666, "learning_rate": 5.5655932717731805e-06, "loss": 0.09994125366210938, "step": 3605 }, { "epoch": 0.5024733505190553, "grad_norm": 0.7449053525924683, "learning_rate": 5.563251552391058e-06, "loss": 0.08012580871582031, "step": 3606 }, { "epoch": 0.5026126942102697, "grad_norm": 1.0335204601287842, "learning_rate": 5.560909707867299e-06, "loss": 0.09395074844360352, "step": 3607 }, { "epoch": 0.502752037901484, "grad_norm": 0.7491799592971802, "learning_rate": 5.558567738722208e-06, "loss": 0.07453155517578125, "step": 3608 }, { "epoch": 0.5028913815926984, "grad_norm": 0.8209254145622253, "learning_rate": 5.556225645476119e-06, "loss": 0.06862425804138184, "step": 3609 }, { "epoch": 0.5030307252839128, "grad_norm": 0.9386504888534546, "learning_rate": 5.55388342864939e-06, "loss": 0.08883190155029297, "step": 3610 }, { "epoch": 0.5031700689751272, "grad_norm": 0.6688951849937439, "learning_rate": 5.5515410887624085e-06, "loss": 0.07065105438232422, "step": 3611 }, { "epoch": 0.5033094126663415, "grad_norm": 1.0101274251937866, "learning_rate": 5.549198626335589e-06, "loss": 0.078765869140625, "step": 3612 }, { "epoch": 0.5034487563575559, "grad_norm": 0.5656741261482239, "learning_rate": 5.546856041889374e-06, "loss": 0.08382892608642578, "step": 3613 }, { "epoch": 0.5035881000487703, "grad_norm": 0.6971048712730408, "learning_rate": 5.544513335944228e-06, "loss": 0.08460235595703125, "step": 3614 }, { "epoch": 0.5037274437399847, "grad_norm": 0.6564088463783264, "learning_rate": 5.542170509020655e-06, "loss": 0.09362602233886719, "step": 3615 }, { "epoch": 0.5038667874311991, "grad_norm": 0.9058481454849243, "learning_rate": 5.539827561639169e-06, "loss": 0.07863998413085938, "step": 3616 }, { "epoch": 0.5040061311224134, "grad_norm": 0.6497145295143127, "learning_rate": 5.537484494320324e-06, "loss": 0.07917022705078125, "step": 3617 }, { "epoch": 0.5041454748136278, "grad_norm": 0.5651482939720154, "learning_rate": 5.535141307584697e-06, "loss": 0.07883882522583008, "step": 3618 }, { "epoch": 0.5042848185048422, "grad_norm": 0.9013325572013855, "learning_rate": 5.532798001952888e-06, "loss": 0.06760549545288086, "step": 3619 }, { "epoch": 0.5044241621960566, "grad_norm": 0.5574406385421753, "learning_rate": 5.530454577945529e-06, "loss": 0.070037841796875, "step": 3620 }, { "epoch": 0.504563505887271, "grad_norm": 0.6622887849807739, "learning_rate": 5.52811103608327e-06, "loss": 0.067413330078125, "step": 3621 }, { "epoch": 0.5047028495784853, "grad_norm": 1.0038527250289917, "learning_rate": 5.525767376886797e-06, "loss": 0.07526063919067383, "step": 3622 }, { "epoch": 0.5048421932696997, "grad_norm": 0.8576382398605347, "learning_rate": 5.523423600876816e-06, "loss": 0.10192728042602539, "step": 3623 }, { "epoch": 0.5049815369609141, "grad_norm": 0.7157410979270935, "learning_rate": 5.521079708574062e-06, "loss": 0.07426071166992188, "step": 3624 }, { "epoch": 0.5051208806521285, "grad_norm": 0.5972293615341187, "learning_rate": 5.5187357004992926e-06, "loss": 0.07592582702636719, "step": 3625 }, { "epoch": 0.5052602243433428, "grad_norm": 0.5277520418167114, "learning_rate": 5.516391577173293e-06, "loss": 0.06717777252197266, "step": 3626 }, { "epoch": 0.5053995680345572, "grad_norm": 0.6906243562698364, "learning_rate": 5.514047339116874e-06, "loss": 0.07798624038696289, "step": 3627 }, { "epoch": 0.5055389117257716, "grad_norm": 0.5325253009796143, "learning_rate": 5.511702986850873e-06, "loss": 0.07157182693481445, "step": 3628 }, { "epoch": 0.505678255416986, "grad_norm": 0.620917558670044, "learning_rate": 5.509358520896151e-06, "loss": 0.07902908325195312, "step": 3629 }, { "epoch": 0.5058175991082003, "grad_norm": 0.5976970195770264, "learning_rate": 5.507013941773593e-06, "loss": 0.0720682144165039, "step": 3630 }, { "epoch": 0.5059569427994147, "grad_norm": 0.6668094396591187, "learning_rate": 5.504669250004116e-06, "loss": 0.08183956146240234, "step": 3631 }, { "epoch": 0.5060962864906291, "grad_norm": 1.4275778532028198, "learning_rate": 5.502324446108649e-06, "loss": 0.08098506927490234, "step": 3632 }, { "epoch": 0.5062356301818435, "grad_norm": 0.4493514597415924, "learning_rate": 5.49997953060816e-06, "loss": 0.06676864624023438, "step": 3633 }, { "epoch": 0.5063749738730579, "grad_norm": 0.6460025310516357, "learning_rate": 5.497634504023634e-06, "loss": 0.07056045532226562, "step": 3634 }, { "epoch": 0.5065143175642722, "grad_norm": 0.7487305998802185, "learning_rate": 5.495289366876083e-06, "loss": 0.08937740325927734, "step": 3635 }, { "epoch": 0.5066536612554866, "grad_norm": 1.1976772546768188, "learning_rate": 5.492944119686544e-06, "loss": 0.10358214378356934, "step": 3636 }, { "epoch": 0.506793004946701, "grad_norm": 0.8471946716308594, "learning_rate": 5.4905987629760724e-06, "loss": 0.0912313461303711, "step": 3637 }, { "epoch": 0.5069323486379154, "grad_norm": 0.5314007997512817, "learning_rate": 5.488253297265757e-06, "loss": 0.060832977294921875, "step": 3638 }, { "epoch": 0.5070716923291299, "grad_norm": 0.6941518783569336, "learning_rate": 5.485907723076708e-06, "loss": 0.07215499877929688, "step": 3639 }, { "epoch": 0.5072110360203442, "grad_norm": 1.1240092515945435, "learning_rate": 5.483562040930055e-06, "loss": 0.09675025939941406, "step": 3640 }, { "epoch": 0.5073503797115586, "grad_norm": 0.7444973587989807, "learning_rate": 5.481216251346956e-06, "loss": 0.078094482421875, "step": 3641 }, { "epoch": 0.507489723402773, "grad_norm": 0.6542607545852661, "learning_rate": 5.478870354848593e-06, "loss": 0.09939765930175781, "step": 3642 }, { "epoch": 0.5076290670939874, "grad_norm": 0.9091564416885376, "learning_rate": 5.47652435195617e-06, "loss": 0.08958244323730469, "step": 3643 }, { "epoch": 0.5077684107852017, "grad_norm": 0.5560624599456787, "learning_rate": 5.4741782431909144e-06, "loss": 0.07094335556030273, "step": 3644 }, { "epoch": 0.5079077544764161, "grad_norm": 0.7503925561904907, "learning_rate": 5.471832029074079e-06, "loss": 0.08676910400390625, "step": 3645 }, { "epoch": 0.5080470981676305, "grad_norm": 0.7762072682380676, "learning_rate": 5.469485710126938e-06, "loss": 0.0842289924621582, "step": 3646 }, { "epoch": 0.5081864418588449, "grad_norm": 0.9431191682815552, "learning_rate": 5.467139286870794e-06, "loss": 0.08983993530273438, "step": 3647 }, { "epoch": 0.5083257855500593, "grad_norm": 1.095253825187683, "learning_rate": 5.464792759826962e-06, "loss": 0.10805845260620117, "step": 3648 }, { "epoch": 0.5084651292412736, "grad_norm": 1.996055245399475, "learning_rate": 5.462446129516793e-06, "loss": 0.10583877563476562, "step": 3649 }, { "epoch": 0.508604472932488, "grad_norm": 0.5481684803962708, "learning_rate": 5.460099396461649e-06, "loss": 0.06930160522460938, "step": 3650 }, { "epoch": 0.5087438166237024, "grad_norm": 0.9560269117355347, "learning_rate": 5.457752561182924e-06, "loss": 0.0881195068359375, "step": 3651 }, { "epoch": 0.5088831603149168, "grad_norm": 0.4573022425174713, "learning_rate": 5.455405624202032e-06, "loss": 0.06273031234741211, "step": 3652 }, { "epoch": 0.5090225040061311, "grad_norm": 0.6757876873016357, "learning_rate": 5.453058586040406e-06, "loss": 0.09827995300292969, "step": 3653 }, { "epoch": 0.5091618476973455, "grad_norm": 0.755547046661377, "learning_rate": 5.450711447219507e-06, "loss": 0.09692764282226562, "step": 3654 }, { "epoch": 0.5093011913885599, "grad_norm": 0.6052380204200745, "learning_rate": 5.448364208260813e-06, "loss": 0.07082080841064453, "step": 3655 }, { "epoch": 0.5094405350797743, "grad_norm": 0.4502519965171814, "learning_rate": 5.446016869685829e-06, "loss": 0.0665273666381836, "step": 3656 }, { "epoch": 0.5095798787709886, "grad_norm": 0.7992151379585266, "learning_rate": 5.44366943201608e-06, "loss": 0.07783126831054688, "step": 3657 }, { "epoch": 0.509719222462203, "grad_norm": 0.7106739282608032, "learning_rate": 5.441321895773112e-06, "loss": 0.06845760345458984, "step": 3658 }, { "epoch": 0.5098585661534174, "grad_norm": 0.5919772982597351, "learning_rate": 5.438974261478494e-06, "loss": 0.0805044174194336, "step": 3659 }, { "epoch": 0.5099979098446318, "grad_norm": 0.5193150639533997, "learning_rate": 5.436626529653817e-06, "loss": 0.07189178466796875, "step": 3660 }, { "epoch": 0.5101372535358462, "grad_norm": 0.9895167946815491, "learning_rate": 5.434278700820693e-06, "loss": 0.07541179656982422, "step": 3661 }, { "epoch": 0.5102765972270605, "grad_norm": 0.6573755741119385, "learning_rate": 5.431930775500756e-06, "loss": 0.0835714340209961, "step": 3662 }, { "epoch": 0.5104159409182749, "grad_norm": 0.6627696752548218, "learning_rate": 5.429582754215664e-06, "loss": 0.08508110046386719, "step": 3663 }, { "epoch": 0.5105552846094893, "grad_norm": 0.8603388667106628, "learning_rate": 5.4272346374870885e-06, "loss": 0.09837818145751953, "step": 3664 }, { "epoch": 0.5106946283007037, "grad_norm": 0.6326137781143188, "learning_rate": 5.424886425836734e-06, "loss": 0.07262182235717773, "step": 3665 }, { "epoch": 0.510833971991918, "grad_norm": 0.632951021194458, "learning_rate": 5.4225381197863135e-06, "loss": 0.07060623168945312, "step": 3666 }, { "epoch": 0.5109733156831324, "grad_norm": 0.6428859829902649, "learning_rate": 5.420189719857571e-06, "loss": 0.07287883758544922, "step": 3667 }, { "epoch": 0.5111126593743468, "grad_norm": 0.737269937992096, "learning_rate": 5.417841226572263e-06, "loss": 0.0876607894897461, "step": 3668 }, { "epoch": 0.5112520030655612, "grad_norm": 0.7349538803100586, "learning_rate": 5.415492640452177e-06, "loss": 0.07388591766357422, "step": 3669 }, { "epoch": 0.5113913467567756, "grad_norm": 0.4477946162223816, "learning_rate": 5.4131439620191115e-06, "loss": 0.057082176208496094, "step": 3670 }, { "epoch": 0.5115306904479899, "grad_norm": 0.8162935972213745, "learning_rate": 5.4107951917948896e-06, "loss": 0.10491752624511719, "step": 3671 }, { "epoch": 0.5116700341392043, "grad_norm": 0.8375660181045532, "learning_rate": 5.408446330301355e-06, "loss": 0.08522796630859375, "step": 3672 }, { "epoch": 0.5118093778304187, "grad_norm": 0.5916626453399658, "learning_rate": 5.40609737806037e-06, "loss": 0.0750722885131836, "step": 3673 }, { "epoch": 0.5119487215216331, "grad_norm": 0.8711057305335999, "learning_rate": 5.403748335593819e-06, "loss": 0.07547855377197266, "step": 3674 }, { "epoch": 0.5120880652128474, "grad_norm": 0.7716543078422546, "learning_rate": 5.4013992034236065e-06, "loss": 0.06356048583984375, "step": 3675 }, { "epoch": 0.5122274089040618, "grad_norm": 1.0550098419189453, "learning_rate": 5.3990499820716545e-06, "loss": 0.079315185546875, "step": 3676 }, { "epoch": 0.5123667525952762, "grad_norm": 1.3228932619094849, "learning_rate": 5.396700672059907e-06, "loss": 0.09774208068847656, "step": 3677 }, { "epoch": 0.5125060962864906, "grad_norm": 1.0084477663040161, "learning_rate": 5.394351273910327e-06, "loss": 0.06755924224853516, "step": 3678 }, { "epoch": 0.5126454399777051, "grad_norm": 0.6836240291595459, "learning_rate": 5.392001788144897e-06, "loss": 0.09138679504394531, "step": 3679 }, { "epoch": 0.5127847836689194, "grad_norm": 0.9864017963409424, "learning_rate": 5.389652215285618e-06, "loss": 0.08416986465454102, "step": 3680 }, { "epoch": 0.5129241273601338, "grad_norm": 0.658791184425354, "learning_rate": 5.387302555854516e-06, "loss": 0.07971382141113281, "step": 3681 }, { "epoch": 0.5130634710513482, "grad_norm": 0.8208654522895813, "learning_rate": 5.384952810373625e-06, "loss": 0.08055305480957031, "step": 3682 }, { "epoch": 0.5132028147425626, "grad_norm": 0.9436005353927612, "learning_rate": 5.382602979365009e-06, "loss": 0.06667804718017578, "step": 3683 }, { "epoch": 0.513342158433777, "grad_norm": 0.6087523102760315, "learning_rate": 5.380253063350747e-06, "loss": 0.06914901733398438, "step": 3684 }, { "epoch": 0.5134815021249913, "grad_norm": 0.626532793045044, "learning_rate": 5.377903062852935e-06, "loss": 0.08347034454345703, "step": 3685 }, { "epoch": 0.5136208458162057, "grad_norm": 1.5842262506484985, "learning_rate": 5.375552978393691e-06, "loss": 0.10171890258789062, "step": 3686 }, { "epoch": 0.5137601895074201, "grad_norm": 0.6371689438819885, "learning_rate": 5.373202810495149e-06, "loss": 0.08152294158935547, "step": 3687 }, { "epoch": 0.5138995331986345, "grad_norm": 0.5795436501502991, "learning_rate": 5.370852559679461e-06, "loss": 0.05991935729980469, "step": 3688 }, { "epoch": 0.5140388768898488, "grad_norm": 0.8956339955329895, "learning_rate": 5.368502226468803e-06, "loss": 0.0709676742553711, "step": 3689 }, { "epoch": 0.5141782205810632, "grad_norm": 0.903332531452179, "learning_rate": 5.366151811385363e-06, "loss": 0.09803199768066406, "step": 3690 }, { "epoch": 0.5143175642722776, "grad_norm": 0.7558239102363586, "learning_rate": 5.363801314951349e-06, "loss": 0.08617210388183594, "step": 3691 }, { "epoch": 0.514456907963492, "grad_norm": 0.8411740064620972, "learning_rate": 5.361450737688989e-06, "loss": 0.07155990600585938, "step": 3692 }, { "epoch": 0.5145962516547063, "grad_norm": 0.8731164336204529, "learning_rate": 5.359100080120527e-06, "loss": 0.06415557861328125, "step": 3693 }, { "epoch": 0.5147355953459207, "grad_norm": 1.238707423210144, "learning_rate": 5.356749342768226e-06, "loss": 0.0900120735168457, "step": 3694 }, { "epoch": 0.5148749390371351, "grad_norm": 0.7435643076896667, "learning_rate": 5.354398526154365e-06, "loss": 0.08514928817749023, "step": 3695 }, { "epoch": 0.5150142827283495, "grad_norm": 0.8601511120796204, "learning_rate": 5.352047630801242e-06, "loss": 0.08095932006835938, "step": 3696 }, { "epoch": 0.5151536264195639, "grad_norm": 0.8519003391265869, "learning_rate": 5.349696657231176e-06, "loss": 0.08007526397705078, "step": 3697 }, { "epoch": 0.5152929701107782, "grad_norm": 0.8600539565086365, "learning_rate": 5.347345605966493e-06, "loss": 0.0864105224609375, "step": 3698 }, { "epoch": 0.5154323138019926, "grad_norm": 0.9717258810997009, "learning_rate": 5.344994477529548e-06, "loss": 0.07433938980102539, "step": 3699 }, { "epoch": 0.515571657493207, "grad_norm": 1.0630340576171875, "learning_rate": 5.342643272442706e-06, "loss": 0.1058511734008789, "step": 3700 }, { "epoch": 0.5157110011844214, "grad_norm": 0.7371855974197388, "learning_rate": 5.340291991228352e-06, "loss": 0.09661102294921875, "step": 3701 }, { "epoch": 0.5158503448756357, "grad_norm": 0.691689670085907, "learning_rate": 5.337940634408888e-06, "loss": 0.06078529357910156, "step": 3702 }, { "epoch": 0.5159896885668501, "grad_norm": 1.001262903213501, "learning_rate": 5.335589202506727e-06, "loss": 0.07692337036132812, "step": 3703 }, { "epoch": 0.5161290322580645, "grad_norm": 1.189682960510254, "learning_rate": 5.333237696044309e-06, "loss": 0.0951986312866211, "step": 3704 }, { "epoch": 0.5162683759492789, "grad_norm": 0.9919215440750122, "learning_rate": 5.330886115544081e-06, "loss": 0.07663345336914062, "step": 3705 }, { "epoch": 0.5164077196404933, "grad_norm": 0.46626806259155273, "learning_rate": 5.328534461528515e-06, "loss": 0.06581592559814453, "step": 3706 }, { "epoch": 0.5165470633317076, "grad_norm": 1.105462670326233, "learning_rate": 5.326182734520091e-06, "loss": 0.10429763793945312, "step": 3707 }, { "epoch": 0.516686407022922, "grad_norm": 0.8823720216751099, "learning_rate": 5.32383093504131e-06, "loss": 0.09685325622558594, "step": 3708 }, { "epoch": 0.5168257507141364, "grad_norm": 0.6803625226020813, "learning_rate": 5.32147906361469e-06, "loss": 0.08888435363769531, "step": 3709 }, { "epoch": 0.5169650944053508, "grad_norm": 0.7877908945083618, "learning_rate": 5.31912712076276e-06, "loss": 0.09022712707519531, "step": 3710 }, { "epoch": 0.5171044380965651, "grad_norm": 1.0981199741363525, "learning_rate": 5.316775107008069e-06, "loss": 0.07590484619140625, "step": 3711 }, { "epoch": 0.5172437817877795, "grad_norm": 0.801657497882843, "learning_rate": 5.314423022873181e-06, "loss": 0.07758045196533203, "step": 3712 }, { "epoch": 0.5173831254789939, "grad_norm": 0.6489709615707397, "learning_rate": 5.312070868880678e-06, "loss": 0.09179210662841797, "step": 3713 }, { "epoch": 0.5175224691702083, "grad_norm": 0.634403645992279, "learning_rate": 5.3097186455531506e-06, "loss": 0.08471393585205078, "step": 3714 }, { "epoch": 0.5176618128614227, "grad_norm": 0.4233015179634094, "learning_rate": 5.307366353413214e-06, "loss": 0.06677627563476562, "step": 3715 }, { "epoch": 0.517801156552637, "grad_norm": 0.7817639112472534, "learning_rate": 5.305013992983487e-06, "loss": 0.08283233642578125, "step": 3716 }, { "epoch": 0.5179405002438514, "grad_norm": 0.7851744890213013, "learning_rate": 5.302661564786617e-06, "loss": 0.08493518829345703, "step": 3717 }, { "epoch": 0.5180798439350658, "grad_norm": 0.7724170088768005, "learning_rate": 5.300309069345257e-06, "loss": 0.07686376571655273, "step": 3718 }, { "epoch": 0.5182191876262803, "grad_norm": 0.5840238928794861, "learning_rate": 5.297956507182077e-06, "loss": 0.07131814956665039, "step": 3719 }, { "epoch": 0.5183585313174947, "grad_norm": 0.7792360782623291, "learning_rate": 5.295603878819764e-06, "loss": 0.09732913970947266, "step": 3720 }, { "epoch": 0.518497875008709, "grad_norm": 0.545918345451355, "learning_rate": 5.2932511847810175e-06, "loss": 0.07382011413574219, "step": 3721 }, { "epoch": 0.5186372186999234, "grad_norm": 0.7264366149902344, "learning_rate": 5.290898425588553e-06, "loss": 0.0816497802734375, "step": 3722 }, { "epoch": 0.5187765623911378, "grad_norm": 0.9761670231819153, "learning_rate": 5.2885456017651e-06, "loss": 0.08471012115478516, "step": 3723 }, { "epoch": 0.5189159060823522, "grad_norm": 0.9385414719581604, "learning_rate": 5.286192713833402e-06, "loss": 0.09632396697998047, "step": 3724 }, { "epoch": 0.5190552497735665, "grad_norm": 0.65434330701828, "learning_rate": 5.283839762316217e-06, "loss": 0.07783126831054688, "step": 3725 }, { "epoch": 0.5191945934647809, "grad_norm": 0.6751278042793274, "learning_rate": 5.281486747736316e-06, "loss": 0.0734853744506836, "step": 3726 }, { "epoch": 0.5193339371559953, "grad_norm": 1.1294052600860596, "learning_rate": 5.279133670616488e-06, "loss": 0.11266517639160156, "step": 3727 }, { "epoch": 0.5194732808472097, "grad_norm": 0.5989218354225159, "learning_rate": 5.276780531479528e-06, "loss": 0.0830678939819336, "step": 3728 }, { "epoch": 0.519612624538424, "grad_norm": 0.9604145288467407, "learning_rate": 5.274427330848257e-06, "loss": 0.10519790649414062, "step": 3729 }, { "epoch": 0.5197519682296384, "grad_norm": 0.6626424789428711, "learning_rate": 5.2720740692454944e-06, "loss": 0.07745552062988281, "step": 3730 }, { "epoch": 0.5198913119208528, "grad_norm": 0.9123162031173706, "learning_rate": 5.269720747194088e-06, "loss": 0.07588481903076172, "step": 3731 }, { "epoch": 0.5200306556120672, "grad_norm": 0.9212066531181335, "learning_rate": 5.267367365216887e-06, "loss": 0.10584449768066406, "step": 3732 }, { "epoch": 0.5201699993032816, "grad_norm": 0.8235381245613098, "learning_rate": 5.265013923836763e-06, "loss": 0.08287715911865234, "step": 3733 }, { "epoch": 0.5203093429944959, "grad_norm": 0.6638946533203125, "learning_rate": 5.262660423576595e-06, "loss": 0.07843017578125, "step": 3734 }, { "epoch": 0.5204486866857103, "grad_norm": 0.5296502113342285, "learning_rate": 5.260306864959278e-06, "loss": 0.06317138671875, "step": 3735 }, { "epoch": 0.5205880303769247, "grad_norm": 0.6704049706459045, "learning_rate": 5.25795324850772e-06, "loss": 0.07602930068969727, "step": 3736 }, { "epoch": 0.5207273740681391, "grad_norm": 0.5750493407249451, "learning_rate": 5.255599574744836e-06, "loss": 0.08562946319580078, "step": 3737 }, { "epoch": 0.5208667177593534, "grad_norm": 0.6950098872184753, "learning_rate": 5.253245844193564e-06, "loss": 0.09021186828613281, "step": 3738 }, { "epoch": 0.5210060614505678, "grad_norm": 0.7535587549209595, "learning_rate": 5.250892057376848e-06, "loss": 0.06502342224121094, "step": 3739 }, { "epoch": 0.5211454051417822, "grad_norm": 0.5794976949691772, "learning_rate": 5.248538214817642e-06, "loss": 0.08289051055908203, "step": 3740 }, { "epoch": 0.5212847488329966, "grad_norm": 0.6314154863357544, "learning_rate": 5.246184317038922e-06, "loss": 0.07814407348632812, "step": 3741 }, { "epoch": 0.521424092524211, "grad_norm": 0.552986204624176, "learning_rate": 5.243830364563665e-06, "loss": 0.07329034805297852, "step": 3742 }, { "epoch": 0.5215634362154253, "grad_norm": 0.9125670194625854, "learning_rate": 5.241476357914869e-06, "loss": 0.07492828369140625, "step": 3743 }, { "epoch": 0.5217027799066397, "grad_norm": 0.7964824438095093, "learning_rate": 5.239122297615539e-06, "loss": 0.07708454132080078, "step": 3744 }, { "epoch": 0.5218421235978541, "grad_norm": 0.9695565700531006, "learning_rate": 5.236768184188693e-06, "loss": 0.10173654556274414, "step": 3745 }, { "epoch": 0.5219814672890685, "grad_norm": 0.8532421588897705, "learning_rate": 5.234414018157361e-06, "loss": 0.08106231689453125, "step": 3746 }, { "epoch": 0.5221208109802828, "grad_norm": 1.0951542854309082, "learning_rate": 5.232059800044589e-06, "loss": 0.07649993896484375, "step": 3747 }, { "epoch": 0.5222601546714972, "grad_norm": 1.0012238025665283, "learning_rate": 5.229705530373424e-06, "loss": 0.11434078216552734, "step": 3748 }, { "epoch": 0.5223994983627116, "grad_norm": 0.8012258410453796, "learning_rate": 5.2273512096669364e-06, "loss": 0.07993507385253906, "step": 3749 }, { "epoch": 0.522538842053926, "grad_norm": 0.7056110501289368, "learning_rate": 5.2249968384482e-06, "loss": 0.08614349365234375, "step": 3750 }, { "epoch": 0.5226781857451404, "grad_norm": 0.7955557703971863, "learning_rate": 5.222642417240305e-06, "loss": 0.07988834381103516, "step": 3751 }, { "epoch": 0.5228175294363547, "grad_norm": 0.8406468033790588, "learning_rate": 5.220287946566347e-06, "loss": 0.08587837219238281, "step": 3752 }, { "epoch": 0.5229568731275691, "grad_norm": 0.46755683422088623, "learning_rate": 5.2179334269494345e-06, "loss": 0.06384658813476562, "step": 3753 }, { "epoch": 0.5230962168187835, "grad_norm": 1.0957914590835571, "learning_rate": 5.215578858912691e-06, "loss": 0.08641433715820312, "step": 3754 }, { "epoch": 0.5232355605099979, "grad_norm": 0.47165706753730774, "learning_rate": 5.213224242979247e-06, "loss": 0.06904363632202148, "step": 3755 }, { "epoch": 0.5233749042012122, "grad_norm": 0.6066452860832214, "learning_rate": 5.2108695796722446e-06, "loss": 0.07938194274902344, "step": 3756 }, { "epoch": 0.5235142478924266, "grad_norm": 0.6942107081413269, "learning_rate": 5.208514869514835e-06, "loss": 0.07948017120361328, "step": 3757 }, { "epoch": 0.523653591583641, "grad_norm": 0.6430485844612122, "learning_rate": 5.206160113030182e-06, "loss": 0.09194755554199219, "step": 3758 }, { "epoch": 0.5237929352748554, "grad_norm": 0.7084940671920776, "learning_rate": 5.203805310741459e-06, "loss": 0.0900888442993164, "step": 3759 }, { "epoch": 0.5239322789660699, "grad_norm": 0.5811633467674255, "learning_rate": 5.201450463171849e-06, "loss": 0.07436656951904297, "step": 3760 }, { "epoch": 0.5240716226572842, "grad_norm": 0.6859610676765442, "learning_rate": 5.199095570844546e-06, "loss": 0.07796669006347656, "step": 3761 }, { "epoch": 0.5242109663484986, "grad_norm": 1.2089906930923462, "learning_rate": 5.19674063428275e-06, "loss": 0.08074808120727539, "step": 3762 }, { "epoch": 0.524350310039713, "grad_norm": 0.7100565433502197, "learning_rate": 5.1943856540096795e-06, "loss": 0.08551478385925293, "step": 3763 }, { "epoch": 0.5244896537309274, "grad_norm": 0.7706743478775024, "learning_rate": 5.192030630548552e-06, "loss": 0.09903144836425781, "step": 3764 }, { "epoch": 0.5246289974221418, "grad_norm": 0.4938461482524872, "learning_rate": 5.1896755644226046e-06, "loss": 0.08037185668945312, "step": 3765 }, { "epoch": 0.5247683411133561, "grad_norm": 0.5584432482719421, "learning_rate": 5.1873204561550764e-06, "loss": 0.06113481521606445, "step": 3766 }, { "epoch": 0.5249076848045705, "grad_norm": 0.5540218949317932, "learning_rate": 5.18496530626922e-06, "loss": 0.0682992935180664, "step": 3767 }, { "epoch": 0.5250470284957849, "grad_norm": 0.7347859144210815, "learning_rate": 5.182610115288296e-06, "loss": 0.0798196792602539, "step": 3768 }, { "epoch": 0.5251863721869993, "grad_norm": 0.6685972809791565, "learning_rate": 5.180254883735571e-06, "loss": 0.0740041732788086, "step": 3769 }, { "epoch": 0.5253257158782136, "grad_norm": 0.5478354692459106, "learning_rate": 5.1778996121343274e-06, "loss": 0.07356071472167969, "step": 3770 }, { "epoch": 0.525465059569428, "grad_norm": 1.0133945941925049, "learning_rate": 5.175544301007852e-06, "loss": 0.08939933776855469, "step": 3771 }, { "epoch": 0.5256044032606424, "grad_norm": 0.8353669047355652, "learning_rate": 5.173188950879441e-06, "loss": 0.09009742736816406, "step": 3772 }, { "epoch": 0.5257437469518568, "grad_norm": 0.5550379753112793, "learning_rate": 5.170833562272398e-06, "loss": 0.060439109802246094, "step": 3773 }, { "epoch": 0.5258830906430711, "grad_norm": 0.6624988317489624, "learning_rate": 5.168478135710038e-06, "loss": 0.08397388458251953, "step": 3774 }, { "epoch": 0.5260224343342855, "grad_norm": 0.7884794473648071, "learning_rate": 5.166122671715683e-06, "loss": 0.10924339294433594, "step": 3775 }, { "epoch": 0.5261617780254999, "grad_norm": 0.7142202258110046, "learning_rate": 5.163767170812663e-06, "loss": 0.08504390716552734, "step": 3776 }, { "epoch": 0.5263011217167143, "grad_norm": 0.9318634271621704, "learning_rate": 5.1614116335243155e-06, "loss": 0.08683061599731445, "step": 3777 }, { "epoch": 0.5264404654079287, "grad_norm": 0.5118428468704224, "learning_rate": 5.1590560603739885e-06, "loss": 0.07205677032470703, "step": 3778 }, { "epoch": 0.526579809099143, "grad_norm": 1.5629794597625732, "learning_rate": 5.156700451885037e-06, "loss": 0.08969306945800781, "step": 3779 }, { "epoch": 0.5267191527903574, "grad_norm": 0.7446704506874084, "learning_rate": 5.154344808580821e-06, "loss": 0.0769968032836914, "step": 3780 }, { "epoch": 0.5268584964815718, "grad_norm": 0.7336946725845337, "learning_rate": 5.151989130984715e-06, "loss": 0.09742927551269531, "step": 3781 }, { "epoch": 0.5269978401727862, "grad_norm": 0.6901087164878845, "learning_rate": 5.149633419620092e-06, "loss": 0.0889739990234375, "step": 3782 }, { "epoch": 0.5271371838640005, "grad_norm": 0.7342773675918579, "learning_rate": 5.147277675010339e-06, "loss": 0.07756519317626953, "step": 3783 }, { "epoch": 0.5272765275552149, "grad_norm": 0.7609965205192566, "learning_rate": 5.144921897678851e-06, "loss": 0.06841850280761719, "step": 3784 }, { "epoch": 0.5274158712464293, "grad_norm": 0.7644372582435608, "learning_rate": 5.142566088149024e-06, "loss": 0.08436965942382812, "step": 3785 }, { "epoch": 0.5275552149376437, "grad_norm": 0.5449750423431396, "learning_rate": 5.1402102469442686e-06, "loss": 0.07775592803955078, "step": 3786 }, { "epoch": 0.5276945586288581, "grad_norm": 0.7307636141777039, "learning_rate": 5.137854374587996e-06, "loss": 0.07161235809326172, "step": 3787 }, { "epoch": 0.5278339023200724, "grad_norm": 0.6710094213485718, "learning_rate": 5.135498471603629e-06, "loss": 0.06052970886230469, "step": 3788 }, { "epoch": 0.5279732460112868, "grad_norm": 0.9681852459907532, "learning_rate": 5.133142538514596e-06, "loss": 0.09156608581542969, "step": 3789 }, { "epoch": 0.5281125897025012, "grad_norm": 0.733955979347229, "learning_rate": 5.130786575844329e-06, "loss": 0.08178234100341797, "step": 3790 }, { "epoch": 0.5282519333937156, "grad_norm": 0.5521684288978577, "learning_rate": 5.128430584116273e-06, "loss": 0.0673823356628418, "step": 3791 }, { "epoch": 0.52839127708493, "grad_norm": 0.7988359332084656, "learning_rate": 5.126074563853872e-06, "loss": 0.06406307220458984, "step": 3792 }, { "epoch": 0.5285306207761443, "grad_norm": 0.7649528384208679, "learning_rate": 5.123718515580581e-06, "loss": 0.07673835754394531, "step": 3793 }, { "epoch": 0.5286699644673587, "grad_norm": 0.632926344871521, "learning_rate": 5.1213624398198606e-06, "loss": 0.06835079193115234, "step": 3794 }, { "epoch": 0.5288093081585731, "grad_norm": 0.7793012261390686, "learning_rate": 5.119006337095178e-06, "loss": 0.0722055435180664, "step": 3795 }, { "epoch": 0.5289486518497875, "grad_norm": 1.1004772186279297, "learning_rate": 5.1166502079300015e-06, "loss": 0.11467170715332031, "step": 3796 }, { "epoch": 0.5290879955410018, "grad_norm": 1.768457055091858, "learning_rate": 5.114294052847814e-06, "loss": 0.1277780532836914, "step": 3797 }, { "epoch": 0.5292273392322162, "grad_norm": 0.9923757314682007, "learning_rate": 5.111937872372097e-06, "loss": 0.11457157135009766, "step": 3798 }, { "epoch": 0.5293666829234306, "grad_norm": 1.2411599159240723, "learning_rate": 5.109581667026341e-06, "loss": 0.10232734680175781, "step": 3799 }, { "epoch": 0.5295060266146451, "grad_norm": 1.11897873878479, "learning_rate": 5.107225437334039e-06, "loss": 0.09153556823730469, "step": 3800 }, { "epoch": 0.5296453703058595, "grad_norm": 0.6403682231903076, "learning_rate": 5.1048691838186935e-06, "loss": 0.07634639739990234, "step": 3801 }, { "epoch": 0.5297847139970738, "grad_norm": 0.8680688738822937, "learning_rate": 5.102512907003812e-06, "loss": 0.08081436157226562, "step": 3802 }, { "epoch": 0.5299240576882882, "grad_norm": 0.5278018116950989, "learning_rate": 5.100156607412899e-06, "loss": 0.06574773788452148, "step": 3803 }, { "epoch": 0.5300634013795026, "grad_norm": 0.9805593490600586, "learning_rate": 5.097800285569476e-06, "loss": 0.08554267883300781, "step": 3804 }, { "epoch": 0.530202745070717, "grad_norm": 0.4690806567668915, "learning_rate": 5.095443941997062e-06, "loss": 0.06843376159667969, "step": 3805 }, { "epoch": 0.5303420887619313, "grad_norm": 0.8422507047653198, "learning_rate": 5.093087577219183e-06, "loss": 0.09338855743408203, "step": 3806 }, { "epoch": 0.5304814324531457, "grad_norm": 0.5807479619979858, "learning_rate": 5.090731191759371e-06, "loss": 0.06690597534179688, "step": 3807 }, { "epoch": 0.5306207761443601, "grad_norm": 0.8376121520996094, "learning_rate": 5.088374786141159e-06, "loss": 0.08333969116210938, "step": 3808 }, { "epoch": 0.5307601198355745, "grad_norm": 0.5842118263244629, "learning_rate": 5.086018360888087e-06, "loss": 0.07238125801086426, "step": 3809 }, { "epoch": 0.5308994635267889, "grad_norm": 0.46891286969184875, "learning_rate": 5.083661916523699e-06, "loss": 0.08374881744384766, "step": 3810 }, { "epoch": 0.5310388072180032, "grad_norm": 0.8407334685325623, "learning_rate": 5.081305453571543e-06, "loss": 0.08232498168945312, "step": 3811 }, { "epoch": 0.5311781509092176, "grad_norm": 0.9416844844818115, "learning_rate": 5.07894897255517e-06, "loss": 0.08216476440429688, "step": 3812 }, { "epoch": 0.531317494600432, "grad_norm": 0.7183004021644592, "learning_rate": 5.076592473998141e-06, "loss": 0.06705617904663086, "step": 3813 }, { "epoch": 0.5314568382916464, "grad_norm": 0.7365012168884277, "learning_rate": 5.07423595842401e-06, "loss": 0.07308006286621094, "step": 3814 }, { "epoch": 0.5315961819828607, "grad_norm": 0.626715898513794, "learning_rate": 5.071879426356345e-06, "loss": 0.0660543441772461, "step": 3815 }, { "epoch": 0.5317355256740751, "grad_norm": 0.8287855386734009, "learning_rate": 5.069522878318712e-06, "loss": 0.09554386138916016, "step": 3816 }, { "epoch": 0.5318748693652895, "grad_norm": 1.3797767162322998, "learning_rate": 5.067166314834684e-06, "loss": 0.0934457778930664, "step": 3817 }, { "epoch": 0.5320142130565039, "grad_norm": 1.195638656616211, "learning_rate": 5.064809736427835e-06, "loss": 0.12210464477539062, "step": 3818 }, { "epoch": 0.5321535567477182, "grad_norm": 0.6790021657943726, "learning_rate": 5.062453143621739e-06, "loss": 0.09479618072509766, "step": 3819 }, { "epoch": 0.5322929004389326, "grad_norm": 1.336012601852417, "learning_rate": 5.060096536939982e-06, "loss": 0.0933837890625, "step": 3820 }, { "epoch": 0.532432244130147, "grad_norm": 0.8717029094696045, "learning_rate": 5.057739916906147e-06, "loss": 0.08033466339111328, "step": 3821 }, { "epoch": 0.5325715878213614, "grad_norm": 0.9331964254379272, "learning_rate": 5.05538328404382e-06, "loss": 0.0814828872680664, "step": 3822 }, { "epoch": 0.5327109315125758, "grad_norm": 0.9737231135368347, "learning_rate": 5.053026638876591e-06, "loss": 0.08262372016906738, "step": 3823 }, { "epoch": 0.5328502752037901, "grad_norm": 1.224021553993225, "learning_rate": 5.050669981928056e-06, "loss": 0.09777259826660156, "step": 3824 }, { "epoch": 0.5329896188950045, "grad_norm": 0.5941999554634094, "learning_rate": 5.048313313721806e-06, "loss": 0.07798004150390625, "step": 3825 }, { "epoch": 0.5331289625862189, "grad_norm": 0.7387123107910156, "learning_rate": 5.04595663478144e-06, "loss": 0.10547447204589844, "step": 3826 }, { "epoch": 0.5332683062774333, "grad_norm": 0.5252520442008972, "learning_rate": 5.0435999456305605e-06, "loss": 0.09030532836914062, "step": 3827 }, { "epoch": 0.5334076499686476, "grad_norm": 0.9102649092674255, "learning_rate": 5.0412432467927674e-06, "loss": 0.07468986511230469, "step": 3828 }, { "epoch": 0.533546993659862, "grad_norm": 1.2138216495513916, "learning_rate": 5.038886538791668e-06, "loss": 0.06839323043823242, "step": 3829 }, { "epoch": 0.5336863373510764, "grad_norm": 1.0797468423843384, "learning_rate": 5.036529822150865e-06, "loss": 0.0730428695678711, "step": 3830 }, { "epoch": 0.5338256810422908, "grad_norm": 1.069101095199585, "learning_rate": 5.034173097393973e-06, "loss": 0.06981754302978516, "step": 3831 }, { "epoch": 0.5339650247335052, "grad_norm": 0.7870736122131348, "learning_rate": 5.031816365044595e-06, "loss": 0.08303070068359375, "step": 3832 }, { "epoch": 0.5341043684247195, "grad_norm": 0.7882254123687744, "learning_rate": 5.02945962562635e-06, "loss": 0.0688018798828125, "step": 3833 }, { "epoch": 0.5342437121159339, "grad_norm": 0.7011737823486328, "learning_rate": 5.027102879662847e-06, "loss": 0.06830883026123047, "step": 3834 }, { "epoch": 0.5343830558071483, "grad_norm": 0.7488727569580078, "learning_rate": 5.024746127677703e-06, "loss": 0.10614204406738281, "step": 3835 }, { "epoch": 0.5345223994983627, "grad_norm": 1.0245915651321411, "learning_rate": 5.022389370194536e-06, "loss": 0.10611820220947266, "step": 3836 }, { "epoch": 0.534661743189577, "grad_norm": 0.5920290946960449, "learning_rate": 5.020032607736961e-06, "loss": 0.0716400146484375, "step": 3837 }, { "epoch": 0.5348010868807914, "grad_norm": 0.6721218228340149, "learning_rate": 5.017675840828597e-06, "loss": 0.07253742218017578, "step": 3838 }, { "epoch": 0.5349404305720058, "grad_norm": 1.0010993480682373, "learning_rate": 5.015319069993066e-06, "loss": 0.10019779205322266, "step": 3839 }, { "epoch": 0.5350797742632203, "grad_norm": 0.6893013715744019, "learning_rate": 5.012962295753988e-06, "loss": 0.06489849090576172, "step": 3840 }, { "epoch": 0.5352191179544347, "grad_norm": 0.4868377447128296, "learning_rate": 5.010605518634982e-06, "loss": 0.06618022918701172, "step": 3841 }, { "epoch": 0.535358461645649, "grad_norm": 0.7262279987335205, "learning_rate": 5.008248739159674e-06, "loss": 0.10015106201171875, "step": 3842 }, { "epoch": 0.5354978053368634, "grad_norm": 0.6292528510093689, "learning_rate": 5.005891957851683e-06, "loss": 0.07014274597167969, "step": 3843 }, { "epoch": 0.5356371490280778, "grad_norm": 0.59772789478302, "learning_rate": 5.003535175234633e-06, "loss": 0.07278013229370117, "step": 3844 }, { "epoch": 0.5357764927192922, "grad_norm": 0.7297507524490356, "learning_rate": 5.001178391832149e-06, "loss": 0.07778358459472656, "step": 3845 }, { "epoch": 0.5359158364105066, "grad_norm": 0.6189523339271545, "learning_rate": 4.998821608167853e-06, "loss": 0.07850837707519531, "step": 3846 }, { "epoch": 0.5360551801017209, "grad_norm": 0.6500387787818909, "learning_rate": 4.996464824765369e-06, "loss": 0.0722970962524414, "step": 3847 }, { "epoch": 0.5361945237929353, "grad_norm": 0.8274425268173218, "learning_rate": 4.994108042148318e-06, "loss": 0.11631202697753906, "step": 3848 }, { "epoch": 0.5363338674841497, "grad_norm": 0.5919544696807861, "learning_rate": 4.991751260840328e-06, "loss": 0.06465625762939453, "step": 3849 }, { "epoch": 0.5364732111753641, "grad_norm": 0.6011368632316589, "learning_rate": 4.9893944813650185e-06, "loss": 0.0818033218383789, "step": 3850 }, { "epoch": 0.5366125548665784, "grad_norm": 0.46768617630004883, "learning_rate": 4.987037704246015e-06, "loss": 0.06602621078491211, "step": 3851 }, { "epoch": 0.5367518985577928, "grad_norm": 0.6595598459243774, "learning_rate": 4.984680930006936e-06, "loss": 0.07168006896972656, "step": 3852 }, { "epoch": 0.5368912422490072, "grad_norm": 0.6959213614463806, "learning_rate": 4.982324159171404e-06, "loss": 0.06975746154785156, "step": 3853 }, { "epoch": 0.5370305859402216, "grad_norm": 0.8406674861907959, "learning_rate": 4.979967392263041e-06, "loss": 0.08823108673095703, "step": 3854 }, { "epoch": 0.537169929631436, "grad_norm": 0.7801045179367065, "learning_rate": 4.977610629805465e-06, "loss": 0.08201980590820312, "step": 3855 }, { "epoch": 0.5373092733226503, "grad_norm": 0.7847827672958374, "learning_rate": 4.975253872322297e-06, "loss": 0.09480762481689453, "step": 3856 }, { "epoch": 0.5374486170138647, "grad_norm": 1.3275129795074463, "learning_rate": 4.972897120337155e-06, "loss": 0.11326122283935547, "step": 3857 }, { "epoch": 0.5375879607050791, "grad_norm": 0.7511340379714966, "learning_rate": 4.970540374373653e-06, "loss": 0.08466720581054688, "step": 3858 }, { "epoch": 0.5377273043962935, "grad_norm": 0.9339806437492371, "learning_rate": 4.9681836349554064e-06, "loss": 0.08791112899780273, "step": 3859 }, { "epoch": 0.5378666480875078, "grad_norm": 0.6462761163711548, "learning_rate": 4.965826902606029e-06, "loss": 0.06658554077148438, "step": 3860 }, { "epoch": 0.5380059917787222, "grad_norm": 0.6149033308029175, "learning_rate": 4.963470177849135e-06, "loss": 0.07266807556152344, "step": 3861 }, { "epoch": 0.5381453354699366, "grad_norm": 1.1171027421951294, "learning_rate": 4.961113461208335e-06, "loss": 0.08191680908203125, "step": 3862 }, { "epoch": 0.538284679161151, "grad_norm": 0.5514424443244934, "learning_rate": 4.958756753207234e-06, "loss": 0.08929634094238281, "step": 3863 }, { "epoch": 0.5384240228523653, "grad_norm": 0.6103603839874268, "learning_rate": 4.956400054369441e-06, "loss": 0.07746601104736328, "step": 3864 }, { "epoch": 0.5385633665435797, "grad_norm": 0.8503748178482056, "learning_rate": 4.954043365218561e-06, "loss": 0.06505584716796875, "step": 3865 }, { "epoch": 0.5387027102347941, "grad_norm": 0.7957927584648132, "learning_rate": 4.951686686278195e-06, "loss": 0.0967702865600586, "step": 3866 }, { "epoch": 0.5388420539260085, "grad_norm": 0.957163393497467, "learning_rate": 4.949330018071947e-06, "loss": 0.07359695434570312, "step": 3867 }, { "epoch": 0.5389813976172229, "grad_norm": 0.5016598701477051, "learning_rate": 4.946973361123411e-06, "loss": 0.07425785064697266, "step": 3868 }, { "epoch": 0.5391207413084372, "grad_norm": 0.6401932835578918, "learning_rate": 4.9446167159561814e-06, "loss": 0.07229995727539062, "step": 3869 }, { "epoch": 0.5392600849996516, "grad_norm": 0.8315362334251404, "learning_rate": 4.942260083093854e-06, "loss": 0.08258819580078125, "step": 3870 }, { "epoch": 0.539399428690866, "grad_norm": 0.8249854445457458, "learning_rate": 4.939903463060018e-06, "loss": 0.07637310028076172, "step": 3871 }, { "epoch": 0.5395387723820804, "grad_norm": 0.6006142497062683, "learning_rate": 4.937546856378263e-06, "loss": 0.07447052001953125, "step": 3872 }, { "epoch": 0.5396781160732947, "grad_norm": 0.6523945927619934, "learning_rate": 4.935190263572168e-06, "loss": 0.07459449768066406, "step": 3873 }, { "epoch": 0.5398174597645091, "grad_norm": 0.9404845833778381, "learning_rate": 4.932833685165318e-06, "loss": 0.09446525573730469, "step": 3874 }, { "epoch": 0.5399568034557235, "grad_norm": 0.9155187606811523, "learning_rate": 4.930477121681289e-06, "loss": 0.06629371643066406, "step": 3875 }, { "epoch": 0.5400961471469379, "grad_norm": 0.8093764185905457, "learning_rate": 4.9281205736436555e-06, "loss": 0.0707855224609375, "step": 3876 }, { "epoch": 0.5402354908381523, "grad_norm": 1.7150259017944336, "learning_rate": 4.925764041575991e-06, "loss": 0.09434700012207031, "step": 3877 }, { "epoch": 0.5403748345293666, "grad_norm": 0.8507980108261108, "learning_rate": 4.9234075260018615e-06, "loss": 0.0830230712890625, "step": 3878 }, { "epoch": 0.540514178220581, "grad_norm": 1.2874815464019775, "learning_rate": 4.921051027444831e-06, "loss": 0.08790206909179688, "step": 3879 }, { "epoch": 0.5406535219117955, "grad_norm": 1.009629487991333, "learning_rate": 4.918694546428458e-06, "loss": 0.07700157165527344, "step": 3880 }, { "epoch": 0.5407928656030099, "grad_norm": 0.8849579095840454, "learning_rate": 4.916338083476303e-06, "loss": 0.06600797176361084, "step": 3881 }, { "epoch": 0.5409322092942243, "grad_norm": 0.845373809337616, "learning_rate": 4.913981639111914e-06, "loss": 0.07639217376708984, "step": 3882 }, { "epoch": 0.5410715529854386, "grad_norm": 0.9096406102180481, "learning_rate": 4.9116252138588435e-06, "loss": 0.08394622802734375, "step": 3883 }, { "epoch": 0.541210896676653, "grad_norm": 1.1866353750228882, "learning_rate": 4.90926880824063e-06, "loss": 0.09013843536376953, "step": 3884 }, { "epoch": 0.5413502403678674, "grad_norm": 1.1372730731964111, "learning_rate": 4.906912422780818e-06, "loss": 0.08223915100097656, "step": 3885 }, { "epoch": 0.5414895840590818, "grad_norm": 0.9103301167488098, "learning_rate": 4.904556058002939e-06, "loss": 0.09090328216552734, "step": 3886 }, { "epoch": 0.5416289277502961, "grad_norm": 1.1682493686676025, "learning_rate": 4.902199714430525e-06, "loss": 0.0625600814819336, "step": 3887 }, { "epoch": 0.5417682714415105, "grad_norm": 1.0935215950012207, "learning_rate": 4.899843392587104e-06, "loss": 0.0780649185180664, "step": 3888 }, { "epoch": 0.5419076151327249, "grad_norm": 0.8181934952735901, "learning_rate": 4.8974870929961915e-06, "loss": 0.07205390930175781, "step": 3889 }, { "epoch": 0.5420469588239393, "grad_norm": 0.916631281375885, "learning_rate": 4.895130816181307e-06, "loss": 0.08016300201416016, "step": 3890 }, { "epoch": 0.5421863025151537, "grad_norm": 0.9542246460914612, "learning_rate": 4.8927745626659625e-06, "loss": 0.07370567321777344, "step": 3891 }, { "epoch": 0.542325646206368, "grad_norm": 0.9666680693626404, "learning_rate": 4.89041833297366e-06, "loss": 0.07925748825073242, "step": 3892 }, { "epoch": 0.5424649898975824, "grad_norm": 0.7504460215568542, "learning_rate": 4.888062127627904e-06, "loss": 0.09350967407226562, "step": 3893 }, { "epoch": 0.5426043335887968, "grad_norm": 1.1180115938186646, "learning_rate": 4.885705947152187e-06, "loss": 0.08321189880371094, "step": 3894 }, { "epoch": 0.5427436772800112, "grad_norm": 0.7941261529922485, "learning_rate": 4.883349792069999e-06, "loss": 0.0786590576171875, "step": 3895 }, { "epoch": 0.5428830209712255, "grad_norm": 0.42083221673965454, "learning_rate": 4.880993662904824e-06, "loss": 0.06502628326416016, "step": 3896 }, { "epoch": 0.5430223646624399, "grad_norm": 0.7089248895645142, "learning_rate": 4.87863756018014e-06, "loss": 0.09137630462646484, "step": 3897 }, { "epoch": 0.5431617083536543, "grad_norm": 1.774942398071289, "learning_rate": 4.87628148441942e-06, "loss": 0.09249687194824219, "step": 3898 }, { "epoch": 0.5433010520448687, "grad_norm": 0.6840721368789673, "learning_rate": 4.8739254361461305e-06, "loss": 0.06636905670166016, "step": 3899 }, { "epoch": 0.543440395736083, "grad_norm": 0.7427138090133667, "learning_rate": 4.871569415883729e-06, "loss": 0.09743118286132812, "step": 3900 }, { "epoch": 0.5435797394272974, "grad_norm": 0.7547391653060913, "learning_rate": 4.869213424155671e-06, "loss": 0.08192920684814453, "step": 3901 }, { "epoch": 0.5437190831185118, "grad_norm": 0.9912539124488831, "learning_rate": 4.8668574614854055e-06, "loss": 0.07119464874267578, "step": 3902 }, { "epoch": 0.5438584268097262, "grad_norm": 0.5620578527450562, "learning_rate": 4.864501528396371e-06, "loss": 0.07842350006103516, "step": 3903 }, { "epoch": 0.5439977705009406, "grad_norm": 0.6405583024024963, "learning_rate": 4.862145625412006e-06, "loss": 0.06272125244140625, "step": 3904 }, { "epoch": 0.5441371141921549, "grad_norm": 0.6214212775230408, "learning_rate": 4.859789753055734e-06, "loss": 0.06655025482177734, "step": 3905 }, { "epoch": 0.5442764578833693, "grad_norm": 0.9956351518630981, "learning_rate": 4.857433911850977e-06, "loss": 0.089141845703125, "step": 3906 }, { "epoch": 0.5444158015745837, "grad_norm": 0.7949082851409912, "learning_rate": 4.8550781023211516e-06, "loss": 0.09972000122070312, "step": 3907 }, { "epoch": 0.5445551452657981, "grad_norm": 1.5097283124923706, "learning_rate": 4.852722324989661e-06, "loss": 0.10484886169433594, "step": 3908 }, { "epoch": 0.5446944889570124, "grad_norm": 0.7753856182098389, "learning_rate": 4.85036658037991e-06, "loss": 0.08585166931152344, "step": 3909 }, { "epoch": 0.5448338326482268, "grad_norm": 1.2888280153274536, "learning_rate": 4.848010869015288e-06, "loss": 0.09069442749023438, "step": 3910 }, { "epoch": 0.5449731763394412, "grad_norm": 0.6502051949501038, "learning_rate": 4.84565519141918e-06, "loss": 0.074066162109375, "step": 3911 }, { "epoch": 0.5451125200306556, "grad_norm": 0.6976872086524963, "learning_rate": 4.843299548114964e-06, "loss": 0.07932758331298828, "step": 3912 }, { "epoch": 0.54525186372187, "grad_norm": 0.7849737405776978, "learning_rate": 4.840943939626012e-06, "loss": 0.08835315704345703, "step": 3913 }, { "epoch": 0.5453912074130843, "grad_norm": 1.2971305847167969, "learning_rate": 4.838588366475685e-06, "loss": 0.08262252807617188, "step": 3914 }, { "epoch": 0.5455305511042987, "grad_norm": 0.5874335765838623, "learning_rate": 4.83623282918734e-06, "loss": 0.07401466369628906, "step": 3915 }, { "epoch": 0.5456698947955131, "grad_norm": 0.629767656326294, "learning_rate": 4.833877328284319e-06, "loss": 0.08738040924072266, "step": 3916 }, { "epoch": 0.5458092384867275, "grad_norm": 0.577234148979187, "learning_rate": 4.831521864289964e-06, "loss": 0.08032608032226562, "step": 3917 }, { "epoch": 0.5459485821779418, "grad_norm": 1.1288853883743286, "learning_rate": 4.829166437727603e-06, "loss": 0.10203170776367188, "step": 3918 }, { "epoch": 0.5460879258691562, "grad_norm": 0.9086178541183472, "learning_rate": 4.82681104912056e-06, "loss": 0.09294986724853516, "step": 3919 }, { "epoch": 0.5462272695603707, "grad_norm": 1.0071454048156738, "learning_rate": 4.82445569899215e-06, "loss": 0.09330940246582031, "step": 3920 }, { "epoch": 0.5463666132515851, "grad_norm": 0.7352439165115356, "learning_rate": 4.822100387865673e-06, "loss": 0.08558464050292969, "step": 3921 }, { "epoch": 0.5465059569427995, "grad_norm": 0.7708268761634827, "learning_rate": 4.8197451162644305e-06, "loss": 0.07436180114746094, "step": 3922 }, { "epoch": 0.5466453006340138, "grad_norm": 0.6222013831138611, "learning_rate": 4.817389884711706e-06, "loss": 0.07248783111572266, "step": 3923 }, { "epoch": 0.5467846443252282, "grad_norm": 0.7167956829071045, "learning_rate": 4.815034693730781e-06, "loss": 0.0818643569946289, "step": 3924 }, { "epoch": 0.5469239880164426, "grad_norm": 1.2867224216461182, "learning_rate": 4.812679543844924e-06, "loss": 0.0823812484741211, "step": 3925 }, { "epoch": 0.547063331707657, "grad_norm": 0.7136272192001343, "learning_rate": 4.810324435577397e-06, "loss": 0.07781744003295898, "step": 3926 }, { "epoch": 0.5472026753988714, "grad_norm": 0.6204665303230286, "learning_rate": 4.807969369451449e-06, "loss": 0.07563400268554688, "step": 3927 }, { "epoch": 0.5473420190900857, "grad_norm": 1.0028810501098633, "learning_rate": 4.805614345990322e-06, "loss": 0.10716533660888672, "step": 3928 }, { "epoch": 0.5474813627813001, "grad_norm": 0.6288691163063049, "learning_rate": 4.803259365717251e-06, "loss": 0.08430957794189453, "step": 3929 }, { "epoch": 0.5476207064725145, "grad_norm": 0.675757110118866, "learning_rate": 4.800904429155458e-06, "loss": 0.06509208679199219, "step": 3930 }, { "epoch": 0.5477600501637289, "grad_norm": 0.6360414028167725, "learning_rate": 4.7985495368281534e-06, "loss": 0.08013343811035156, "step": 3931 }, { "epoch": 0.5478993938549432, "grad_norm": 0.6967456340789795, "learning_rate": 4.796194689258542e-06, "loss": 0.0952444076538086, "step": 3932 }, { "epoch": 0.5480387375461576, "grad_norm": 0.6913489103317261, "learning_rate": 4.793839886969819e-06, "loss": 0.08119869232177734, "step": 3933 }, { "epoch": 0.548178081237372, "grad_norm": 0.8822779655456543, "learning_rate": 4.791485130485167e-06, "loss": 0.10680198669433594, "step": 3934 }, { "epoch": 0.5483174249285864, "grad_norm": 1.418470859527588, "learning_rate": 4.789130420327756e-06, "loss": 0.08374214172363281, "step": 3935 }, { "epoch": 0.5484567686198007, "grad_norm": 0.6186476945877075, "learning_rate": 4.786775757020755e-06, "loss": 0.07459592819213867, "step": 3936 }, { "epoch": 0.5485961123110151, "grad_norm": 1.6332652568817139, "learning_rate": 4.784421141087311e-06, "loss": 0.09105300903320312, "step": 3937 }, { "epoch": 0.5487354560022295, "grad_norm": 0.5955246686935425, "learning_rate": 4.782066573050567e-06, "loss": 0.07870674133300781, "step": 3938 }, { "epoch": 0.5488747996934439, "grad_norm": 0.6982647776603699, "learning_rate": 4.779712053433655e-06, "loss": 0.07034587860107422, "step": 3939 }, { "epoch": 0.5490141433846583, "grad_norm": 0.862182080745697, "learning_rate": 4.777357582759696e-06, "loss": 0.07821178436279297, "step": 3940 }, { "epoch": 0.5491534870758726, "grad_norm": 0.9046956896781921, "learning_rate": 4.7750031615518e-06, "loss": 0.08678746223449707, "step": 3941 }, { "epoch": 0.549292830767087, "grad_norm": 0.7259808778762817, "learning_rate": 4.772648790333065e-06, "loss": 0.06080913543701172, "step": 3942 }, { "epoch": 0.5494321744583014, "grad_norm": 0.6401607394218445, "learning_rate": 4.7702944696265766e-06, "loss": 0.07783317565917969, "step": 3943 }, { "epoch": 0.5495715181495158, "grad_norm": 0.7495033740997314, "learning_rate": 4.767940199955413e-06, "loss": 0.1071157455444336, "step": 3944 }, { "epoch": 0.5497108618407301, "grad_norm": 0.6644220352172852, "learning_rate": 4.765585981842639e-06, "loss": 0.07256412506103516, "step": 3945 }, { "epoch": 0.5498502055319445, "grad_norm": 0.6525827646255493, "learning_rate": 4.76323181581131e-06, "loss": 0.058441162109375, "step": 3946 }, { "epoch": 0.5499895492231589, "grad_norm": 0.5685415863990784, "learning_rate": 4.760877702384464e-06, "loss": 0.06749725341796875, "step": 3947 }, { "epoch": 0.5501288929143733, "grad_norm": 0.6731892228126526, "learning_rate": 4.758523642085133e-06, "loss": 0.06669855117797852, "step": 3948 }, { "epoch": 0.5502682366055877, "grad_norm": 0.6267871856689453, "learning_rate": 4.756169635436336e-06, "loss": 0.06831836700439453, "step": 3949 }, { "epoch": 0.550407580296802, "grad_norm": 0.598672091960907, "learning_rate": 4.75381568296108e-06, "loss": 0.06953048706054688, "step": 3950 }, { "epoch": 0.5505469239880164, "grad_norm": 0.892711877822876, "learning_rate": 4.751461785182358e-06, "loss": 0.09015560150146484, "step": 3951 }, { "epoch": 0.5506862676792308, "grad_norm": 0.5909920334815979, "learning_rate": 4.7491079426231556e-06, "loss": 0.08000469207763672, "step": 3952 }, { "epoch": 0.5508256113704452, "grad_norm": 0.9192797541618347, "learning_rate": 4.746754155806437e-06, "loss": 0.07503843307495117, "step": 3953 }, { "epoch": 0.5509649550616595, "grad_norm": 0.9077401757240295, "learning_rate": 4.744400425255165e-06, "loss": 0.07905197143554688, "step": 3954 }, { "epoch": 0.5511042987528739, "grad_norm": 0.6443670988082886, "learning_rate": 4.7420467514922815e-06, "loss": 0.07320451736450195, "step": 3955 }, { "epoch": 0.5512436424440883, "grad_norm": 0.5318081974983215, "learning_rate": 4.739693135040722e-06, "loss": 0.06663036346435547, "step": 3956 }, { "epoch": 0.5513829861353027, "grad_norm": 0.6408534646034241, "learning_rate": 4.737339576423406e-06, "loss": 0.06067848205566406, "step": 3957 }, { "epoch": 0.551522329826517, "grad_norm": 0.8440285325050354, "learning_rate": 4.734986076163238e-06, "loss": 0.08545160293579102, "step": 3958 }, { "epoch": 0.5516616735177314, "grad_norm": 0.8230769038200378, "learning_rate": 4.732632634783114e-06, "loss": 0.0680239200592041, "step": 3959 }, { "epoch": 0.5518010172089458, "grad_norm": 0.8406873345375061, "learning_rate": 4.730279252805914e-06, "loss": 0.08115863800048828, "step": 3960 }, { "epoch": 0.5519403609001603, "grad_norm": 0.719213604927063, "learning_rate": 4.727925930754506e-06, "loss": 0.09613037109375, "step": 3961 }, { "epoch": 0.5520797045913747, "grad_norm": 0.7215135097503662, "learning_rate": 4.725572669151747e-06, "loss": 0.08558368682861328, "step": 3962 }, { "epoch": 0.552219048282589, "grad_norm": 0.7559782266616821, "learning_rate": 4.723219468520474e-06, "loss": 0.08317184448242188, "step": 3963 }, { "epoch": 0.5523583919738034, "grad_norm": 0.5975756645202637, "learning_rate": 4.720866329383514e-06, "loss": 0.07881879806518555, "step": 3964 }, { "epoch": 0.5524977356650178, "grad_norm": 0.5961477160453796, "learning_rate": 4.718513252263685e-06, "loss": 0.07172012329101562, "step": 3965 }, { "epoch": 0.5526370793562322, "grad_norm": 0.6011540293693542, "learning_rate": 4.716160237683785e-06, "loss": 0.08294391632080078, "step": 3966 }, { "epoch": 0.5527764230474466, "grad_norm": 1.1140085458755493, "learning_rate": 4.7138072861666e-06, "loss": 0.07280921936035156, "step": 3967 }, { "epoch": 0.5529157667386609, "grad_norm": 0.7964913249015808, "learning_rate": 4.711454398234902e-06, "loss": 0.08917641639709473, "step": 3968 }, { "epoch": 0.5530551104298753, "grad_norm": 0.711279034614563, "learning_rate": 4.7091015744114475e-06, "loss": 0.07857036590576172, "step": 3969 }, { "epoch": 0.5531944541210897, "grad_norm": 0.5541741847991943, "learning_rate": 4.706748815218984e-06, "loss": 0.06199073791503906, "step": 3970 }, { "epoch": 0.5533337978123041, "grad_norm": 1.0574089288711548, "learning_rate": 4.704396121180237e-06, "loss": 0.07958698272705078, "step": 3971 }, { "epoch": 0.5534731415035185, "grad_norm": 0.7055047154426575, "learning_rate": 4.702043492817924e-06, "loss": 0.08775711059570312, "step": 3972 }, { "epoch": 0.5536124851947328, "grad_norm": 0.5765355825424194, "learning_rate": 4.6996909306547455e-06, "loss": 0.06495475769042969, "step": 3973 }, { "epoch": 0.5537518288859472, "grad_norm": 0.6630047559738159, "learning_rate": 4.697338435213385e-06, "loss": 0.07315254211425781, "step": 3974 }, { "epoch": 0.5538911725771616, "grad_norm": 0.6490917801856995, "learning_rate": 4.694986007016514e-06, "loss": 0.07369422912597656, "step": 3975 }, { "epoch": 0.554030516268376, "grad_norm": 1.1065433025360107, "learning_rate": 4.692633646586788e-06, "loss": 0.0899810791015625, "step": 3976 }, { "epoch": 0.5541698599595903, "grad_norm": 0.7434398531913757, "learning_rate": 4.690281354446849e-06, "loss": 0.08029842376708984, "step": 3977 }, { "epoch": 0.5543092036508047, "grad_norm": 1.3613265752792358, "learning_rate": 4.6879291311193244e-06, "loss": 0.11006999015808105, "step": 3978 }, { "epoch": 0.5544485473420191, "grad_norm": 0.8748787641525269, "learning_rate": 4.68557697712682e-06, "loss": 0.0882415771484375, "step": 3979 }, { "epoch": 0.5545878910332335, "grad_norm": 0.6708895564079285, "learning_rate": 4.683224892991932e-06, "loss": 0.07989978790283203, "step": 3980 }, { "epoch": 0.5547272347244478, "grad_norm": 0.7074035406112671, "learning_rate": 4.680872879237242e-06, "loss": 0.09372615814208984, "step": 3981 }, { "epoch": 0.5548665784156622, "grad_norm": 0.9617750644683838, "learning_rate": 4.678520936385313e-06, "loss": 0.08323860168457031, "step": 3982 }, { "epoch": 0.5550059221068766, "grad_norm": 0.5856135487556458, "learning_rate": 4.676169064958692e-06, "loss": 0.07157516479492188, "step": 3983 }, { "epoch": 0.555145265798091, "grad_norm": 0.7526550889015198, "learning_rate": 4.6738172654799105e-06, "loss": 0.06682729721069336, "step": 3984 }, { "epoch": 0.5552846094893054, "grad_norm": 0.8066220879554749, "learning_rate": 4.671465538471487e-06, "loss": 0.0839228630065918, "step": 3985 }, { "epoch": 0.5554239531805197, "grad_norm": 0.385781854391098, "learning_rate": 4.66911388445592e-06, "loss": 0.05742168426513672, "step": 3986 }, { "epoch": 0.5555632968717341, "grad_norm": 0.5299086570739746, "learning_rate": 4.666762303955692e-06, "loss": 0.05928754806518555, "step": 3987 }, { "epoch": 0.5557026405629485, "grad_norm": 0.4626697599887848, "learning_rate": 4.664410797493275e-06, "loss": 0.06273365020751953, "step": 3988 }, { "epoch": 0.5558419842541629, "grad_norm": 0.9195610284805298, "learning_rate": 4.662059365591115e-06, "loss": 0.09643363952636719, "step": 3989 }, { "epoch": 0.5559813279453772, "grad_norm": 0.7253668308258057, "learning_rate": 4.6597080087716494e-06, "loss": 0.08318805694580078, "step": 3990 }, { "epoch": 0.5561206716365916, "grad_norm": 0.4779408872127533, "learning_rate": 4.657356727557295e-06, "loss": 0.06566047668457031, "step": 3991 }, { "epoch": 0.556260015327806, "grad_norm": 0.7764744758605957, "learning_rate": 4.655005522470453e-06, "loss": 0.08760261535644531, "step": 3992 }, { "epoch": 0.5563993590190204, "grad_norm": 0.4720037579536438, "learning_rate": 4.652654394033508e-06, "loss": 0.060812950134277344, "step": 3993 }, { "epoch": 0.5565387027102348, "grad_norm": 0.8580929040908813, "learning_rate": 4.650303342768827e-06, "loss": 0.07644367218017578, "step": 3994 }, { "epoch": 0.5566780464014491, "grad_norm": 0.7015684247016907, "learning_rate": 4.6479523691987585e-06, "loss": 0.07313108444213867, "step": 3995 }, { "epoch": 0.5568173900926635, "grad_norm": 0.7100796103477478, "learning_rate": 4.645601473845636e-06, "loss": 0.09496498107910156, "step": 3996 }, { "epoch": 0.5569567337838779, "grad_norm": 0.7501670122146606, "learning_rate": 4.6432506572317754e-06, "loss": 0.07916450500488281, "step": 3997 }, { "epoch": 0.5570960774750923, "grad_norm": 1.0828033685684204, "learning_rate": 4.6408999198794744e-06, "loss": 0.10488128662109375, "step": 3998 }, { "epoch": 0.5572354211663066, "grad_norm": 0.7649691104888916, "learning_rate": 4.6385492623110135e-06, "loss": 0.0953683853149414, "step": 3999 }, { "epoch": 0.557374764857521, "grad_norm": 0.40134376287460327, "learning_rate": 4.636198685048653e-06, "loss": 0.05895519256591797, "step": 4000 }, { "epoch": 0.5575141085487355, "grad_norm": 0.6710278391838074, "learning_rate": 4.633848188614639e-06, "loss": 0.07625293731689453, "step": 4001 }, { "epoch": 0.5576534522399499, "grad_norm": 0.5363906621932983, "learning_rate": 4.631497773531199e-06, "loss": 0.08051300048828125, "step": 4002 }, { "epoch": 0.5577927959311643, "grad_norm": 0.8732468485832214, "learning_rate": 4.629147440320539e-06, "loss": 0.08528804779052734, "step": 4003 }, { "epoch": 0.5579321396223786, "grad_norm": 0.7421971559524536, "learning_rate": 4.626797189504855e-06, "loss": 0.075897216796875, "step": 4004 }, { "epoch": 0.558071483313593, "grad_norm": 0.4808681905269623, "learning_rate": 4.624447021606311e-06, "loss": 0.06118011474609375, "step": 4005 }, { "epoch": 0.5582108270048074, "grad_norm": 0.7493415474891663, "learning_rate": 4.6220969371470665e-06, "loss": 0.0990447998046875, "step": 4006 }, { "epoch": 0.5583501706960218, "grad_norm": 0.6475850939750671, "learning_rate": 4.619746936649254e-06, "loss": 0.0688924789428711, "step": 4007 }, { "epoch": 0.5584895143872362, "grad_norm": 1.0097912549972534, "learning_rate": 4.617397020634991e-06, "loss": 0.08783483505249023, "step": 4008 }, { "epoch": 0.5586288580784505, "grad_norm": 0.6044729351997375, "learning_rate": 4.615047189626376e-06, "loss": 0.060309410095214844, "step": 4009 }, { "epoch": 0.5587682017696649, "grad_norm": 0.6226065754890442, "learning_rate": 4.612697444145487e-06, "loss": 0.06730842590332031, "step": 4010 }, { "epoch": 0.5589075454608793, "grad_norm": 0.39665427803993225, "learning_rate": 4.610347784714383e-06, "loss": 0.06255340576171875, "step": 4011 }, { "epoch": 0.5590468891520937, "grad_norm": 1.2422966957092285, "learning_rate": 4.6079982118551045e-06, "loss": 0.08363914489746094, "step": 4012 }, { "epoch": 0.559186232843308, "grad_norm": 1.3879753351211548, "learning_rate": 4.605648726089674e-06, "loss": 0.09134864807128906, "step": 4013 }, { "epoch": 0.5593255765345224, "grad_norm": 0.8109109997749329, "learning_rate": 4.603299327940094e-06, "loss": 0.07289791107177734, "step": 4014 }, { "epoch": 0.5594649202257368, "grad_norm": 1.267975926399231, "learning_rate": 4.600950017928348e-06, "loss": 0.096282958984375, "step": 4015 }, { "epoch": 0.5596042639169512, "grad_norm": 1.0541484355926514, "learning_rate": 4.598600796576395e-06, "loss": 0.07826423645019531, "step": 4016 }, { "epoch": 0.5597436076081655, "grad_norm": 0.5919758677482605, "learning_rate": 4.596251664406182e-06, "loss": 0.08107280731201172, "step": 4017 }, { "epoch": 0.5598829512993799, "grad_norm": 0.7500582337379456, "learning_rate": 4.593902621939632e-06, "loss": 0.07749462127685547, "step": 4018 }, { "epoch": 0.5600222949905943, "grad_norm": 0.9640753865242004, "learning_rate": 4.591553669698646e-06, "loss": 0.08782339096069336, "step": 4019 }, { "epoch": 0.5601616386818087, "grad_norm": 0.6623979210853577, "learning_rate": 4.589204808205113e-06, "loss": 0.06926345825195312, "step": 4020 }, { "epoch": 0.5603009823730231, "grad_norm": 1.032300591468811, "learning_rate": 4.58685603798089e-06, "loss": 0.11128997802734375, "step": 4021 }, { "epoch": 0.5604403260642374, "grad_norm": 0.6648444533348083, "learning_rate": 4.5845073595478245e-06, "loss": 0.07602977752685547, "step": 4022 }, { "epoch": 0.5605796697554518, "grad_norm": 0.7454730868339539, "learning_rate": 4.5821587734277374e-06, "loss": 0.0790557861328125, "step": 4023 }, { "epoch": 0.5607190134466662, "grad_norm": 0.8603084087371826, "learning_rate": 4.57981028014243e-06, "loss": 0.0726776123046875, "step": 4024 }, { "epoch": 0.5608583571378806, "grad_norm": 1.17264986038208, "learning_rate": 4.577461880213688e-06, "loss": 0.0841207504272461, "step": 4025 }, { "epoch": 0.560997700829095, "grad_norm": 1.2677481174468994, "learning_rate": 4.575113574163269e-06, "loss": 0.10247421264648438, "step": 4026 }, { "epoch": 0.5611370445203093, "grad_norm": 0.6554252505302429, "learning_rate": 4.572765362512912e-06, "loss": 0.0687398910522461, "step": 4027 }, { "epoch": 0.5612763882115237, "grad_norm": 0.6113523244857788, "learning_rate": 4.570417245784337e-06, "loss": 0.0785665512084961, "step": 4028 }, { "epoch": 0.5614157319027381, "grad_norm": 0.7746636271476746, "learning_rate": 4.568069224499244e-06, "loss": 0.08738899230957031, "step": 4029 }, { "epoch": 0.5615550755939525, "grad_norm": 0.49934321641921997, "learning_rate": 4.565721299179308e-06, "loss": 0.06740188598632812, "step": 4030 }, { "epoch": 0.5616944192851668, "grad_norm": 0.49505066871643066, "learning_rate": 4.563373470346186e-06, "loss": 0.06891822814941406, "step": 4031 }, { "epoch": 0.5618337629763812, "grad_norm": 0.8197705149650574, "learning_rate": 4.561025738521508e-06, "loss": 0.10361099243164062, "step": 4032 }, { "epoch": 0.5619731066675956, "grad_norm": 1.282368779182434, "learning_rate": 4.55867810422689e-06, "loss": 0.10516643524169922, "step": 4033 }, { "epoch": 0.56211245035881, "grad_norm": 0.8372423648834229, "learning_rate": 4.5563305679839214e-06, "loss": 0.07268619537353516, "step": 4034 }, { "epoch": 0.5622517940500243, "grad_norm": 0.6318808197975159, "learning_rate": 4.553983130314171e-06, "loss": 0.09345579147338867, "step": 4035 }, { "epoch": 0.5623911377412387, "grad_norm": 0.7793890833854675, "learning_rate": 4.551635791739188e-06, "loss": 0.08742523193359375, "step": 4036 }, { "epoch": 0.5625304814324531, "grad_norm": 0.5281451940536499, "learning_rate": 4.549288552780494e-06, "loss": 0.07004642486572266, "step": 4037 }, { "epoch": 0.5626698251236675, "grad_norm": 0.7616621255874634, "learning_rate": 4.546941413959595e-06, "loss": 0.1032247543334961, "step": 4038 }, { "epoch": 0.5628091688148819, "grad_norm": 0.6919656991958618, "learning_rate": 4.544594375797969e-06, "loss": 0.08370208740234375, "step": 4039 }, { "epoch": 0.5629485125060962, "grad_norm": 1.0662568807601929, "learning_rate": 4.542247438817076e-06, "loss": 0.08335304260253906, "step": 4040 }, { "epoch": 0.5630878561973107, "grad_norm": 0.659421980381012, "learning_rate": 4.539900603538352e-06, "loss": 0.07116222381591797, "step": 4041 }, { "epoch": 0.5632271998885251, "grad_norm": 0.6829744577407837, "learning_rate": 4.53755387048321e-06, "loss": 0.0726613998413086, "step": 4042 }, { "epoch": 0.5633665435797395, "grad_norm": 0.48976850509643555, "learning_rate": 4.53520724017304e-06, "loss": 0.07883834838867188, "step": 4043 }, { "epoch": 0.5635058872709539, "grad_norm": 0.6787797212600708, "learning_rate": 4.532860713129208e-06, "loss": 0.07390022277832031, "step": 4044 }, { "epoch": 0.5636452309621682, "grad_norm": 0.5588542819023132, "learning_rate": 4.530514289873062e-06, "loss": 0.07028388977050781, "step": 4045 }, { "epoch": 0.5637845746533826, "grad_norm": 0.7750270366668701, "learning_rate": 4.528167970925922e-06, "loss": 0.07579421997070312, "step": 4046 }, { "epoch": 0.563923918344597, "grad_norm": 0.6950191855430603, "learning_rate": 4.525821756809088e-06, "loss": 0.08113479614257812, "step": 4047 }, { "epoch": 0.5640632620358114, "grad_norm": 0.7853871583938599, "learning_rate": 4.523475648043832e-06, "loss": 0.08815860748291016, "step": 4048 }, { "epoch": 0.5642026057270257, "grad_norm": 0.3875129222869873, "learning_rate": 4.5211296451514085e-06, "loss": 0.060189247131347656, "step": 4049 }, { "epoch": 0.5643419494182401, "grad_norm": 0.7049263715744019, "learning_rate": 4.518783748653045e-06, "loss": 0.09197044372558594, "step": 4050 }, { "epoch": 0.5644812931094545, "grad_norm": 0.5730843544006348, "learning_rate": 4.516437959069946e-06, "loss": 0.06796455383300781, "step": 4051 }, { "epoch": 0.5646206368006689, "grad_norm": 1.1226669549942017, "learning_rate": 4.514092276923295e-06, "loss": 0.07553958892822266, "step": 4052 }, { "epoch": 0.5647599804918833, "grad_norm": 1.2590385675430298, "learning_rate": 4.5117467027342435e-06, "loss": 0.09908771514892578, "step": 4053 }, { "epoch": 0.5648993241830976, "grad_norm": 0.6886626482009888, "learning_rate": 4.509401237023928e-06, "loss": 0.09200000762939453, "step": 4054 }, { "epoch": 0.565038667874312, "grad_norm": 0.748066782951355, "learning_rate": 4.507055880313458e-06, "loss": 0.06851482391357422, "step": 4055 }, { "epoch": 0.5651780115655264, "grad_norm": 1.011123538017273, "learning_rate": 4.504710633123917e-06, "loss": 0.08552742004394531, "step": 4056 }, { "epoch": 0.5653173552567408, "grad_norm": 0.9900496006011963, "learning_rate": 4.502365495976367e-06, "loss": 0.1044626235961914, "step": 4057 }, { "epoch": 0.5654566989479551, "grad_norm": 0.5025486350059509, "learning_rate": 4.5000204693918405e-06, "loss": 0.07134246826171875, "step": 4058 }, { "epoch": 0.5655960426391695, "grad_norm": 0.5970032811164856, "learning_rate": 4.497675553891352e-06, "loss": 0.07650089263916016, "step": 4059 }, { "epoch": 0.5657353863303839, "grad_norm": 0.5941214561462402, "learning_rate": 4.495330749995887e-06, "loss": 0.07473182678222656, "step": 4060 }, { "epoch": 0.5658747300215983, "grad_norm": 0.6228459477424622, "learning_rate": 4.492986058226407e-06, "loss": 0.0717916488647461, "step": 4061 }, { "epoch": 0.5660140737128126, "grad_norm": 0.7170287370681763, "learning_rate": 4.490641479103851e-06, "loss": 0.08894157409667969, "step": 4062 }, { "epoch": 0.566153417404027, "grad_norm": 0.49784791469573975, "learning_rate": 4.4882970131491286e-06, "loss": 0.07367229461669922, "step": 4063 }, { "epoch": 0.5662927610952414, "grad_norm": 1.5187841653823853, "learning_rate": 4.485952660883126e-06, "loss": 0.10422706604003906, "step": 4064 }, { "epoch": 0.5664321047864558, "grad_norm": 0.8119434714317322, "learning_rate": 4.483608422826708e-06, "loss": 0.08253765106201172, "step": 4065 }, { "epoch": 0.5665714484776702, "grad_norm": 1.2690539360046387, "learning_rate": 4.481264299500709e-06, "loss": 0.10512542724609375, "step": 4066 }, { "epoch": 0.5667107921688845, "grad_norm": 0.8745653033256531, "learning_rate": 4.478920291425939e-06, "loss": 0.08842086791992188, "step": 4067 }, { "epoch": 0.5668501358600989, "grad_norm": 0.8624082803726196, "learning_rate": 4.476576399123187e-06, "loss": 0.07071971893310547, "step": 4068 }, { "epoch": 0.5669894795513133, "grad_norm": 0.73952317237854, "learning_rate": 4.474232623113204e-06, "loss": 0.07007026672363281, "step": 4069 }, { "epoch": 0.5671288232425277, "grad_norm": 1.0606229305267334, "learning_rate": 4.471888963916732e-06, "loss": 0.07660484313964844, "step": 4070 }, { "epoch": 0.567268166933742, "grad_norm": 1.5170018672943115, "learning_rate": 4.4695454220544735e-06, "loss": 0.09966373443603516, "step": 4071 }, { "epoch": 0.5674075106249564, "grad_norm": 0.5885374546051025, "learning_rate": 4.467201998047112e-06, "loss": 0.05969953536987305, "step": 4072 }, { "epoch": 0.5675468543161708, "grad_norm": 0.6388907432556152, "learning_rate": 4.464858692415304e-06, "loss": 0.08226442337036133, "step": 4073 }, { "epoch": 0.5676861980073852, "grad_norm": 0.48777368664741516, "learning_rate": 4.462515505679677e-06, "loss": 0.06519126892089844, "step": 4074 }, { "epoch": 0.5678255416985996, "grad_norm": 0.9328823685646057, "learning_rate": 4.460172438360832e-06, "loss": 0.09131240844726562, "step": 4075 }, { "epoch": 0.5679648853898139, "grad_norm": 2.287175178527832, "learning_rate": 4.457829490979347e-06, "loss": 0.10760211944580078, "step": 4076 }, { "epoch": 0.5681042290810283, "grad_norm": 0.9306060671806335, "learning_rate": 4.455486664055772e-06, "loss": 0.07642555236816406, "step": 4077 }, { "epoch": 0.5682435727722427, "grad_norm": 0.9742950201034546, "learning_rate": 4.4531439581106295e-06, "loss": 0.0809488296508789, "step": 4078 }, { "epoch": 0.5683829164634571, "grad_norm": 0.6682515144348145, "learning_rate": 4.450801373664413e-06, "loss": 0.06537532806396484, "step": 4079 }, { "epoch": 0.5685222601546714, "grad_norm": 0.5980101227760315, "learning_rate": 4.448458911237593e-06, "loss": 0.06845808029174805, "step": 4080 }, { "epoch": 0.5686616038458859, "grad_norm": 0.9772241711616516, "learning_rate": 4.446116571350611e-06, "loss": 0.10070133209228516, "step": 4081 }, { "epoch": 0.5688009475371003, "grad_norm": 0.6520940065383911, "learning_rate": 4.443774354523883e-06, "loss": 0.07691001892089844, "step": 4082 }, { "epoch": 0.5689402912283147, "grad_norm": 0.6228557825088501, "learning_rate": 4.441432261277794e-06, "loss": 0.07752704620361328, "step": 4083 }, { "epoch": 0.5690796349195291, "grad_norm": 0.8460066914558411, "learning_rate": 4.4390902921327025e-06, "loss": 0.08970355987548828, "step": 4084 }, { "epoch": 0.5692189786107434, "grad_norm": 0.8367364406585693, "learning_rate": 4.436748447608944e-06, "loss": 0.0828704833984375, "step": 4085 }, { "epoch": 0.5693583223019578, "grad_norm": 0.7570791840553284, "learning_rate": 4.43440672822682e-06, "loss": 0.06601810455322266, "step": 4086 }, { "epoch": 0.5694976659931722, "grad_norm": 0.7503257393836975, "learning_rate": 4.432065134506608e-06, "loss": 0.061865806579589844, "step": 4087 }, { "epoch": 0.5696370096843866, "grad_norm": 0.9253273606300354, "learning_rate": 4.429723666968559e-06, "loss": 0.09763145446777344, "step": 4088 }, { "epoch": 0.569776353375601, "grad_norm": 0.7832478880882263, "learning_rate": 4.427382326132892e-06, "loss": 0.08731842041015625, "step": 4089 }, { "epoch": 0.5699156970668153, "grad_norm": 0.5365923643112183, "learning_rate": 4.425041112519797e-06, "loss": 0.0668649673461914, "step": 4090 }, { "epoch": 0.5700550407580297, "grad_norm": 0.7021584510803223, "learning_rate": 4.42270002664944e-06, "loss": 0.08989906311035156, "step": 4091 }, { "epoch": 0.5701943844492441, "grad_norm": 0.47333821654319763, "learning_rate": 4.4203590690419575e-06, "loss": 0.07175159454345703, "step": 4092 }, { "epoch": 0.5703337281404585, "grad_norm": 0.7009646892547607, "learning_rate": 4.418018240217457e-06, "loss": 0.0866994857788086, "step": 4093 }, { "epoch": 0.5704730718316728, "grad_norm": 1.2927721738815308, "learning_rate": 4.415677540696017e-06, "loss": 0.0802469253540039, "step": 4094 }, { "epoch": 0.5706124155228872, "grad_norm": 0.42417091131210327, "learning_rate": 4.413336970997687e-06, "loss": 0.06031513214111328, "step": 4095 }, { "epoch": 0.5707517592141016, "grad_norm": 0.6015578508377075, "learning_rate": 4.410996531642487e-06, "loss": 0.053946495056152344, "step": 4096 }, { "epoch": 0.570891102905316, "grad_norm": 0.5780800580978394, "learning_rate": 4.408656223150412e-06, "loss": 0.07975870370864868, "step": 4097 }, { "epoch": 0.5710304465965303, "grad_norm": 0.8844369053840637, "learning_rate": 4.406316046041423e-06, "loss": 0.09279346466064453, "step": 4098 }, { "epoch": 0.5711697902877447, "grad_norm": 0.6620111465454102, "learning_rate": 4.4039760008354556e-06, "loss": 0.07703399658203125, "step": 4099 }, { "epoch": 0.5713091339789591, "grad_norm": 1.3581233024597168, "learning_rate": 4.401636088052411e-06, "loss": 0.07130050659179688, "step": 4100 }, { "epoch": 0.5714484776701735, "grad_norm": 0.5926243662834167, "learning_rate": 4.399296308212168e-06, "loss": 0.07805633544921875, "step": 4101 }, { "epoch": 0.5715878213613879, "grad_norm": 0.6442028284072876, "learning_rate": 4.396956661834571e-06, "loss": 0.05859732627868652, "step": 4102 }, { "epoch": 0.5717271650526022, "grad_norm": 0.574529230594635, "learning_rate": 4.394617149439435e-06, "loss": 0.0708017349243164, "step": 4103 }, { "epoch": 0.5718665087438166, "grad_norm": 0.948366105556488, "learning_rate": 4.392277771546549e-06, "loss": 0.07832193374633789, "step": 4104 }, { "epoch": 0.572005852435031, "grad_norm": 0.9991426467895508, "learning_rate": 4.389938528675668e-06, "loss": 0.08933639526367188, "step": 4105 }, { "epoch": 0.5721451961262454, "grad_norm": 1.076090931892395, "learning_rate": 4.387599421346517e-06, "loss": 0.08162403106689453, "step": 4106 }, { "epoch": 0.5722845398174597, "grad_norm": 0.47277238965034485, "learning_rate": 4.385260450078793e-06, "loss": 0.06894111633300781, "step": 4107 }, { "epoch": 0.5724238835086741, "grad_norm": 0.6842625141143799, "learning_rate": 4.382921615392162e-06, "loss": 0.0739893913269043, "step": 4108 }, { "epoch": 0.5725632271998885, "grad_norm": 0.9853471517562866, "learning_rate": 4.38058291780626e-06, "loss": 0.0934743881225586, "step": 4109 }, { "epoch": 0.5727025708911029, "grad_norm": 0.49117934703826904, "learning_rate": 4.378244357840694e-06, "loss": 0.07251739501953125, "step": 4110 }, { "epoch": 0.5728419145823173, "grad_norm": 0.5267102718353271, "learning_rate": 4.375905936015035e-06, "loss": 0.07912635803222656, "step": 4111 }, { "epoch": 0.5729812582735316, "grad_norm": 0.5701683163642883, "learning_rate": 4.373567652848828e-06, "loss": 0.056891441345214844, "step": 4112 }, { "epoch": 0.573120601964746, "grad_norm": 0.6217593550682068, "learning_rate": 4.371229508861588e-06, "loss": 0.05257129669189453, "step": 4113 }, { "epoch": 0.5732599456559604, "grad_norm": 0.6254033446311951, "learning_rate": 4.368891504572796e-06, "loss": 0.08993339538574219, "step": 4114 }, { "epoch": 0.5733992893471748, "grad_norm": 0.6501300930976868, "learning_rate": 4.3665536405019045e-06, "loss": 0.08038806915283203, "step": 4115 }, { "epoch": 0.5735386330383891, "grad_norm": 1.4395273923873901, "learning_rate": 4.36421591716833e-06, "loss": 0.1043548583984375, "step": 4116 }, { "epoch": 0.5736779767296035, "grad_norm": 1.4268866777420044, "learning_rate": 4.361878335091464e-06, "loss": 0.09238815307617188, "step": 4117 }, { "epoch": 0.5738173204208179, "grad_norm": 0.4684198200702667, "learning_rate": 4.3595408947906644e-06, "loss": 0.07498741149902344, "step": 4118 }, { "epoch": 0.5739566641120323, "grad_norm": 0.47412803769111633, "learning_rate": 4.357203596785254e-06, "loss": 0.07130956649780273, "step": 4119 }, { "epoch": 0.5740960078032467, "grad_norm": 1.0493115186691284, "learning_rate": 4.3548664415945326e-06, "loss": 0.09241676330566406, "step": 4120 }, { "epoch": 0.5742353514944611, "grad_norm": 1.049464464187622, "learning_rate": 4.3525294297377566e-06, "loss": 0.09272193908691406, "step": 4121 }, { "epoch": 0.5743746951856755, "grad_norm": 0.939888596534729, "learning_rate": 4.35019256173416e-06, "loss": 0.08123970031738281, "step": 4122 }, { "epoch": 0.5745140388768899, "grad_norm": 0.6221691966056824, "learning_rate": 4.34785583810294e-06, "loss": 0.07179450988769531, "step": 4123 }, { "epoch": 0.5746533825681043, "grad_norm": 0.5012480020523071, "learning_rate": 4.345519259363264e-06, "loss": 0.06549656391143799, "step": 4124 }, { "epoch": 0.5747927262593187, "grad_norm": 0.6298246383666992, "learning_rate": 4.343182826034268e-06, "loss": 0.0625467300415039, "step": 4125 }, { "epoch": 0.574932069950533, "grad_norm": 0.5734561681747437, "learning_rate": 4.340846538635053e-06, "loss": 0.06762290000915527, "step": 4126 }, { "epoch": 0.5750714136417474, "grad_norm": 0.5664179921150208, "learning_rate": 4.338510397684687e-06, "loss": 0.07481765747070312, "step": 4127 }, { "epoch": 0.5752107573329618, "grad_norm": 0.6893370747566223, "learning_rate": 4.336174403702208e-06, "loss": 0.06233406066894531, "step": 4128 }, { "epoch": 0.5753501010241762, "grad_norm": 0.8374460935592651, "learning_rate": 4.333838557206623e-06, "loss": 0.07561779022216797, "step": 4129 }, { "epoch": 0.5754894447153905, "grad_norm": 0.9308785200119019, "learning_rate": 4.3315028587169e-06, "loss": 0.0886688232421875, "step": 4130 }, { "epoch": 0.5756287884066049, "grad_norm": 0.518459677696228, "learning_rate": 4.329167308751982e-06, "loss": 0.06969261169433594, "step": 4131 }, { "epoch": 0.5757681320978193, "grad_norm": 0.506443202495575, "learning_rate": 4.3268319078307695e-06, "loss": 0.06453514099121094, "step": 4132 }, { "epoch": 0.5759074757890337, "grad_norm": 0.6815585494041443, "learning_rate": 4.324496656472141e-06, "loss": 0.07830333709716797, "step": 4133 }, { "epoch": 0.576046819480248, "grad_norm": 0.8456711173057556, "learning_rate": 4.322161555194932e-06, "loss": 0.07631778717041016, "step": 4134 }, { "epoch": 0.5761861631714624, "grad_norm": 0.5514520406723022, "learning_rate": 4.31982660451795e-06, "loss": 0.06778907775878906, "step": 4135 }, { "epoch": 0.5763255068626768, "grad_norm": 0.7982619404792786, "learning_rate": 4.3174918049599705e-06, "loss": 0.0850229263305664, "step": 4136 }, { "epoch": 0.5764648505538912, "grad_norm": 0.7711899876594543, "learning_rate": 4.315157157039727e-06, "loss": 0.0733499526977539, "step": 4137 }, { "epoch": 0.5766041942451056, "grad_norm": 0.7743672132492065, "learning_rate": 4.312822661275929e-06, "loss": 0.06987380981445312, "step": 4138 }, { "epoch": 0.5767435379363199, "grad_norm": 0.7088069915771484, "learning_rate": 4.310488318187247e-06, "loss": 0.0788111686706543, "step": 4139 }, { "epoch": 0.5768828816275343, "grad_norm": 0.6555392146110535, "learning_rate": 4.308154128292318e-06, "loss": 0.06904983520507812, "step": 4140 }, { "epoch": 0.5770222253187487, "grad_norm": 0.741001546382904, "learning_rate": 4.305820092109748e-06, "loss": 0.05999040603637695, "step": 4141 }, { "epoch": 0.5771615690099631, "grad_norm": 1.4897968769073486, "learning_rate": 4.303486210158106e-06, "loss": 0.11356449127197266, "step": 4142 }, { "epoch": 0.5773009127011774, "grad_norm": 1.0123192071914673, "learning_rate": 4.301152482955926e-06, "loss": 0.1041860580444336, "step": 4143 }, { "epoch": 0.5774402563923918, "grad_norm": 0.49896326661109924, "learning_rate": 4.298818911021707e-06, "loss": 0.07794380187988281, "step": 4144 }, { "epoch": 0.5775796000836062, "grad_norm": 0.767411470413208, "learning_rate": 4.296485494873919e-06, "loss": 0.07096481323242188, "step": 4145 }, { "epoch": 0.5777189437748206, "grad_norm": 0.45547881722450256, "learning_rate": 4.294152235030993e-06, "loss": 0.06637763977050781, "step": 4146 }, { "epoch": 0.577858287466035, "grad_norm": 0.4284384250640869, "learning_rate": 4.291819132011327e-06, "loss": 0.06866836547851562, "step": 4147 }, { "epoch": 0.5779976311572493, "grad_norm": 1.1018221378326416, "learning_rate": 4.2894861863332785e-06, "loss": 0.07953119277954102, "step": 4148 }, { "epoch": 0.5781369748484637, "grad_norm": 0.79856938123703, "learning_rate": 4.28715339851518e-06, "loss": 0.10440826416015625, "step": 4149 }, { "epoch": 0.5782763185396781, "grad_norm": 0.7556654810905457, "learning_rate": 4.284820769075322e-06, "loss": 0.09497261047363281, "step": 4150 }, { "epoch": 0.5784156622308925, "grad_norm": 0.7027143239974976, "learning_rate": 4.282488298531959e-06, "loss": 0.060721397399902344, "step": 4151 }, { "epoch": 0.5785550059221068, "grad_norm": 0.5243555903434753, "learning_rate": 4.28015598740332e-06, "loss": 0.06838417053222656, "step": 4152 }, { "epoch": 0.5786943496133212, "grad_norm": 0.5672584772109985, "learning_rate": 4.277823836207581e-06, "loss": 0.06927776336669922, "step": 4153 }, { "epoch": 0.5788336933045356, "grad_norm": 0.7767509818077087, "learning_rate": 4.275491845462901e-06, "loss": 0.0780935287475586, "step": 4154 }, { "epoch": 0.57897303699575, "grad_norm": 0.8436468839645386, "learning_rate": 4.27316001568739e-06, "loss": 0.09742355346679688, "step": 4155 }, { "epoch": 0.5791123806869644, "grad_norm": 1.2256892919540405, "learning_rate": 4.270828347399131e-06, "loss": 0.09046792984008789, "step": 4156 }, { "epoch": 0.5792517243781787, "grad_norm": 0.9253489375114441, "learning_rate": 4.268496841116166e-06, "loss": 0.08886146545410156, "step": 4157 }, { "epoch": 0.5793910680693931, "grad_norm": 0.561612606048584, "learning_rate": 4.266165497356503e-06, "loss": 0.06868648529052734, "step": 4158 }, { "epoch": 0.5795304117606075, "grad_norm": 0.9971222877502441, "learning_rate": 4.2638343166381115e-06, "loss": 0.07209014892578125, "step": 4159 }, { "epoch": 0.5796697554518219, "grad_norm": 0.4760473668575287, "learning_rate": 4.261503299478928e-06, "loss": 0.06011009216308594, "step": 4160 }, { "epoch": 0.5798090991430362, "grad_norm": 0.6057067513465881, "learning_rate": 4.259172446396851e-06, "loss": 0.05569171905517578, "step": 4161 }, { "epoch": 0.5799484428342507, "grad_norm": 0.5321839451789856, "learning_rate": 4.256841757909744e-06, "loss": 0.07131195068359375, "step": 4162 }, { "epoch": 0.5800877865254651, "grad_norm": 0.6719017624855042, "learning_rate": 4.254511234535432e-06, "loss": 0.07353019714355469, "step": 4163 }, { "epoch": 0.5802271302166795, "grad_norm": 0.9240105152130127, "learning_rate": 4.2521808767917024e-06, "loss": 0.0673074722290039, "step": 4164 }, { "epoch": 0.5803664739078939, "grad_norm": 1.0608954429626465, "learning_rate": 4.2498506851963095e-06, "loss": 0.08842658996582031, "step": 4165 }, { "epoch": 0.5805058175991082, "grad_norm": 0.848332941532135, "learning_rate": 4.247520660266969e-06, "loss": 0.08180046081542969, "step": 4166 }, { "epoch": 0.5806451612903226, "grad_norm": 0.9293779134750366, "learning_rate": 4.245190802521356e-06, "loss": 0.08084344863891602, "step": 4167 }, { "epoch": 0.580784504981537, "grad_norm": 1.0594427585601807, "learning_rate": 4.2428611124771184e-06, "loss": 0.07487106323242188, "step": 4168 }, { "epoch": 0.5809238486727514, "grad_norm": 0.9799529314041138, "learning_rate": 4.240531590651853e-06, "loss": 0.07906723022460938, "step": 4169 }, { "epoch": 0.5810631923639658, "grad_norm": 0.7650821208953857, "learning_rate": 4.238202237563129e-06, "loss": 0.08136177062988281, "step": 4170 }, { "epoch": 0.5812025360551801, "grad_norm": 0.7627467513084412, "learning_rate": 4.235873053728475e-06, "loss": 0.08106613159179688, "step": 4171 }, { "epoch": 0.5813418797463945, "grad_norm": 1.530131459236145, "learning_rate": 4.233544039665385e-06, "loss": 0.09929466247558594, "step": 4172 }, { "epoch": 0.5814812234376089, "grad_norm": 0.7414228916168213, "learning_rate": 4.231215195891311e-06, "loss": 0.08157825469970703, "step": 4173 }, { "epoch": 0.5816205671288233, "grad_norm": 0.6383551955223083, "learning_rate": 4.228886522923668e-06, "loss": 0.07488083839416504, "step": 4174 }, { "epoch": 0.5817599108200376, "grad_norm": 0.5463244915008545, "learning_rate": 4.2265580212798355e-06, "loss": 0.06817150115966797, "step": 4175 }, { "epoch": 0.581899254511252, "grad_norm": 1.033147931098938, "learning_rate": 4.224229691477151e-06, "loss": 0.08031558990478516, "step": 4176 }, { "epoch": 0.5820385982024664, "grad_norm": 0.49356555938720703, "learning_rate": 4.221901534032918e-06, "loss": 0.06698083877563477, "step": 4177 }, { "epoch": 0.5821779418936808, "grad_norm": 0.6876826286315918, "learning_rate": 4.219573549464403e-06, "loss": 0.06875038146972656, "step": 4178 }, { "epoch": 0.5823172855848952, "grad_norm": 0.41303908824920654, "learning_rate": 4.217245738288825e-06, "loss": 0.05712318420410156, "step": 4179 }, { "epoch": 0.5824566292761095, "grad_norm": 0.7776801586151123, "learning_rate": 4.2149181010233734e-06, "loss": 0.07965946197509766, "step": 4180 }, { "epoch": 0.5825959729673239, "grad_norm": 0.5647065043449402, "learning_rate": 4.212590638185196e-06, "loss": 0.06144893169403076, "step": 4181 }, { "epoch": 0.5827353166585383, "grad_norm": 0.5217100381851196, "learning_rate": 4.2102633502914035e-06, "loss": 0.05893135070800781, "step": 4182 }, { "epoch": 0.5828746603497527, "grad_norm": 0.8373509049415588, "learning_rate": 4.2079362378590625e-06, "loss": 0.07753276824951172, "step": 4183 }, { "epoch": 0.583014004040967, "grad_norm": 0.7384604215621948, "learning_rate": 4.2056093014052085e-06, "loss": 0.07039737701416016, "step": 4184 }, { "epoch": 0.5831533477321814, "grad_norm": 0.4525028169155121, "learning_rate": 4.20328254144683e-06, "loss": 0.05319857597351074, "step": 4185 }, { "epoch": 0.5832926914233958, "grad_norm": 0.7318892478942871, "learning_rate": 4.2009559585008826e-06, "loss": 0.09201908111572266, "step": 4186 }, { "epoch": 0.5834320351146102, "grad_norm": 0.915611207485199, "learning_rate": 4.198629553084277e-06, "loss": 0.08432722091674805, "step": 4187 }, { "epoch": 0.5835713788058245, "grad_norm": 0.6756796836853027, "learning_rate": 4.1963033257138904e-06, "loss": 0.0697641372680664, "step": 4188 }, { "epoch": 0.5837107224970389, "grad_norm": 0.7324190139770508, "learning_rate": 4.193977276906557e-06, "loss": 0.072174072265625, "step": 4189 }, { "epoch": 0.5838500661882533, "grad_norm": 0.41446653008461, "learning_rate": 4.191651407179069e-06, "loss": 0.05839252471923828, "step": 4190 }, { "epoch": 0.5839894098794677, "grad_norm": 0.8912923336029053, "learning_rate": 4.189325717048185e-06, "loss": 0.09493398666381836, "step": 4191 }, { "epoch": 0.5841287535706821, "grad_norm": 1.2692906856536865, "learning_rate": 4.187000207030616e-06, "loss": 0.0911874771118164, "step": 4192 }, { "epoch": 0.5842680972618964, "grad_norm": 0.8227731585502625, "learning_rate": 4.184674877643042e-06, "loss": 0.0860891342163086, "step": 4193 }, { "epoch": 0.5844074409531108, "grad_norm": 0.713229238986969, "learning_rate": 4.182349729402097e-06, "loss": 0.08785438537597656, "step": 4194 }, { "epoch": 0.5845467846443252, "grad_norm": 1.841418981552124, "learning_rate": 4.180024762824374e-06, "loss": 0.11074447631835938, "step": 4195 }, { "epoch": 0.5846861283355396, "grad_norm": 0.5364300012588501, "learning_rate": 4.177699978426426e-06, "loss": 0.0670323371887207, "step": 4196 }, { "epoch": 0.584825472026754, "grad_norm": 0.9711835384368896, "learning_rate": 4.175375376724772e-06, "loss": 0.0867624282836914, "step": 4197 }, { "epoch": 0.5849648157179683, "grad_norm": 0.9633090496063232, "learning_rate": 4.173050958235882e-06, "loss": 0.07303619384765625, "step": 4198 }, { "epoch": 0.5851041594091827, "grad_norm": 0.8985379934310913, "learning_rate": 4.170726723476189e-06, "loss": 0.08555793762207031, "step": 4199 }, { "epoch": 0.5852435031003971, "grad_norm": 0.8210683465003967, "learning_rate": 4.168402672962086e-06, "loss": 0.06752967834472656, "step": 4200 }, { "epoch": 0.5853828467916115, "grad_norm": 0.7402666211128235, "learning_rate": 4.166078807209924e-06, "loss": 0.0674448013305664, "step": 4201 }, { "epoch": 0.5855221904828259, "grad_norm": 0.9581471085548401, "learning_rate": 4.163755126736011e-06, "loss": 0.08420181274414062, "step": 4202 }, { "epoch": 0.5856615341740403, "grad_norm": 0.8258485794067383, "learning_rate": 4.1614316320566174e-06, "loss": 0.06894826889038086, "step": 4203 }, { "epoch": 0.5858008778652547, "grad_norm": 0.6859636306762695, "learning_rate": 4.159108323687971e-06, "loss": 0.09189510345458984, "step": 4204 }, { "epoch": 0.5859402215564691, "grad_norm": 0.621566653251648, "learning_rate": 4.156785202146257e-06, "loss": 0.08650398254394531, "step": 4205 }, { "epoch": 0.5860795652476835, "grad_norm": 1.160341501235962, "learning_rate": 4.154462267947621e-06, "loss": 0.07793951034545898, "step": 4206 }, { "epoch": 0.5862189089388978, "grad_norm": 0.7911876440048218, "learning_rate": 4.152139521608164e-06, "loss": 0.07169055938720703, "step": 4207 }, { "epoch": 0.5863582526301122, "grad_norm": 0.8050450682640076, "learning_rate": 4.149816963643947e-06, "loss": 0.06428337097167969, "step": 4208 }, { "epoch": 0.5864975963213266, "grad_norm": 0.6210260391235352, "learning_rate": 4.147494594570992e-06, "loss": 0.06220817565917969, "step": 4209 }, { "epoch": 0.586636940012541, "grad_norm": 1.1365793943405151, "learning_rate": 4.1451724149052764e-06, "loss": 0.1030874252319336, "step": 4210 }, { "epoch": 0.5867762837037553, "grad_norm": 0.6044105887413025, "learning_rate": 4.1428504251627335e-06, "loss": 0.0661325454711914, "step": 4211 }, { "epoch": 0.5869156273949697, "grad_norm": 0.5227201581001282, "learning_rate": 4.140528625859254e-06, "loss": 0.07531356811523438, "step": 4212 }, { "epoch": 0.5870549710861841, "grad_norm": 0.7634769082069397, "learning_rate": 4.138207017510696e-06, "loss": 0.07043600082397461, "step": 4213 }, { "epoch": 0.5871943147773985, "grad_norm": 0.6513663530349731, "learning_rate": 4.1358856006328614e-06, "loss": 0.0736546516418457, "step": 4214 }, { "epoch": 0.5873336584686129, "grad_norm": 0.9584241509437561, "learning_rate": 4.1335643757415195e-06, "loss": 0.09006023406982422, "step": 4215 }, { "epoch": 0.5874730021598272, "grad_norm": 0.5462920665740967, "learning_rate": 4.131243343352391e-06, "loss": 0.058976173400878906, "step": 4216 }, { "epoch": 0.5876123458510416, "grad_norm": 0.6119789481163025, "learning_rate": 4.128922503981158e-06, "loss": 0.06449222564697266, "step": 4217 }, { "epoch": 0.587751689542256, "grad_norm": 0.7456467151641846, "learning_rate": 4.126601858143457e-06, "loss": 0.08473214507102966, "step": 4218 }, { "epoch": 0.5878910332334704, "grad_norm": 0.8393121361732483, "learning_rate": 4.124281406354883e-06, "loss": 0.07620811462402344, "step": 4219 }, { "epoch": 0.5880303769246847, "grad_norm": 0.7189034819602966, "learning_rate": 4.121961149130989e-06, "loss": 0.09409904479980469, "step": 4220 }, { "epoch": 0.5881697206158991, "grad_norm": 0.8392673134803772, "learning_rate": 4.119641086987282e-06, "loss": 0.08198928833007812, "step": 4221 }, { "epoch": 0.5883090643071135, "grad_norm": 0.8345174193382263, "learning_rate": 4.1173212204392245e-06, "loss": 0.07094860076904297, "step": 4222 }, { "epoch": 0.5884484079983279, "grad_norm": 0.9583436250686646, "learning_rate": 4.115001550002241e-06, "loss": 0.06475973129272461, "step": 4223 }, { "epoch": 0.5885877516895422, "grad_norm": 0.9182549715042114, "learning_rate": 4.1126820761917075e-06, "loss": 0.07564163208007812, "step": 4224 }, { "epoch": 0.5887270953807566, "grad_norm": 1.4658360481262207, "learning_rate": 4.11036279952296e-06, "loss": 0.08757877349853516, "step": 4225 }, { "epoch": 0.588866439071971, "grad_norm": 0.978757917881012, "learning_rate": 4.108043720511287e-06, "loss": 0.0677485466003418, "step": 4226 }, { "epoch": 0.5890057827631854, "grad_norm": 0.7238304018974304, "learning_rate": 4.105724839671936e-06, "loss": 0.07394218444824219, "step": 4227 }, { "epoch": 0.5891451264543998, "grad_norm": 0.6296975016593933, "learning_rate": 4.103406157520108e-06, "loss": 0.068328857421875, "step": 4228 }, { "epoch": 0.5892844701456141, "grad_norm": 0.912176787853241, "learning_rate": 4.101087674570963e-06, "loss": 0.0801839828491211, "step": 4229 }, { "epoch": 0.5894238138368285, "grad_norm": 1.0438082218170166, "learning_rate": 4.0987693913396145e-06, "loss": 0.11116218566894531, "step": 4230 }, { "epoch": 0.5895631575280429, "grad_norm": 1.1442371606826782, "learning_rate": 4.096451308341132e-06, "loss": 0.08459949493408203, "step": 4231 }, { "epoch": 0.5897025012192573, "grad_norm": 0.6314463019371033, "learning_rate": 4.094133426090539e-06, "loss": 0.07101297378540039, "step": 4232 }, { "epoch": 0.5898418449104716, "grad_norm": 0.548953115940094, "learning_rate": 4.091815745102818e-06, "loss": 0.06730461120605469, "step": 4233 }, { "epoch": 0.589981188601686, "grad_norm": 0.6757421493530273, "learning_rate": 4.089498265892905e-06, "loss": 0.09356498718261719, "step": 4234 }, { "epoch": 0.5901205322929004, "grad_norm": 0.8188360929489136, "learning_rate": 4.0871809889756884e-06, "loss": 0.06249427795410156, "step": 4235 }, { "epoch": 0.5902598759841148, "grad_norm": 0.7324691414833069, "learning_rate": 4.084863914866018e-06, "loss": 0.09411764144897461, "step": 4236 }, { "epoch": 0.5903992196753292, "grad_norm": 0.6569265127182007, "learning_rate": 4.082547044078693e-06, "loss": 0.07280731201171875, "step": 4237 }, { "epoch": 0.5905385633665435, "grad_norm": 0.5939230918884277, "learning_rate": 4.0802303771284685e-06, "loss": 0.07048416137695312, "step": 4238 }, { "epoch": 0.5906779070577579, "grad_norm": 0.8853437304496765, "learning_rate": 4.0779139145300536e-06, "loss": 0.06929397583007812, "step": 4239 }, { "epoch": 0.5908172507489723, "grad_norm": 0.7705318927764893, "learning_rate": 4.075597656798117e-06, "loss": 0.07127952575683594, "step": 4240 }, { "epoch": 0.5909565944401867, "grad_norm": 0.4872743785381317, "learning_rate": 4.073281604447277e-06, "loss": 0.06574630737304688, "step": 4241 }, { "epoch": 0.5910959381314012, "grad_norm": 1.560097336769104, "learning_rate": 4.0709657579921075e-06, "loss": 0.10626411437988281, "step": 4242 }, { "epoch": 0.5912352818226155, "grad_norm": 0.864122211933136, "learning_rate": 4.068650117947135e-06, "loss": 0.08549642562866211, "step": 4243 }, { "epoch": 0.5913746255138299, "grad_norm": 0.7326685190200806, "learning_rate": 4.0663346848268435e-06, "loss": 0.08658885955810547, "step": 4244 }, { "epoch": 0.5915139692050443, "grad_norm": 0.7304664850234985, "learning_rate": 4.064019459145669e-06, "loss": 0.07420635223388672, "step": 4245 }, { "epoch": 0.5916533128962587, "grad_norm": 0.5800800323486328, "learning_rate": 4.061704441418002e-06, "loss": 0.07705163955688477, "step": 4246 }, { "epoch": 0.591792656587473, "grad_norm": 0.7664672136306763, "learning_rate": 4.059389632158189e-06, "loss": 0.07551765441894531, "step": 4247 }, { "epoch": 0.5919320002786874, "grad_norm": 0.9422216415405273, "learning_rate": 4.057075031880521e-06, "loss": 0.08427047729492188, "step": 4248 }, { "epoch": 0.5920713439699018, "grad_norm": 0.5336235761642456, "learning_rate": 4.054760641099256e-06, "loss": 0.0743246078491211, "step": 4249 }, { "epoch": 0.5922106876611162, "grad_norm": 0.7252978682518005, "learning_rate": 4.052446460328595e-06, "loss": 0.08340263366699219, "step": 4250 }, { "epoch": 0.5923500313523306, "grad_norm": 0.7032591104507446, "learning_rate": 4.050132490082698e-06, "loss": 0.08006954193115234, "step": 4251 }, { "epoch": 0.5924893750435449, "grad_norm": 0.4457019865512848, "learning_rate": 4.0478187308756775e-06, "loss": 0.06650733947753906, "step": 4252 }, { "epoch": 0.5926287187347593, "grad_norm": 0.8756943345069885, "learning_rate": 4.045505183221594e-06, "loss": 0.09003448486328125, "step": 4253 }, { "epoch": 0.5927680624259737, "grad_norm": 0.5203773379325867, "learning_rate": 4.043191847634469e-06, "loss": 0.06136655807495117, "step": 4254 }, { "epoch": 0.5929074061171881, "grad_norm": 1.4969077110290527, "learning_rate": 4.040878724628269e-06, "loss": 0.11285209655761719, "step": 4255 }, { "epoch": 0.5930467498084024, "grad_norm": 0.6639319658279419, "learning_rate": 4.038565814716921e-06, "loss": 0.0740213394165039, "step": 4256 }, { "epoch": 0.5931860934996168, "grad_norm": 1.2510398626327515, "learning_rate": 4.036253118414299e-06, "loss": 0.1054682731628418, "step": 4257 }, { "epoch": 0.5933254371908312, "grad_norm": 0.8715949058532715, "learning_rate": 4.033940636234233e-06, "loss": 0.07158184051513672, "step": 4258 }, { "epoch": 0.5934647808820456, "grad_norm": 1.4152380228042603, "learning_rate": 4.0316283686905e-06, "loss": 0.07763671875, "step": 4259 }, { "epoch": 0.59360412457326, "grad_norm": 0.790739119052887, "learning_rate": 4.029316316296834e-06, "loss": 0.08949947357177734, "step": 4260 }, { "epoch": 0.5937434682644743, "grad_norm": 0.5640100836753845, "learning_rate": 4.027004479566923e-06, "loss": 0.06564474105834961, "step": 4261 }, { "epoch": 0.5938828119556887, "grad_norm": 0.7507496476173401, "learning_rate": 4.024692859014403e-06, "loss": 0.08649063110351562, "step": 4262 }, { "epoch": 0.5940221556469031, "grad_norm": 0.5275675058364868, "learning_rate": 4.022381455152863e-06, "loss": 0.059238433837890625, "step": 4263 }, { "epoch": 0.5941614993381175, "grad_norm": 0.8127150535583496, "learning_rate": 4.020070268495844e-06, "loss": 0.08974266052246094, "step": 4264 }, { "epoch": 0.5943008430293318, "grad_norm": 0.9673686623573303, "learning_rate": 4.017759299556838e-06, "loss": 0.07238054275512695, "step": 4265 }, { "epoch": 0.5944401867205462, "grad_norm": 0.7607194781303406, "learning_rate": 4.015448548849293e-06, "loss": 0.07352256774902344, "step": 4266 }, { "epoch": 0.5945795304117606, "grad_norm": 0.7041110396385193, "learning_rate": 4.0131380168866e-06, "loss": 0.07568550109863281, "step": 4267 }, { "epoch": 0.594718874102975, "grad_norm": 0.6898688673973083, "learning_rate": 4.010827704182113e-06, "loss": 0.07300758361816406, "step": 4268 }, { "epoch": 0.5948582177941893, "grad_norm": 0.752535343170166, "learning_rate": 4.0085176112491245e-06, "loss": 0.06504535675048828, "step": 4269 }, { "epoch": 0.5949975614854037, "grad_norm": 0.5806765556335449, "learning_rate": 4.006207738600887e-06, "loss": 0.060260772705078125, "step": 4270 }, { "epoch": 0.5951369051766181, "grad_norm": 0.5175461173057556, "learning_rate": 4.0038980867506e-06, "loss": 0.07467460632324219, "step": 4271 }, { "epoch": 0.5952762488678325, "grad_norm": 0.7443585991859436, "learning_rate": 4.001588656211418e-06, "loss": 0.07384395599365234, "step": 4272 }, { "epoch": 0.5954155925590469, "grad_norm": 0.7025538086891174, "learning_rate": 3.999279447496444e-06, "loss": 0.07738876342773438, "step": 4273 }, { "epoch": 0.5955549362502612, "grad_norm": 0.4805254340171814, "learning_rate": 3.996970461118729e-06, "loss": 0.062305450439453125, "step": 4274 }, { "epoch": 0.5956942799414756, "grad_norm": 0.6508928537368774, "learning_rate": 3.994661697591278e-06, "loss": 0.07395362854003906, "step": 4275 }, { "epoch": 0.59583362363269, "grad_norm": 0.8314549922943115, "learning_rate": 3.992353157427044e-06, "loss": 0.07835865020751953, "step": 4276 }, { "epoch": 0.5959729673239044, "grad_norm": 0.6537046432495117, "learning_rate": 3.990044841138934e-06, "loss": 0.07371902465820312, "step": 4277 }, { "epoch": 0.5961123110151187, "grad_norm": 0.9314897060394287, "learning_rate": 3.987736749239804e-06, "loss": 0.10262584686279297, "step": 4278 }, { "epoch": 0.5962516547063331, "grad_norm": 0.6704654693603516, "learning_rate": 3.985428882242458e-06, "loss": 0.09258842468261719, "step": 4279 }, { "epoch": 0.5963909983975475, "grad_norm": 1.1296793222427368, "learning_rate": 3.983121240659649e-06, "loss": 0.09641695022583008, "step": 4280 }, { "epoch": 0.5965303420887619, "grad_norm": 0.7931644916534424, "learning_rate": 3.980813825004086e-06, "loss": 0.08053445816040039, "step": 4281 }, { "epoch": 0.5966696857799764, "grad_norm": 0.7069551944732666, "learning_rate": 3.978506635788423e-06, "loss": 0.08197784423828125, "step": 4282 }, { "epoch": 0.5968090294711907, "grad_norm": 0.6841185688972473, "learning_rate": 3.976199673525263e-06, "loss": 0.09083938598632812, "step": 4283 }, { "epoch": 0.5969483731624051, "grad_norm": 0.8029910326004028, "learning_rate": 3.973892938727164e-06, "loss": 0.07769584655761719, "step": 4284 }, { "epoch": 0.5970877168536195, "grad_norm": 0.6230928301811218, "learning_rate": 3.971586431906627e-06, "loss": 0.08356952667236328, "step": 4285 }, { "epoch": 0.5972270605448339, "grad_norm": 0.5974656343460083, "learning_rate": 3.969280153576105e-06, "loss": 0.07249975204467773, "step": 4286 }, { "epoch": 0.5973664042360483, "grad_norm": 0.9101149439811707, "learning_rate": 3.966974104248001e-06, "loss": 0.07880020141601562, "step": 4287 }, { "epoch": 0.5975057479272626, "grad_norm": 0.586671769618988, "learning_rate": 3.964668284434666e-06, "loss": 0.0776529312133789, "step": 4288 }, { "epoch": 0.597645091618477, "grad_norm": 0.6531924605369568, "learning_rate": 3.962362694648404e-06, "loss": 0.0821533203125, "step": 4289 }, { "epoch": 0.5977844353096914, "grad_norm": 0.5595383644104004, "learning_rate": 3.960057335401459e-06, "loss": 0.06534576416015625, "step": 4290 }, { "epoch": 0.5979237790009058, "grad_norm": 0.7021380066871643, "learning_rate": 3.9577522072060336e-06, "loss": 0.07387256622314453, "step": 4291 }, { "epoch": 0.5980631226921201, "grad_norm": 1.260929822921753, "learning_rate": 3.95544731057427e-06, "loss": 0.09919071197509766, "step": 4292 }, { "epoch": 0.5982024663833345, "grad_norm": 0.6312010884284973, "learning_rate": 3.953142646018269e-06, "loss": 0.06567764282226562, "step": 4293 }, { "epoch": 0.5983418100745489, "grad_norm": 0.6414533257484436, "learning_rate": 3.95083821405007e-06, "loss": 0.07694816589355469, "step": 4294 }, { "epoch": 0.5984811537657633, "grad_norm": 1.0088403224945068, "learning_rate": 3.948534015181671e-06, "loss": 0.08763694763183594, "step": 4295 }, { "epoch": 0.5986204974569777, "grad_norm": 0.6692929267883301, "learning_rate": 3.946230049925004e-06, "loss": 0.06441116333007812, "step": 4296 }, { "epoch": 0.598759841148192, "grad_norm": 0.8394016027450562, "learning_rate": 3.9439263187919635e-06, "loss": 0.07421684265136719, "step": 4297 }, { "epoch": 0.5988991848394064, "grad_norm": 0.7968789339065552, "learning_rate": 3.941622822294385e-06, "loss": 0.07713985443115234, "step": 4298 }, { "epoch": 0.5990385285306208, "grad_norm": 0.8227296471595764, "learning_rate": 3.939319560944051e-06, "loss": 0.0779581069946289, "step": 4299 }, { "epoch": 0.5991778722218352, "grad_norm": 0.6486475467681885, "learning_rate": 3.937016535252696e-06, "loss": 0.06167197227478027, "step": 4300 }, { "epoch": 0.5993172159130495, "grad_norm": 0.7296062707901001, "learning_rate": 3.934713745731998e-06, "loss": 0.07601070404052734, "step": 4301 }, { "epoch": 0.5994565596042639, "grad_norm": 0.5933747291564941, "learning_rate": 3.932411192893586e-06, "loss": 0.06582832336425781, "step": 4302 }, { "epoch": 0.5995959032954783, "grad_norm": 0.574333667755127, "learning_rate": 3.93010887724903e-06, "loss": 0.06886529922485352, "step": 4303 }, { "epoch": 0.5997352469866927, "grad_norm": 0.7184052467346191, "learning_rate": 3.927806799309859e-06, "loss": 0.0795602798461914, "step": 4304 }, { "epoch": 0.599874590677907, "grad_norm": 0.5136051774024963, "learning_rate": 3.925504959587538e-06, "loss": 0.06239074468612671, "step": 4305 }, { "epoch": 0.6000139343691214, "grad_norm": 0.9271430969238281, "learning_rate": 3.9232033585934835e-06, "loss": 0.07778263092041016, "step": 4306 }, { "epoch": 0.6001532780603358, "grad_norm": 0.8494870066642761, "learning_rate": 3.920901996839059e-06, "loss": 0.08123445510864258, "step": 4307 }, { "epoch": 0.6002926217515502, "grad_norm": 0.7923383116722107, "learning_rate": 3.918600874835573e-06, "loss": 0.09552955627441406, "step": 4308 }, { "epoch": 0.6004319654427646, "grad_norm": 0.9074344635009766, "learning_rate": 3.916299993094285e-06, "loss": 0.0933833122253418, "step": 4309 }, { "epoch": 0.6005713091339789, "grad_norm": 0.573043704032898, "learning_rate": 3.913999352126399e-06, "loss": 0.07124710083007812, "step": 4310 }, { "epoch": 0.6007106528251933, "grad_norm": 0.8818239569664001, "learning_rate": 3.9116989524430615e-06, "loss": 0.10512685775756836, "step": 4311 }, { "epoch": 0.6008499965164077, "grad_norm": 0.6101093292236328, "learning_rate": 3.90939879455537e-06, "loss": 0.07342815399169922, "step": 4312 }, { "epoch": 0.6009893402076221, "grad_norm": 0.61506187915802, "learning_rate": 3.907098878974367e-06, "loss": 0.06854820251464844, "step": 4313 }, { "epoch": 0.6011286838988364, "grad_norm": 0.9944456815719604, "learning_rate": 3.9047992062110435e-06, "loss": 0.0864405632019043, "step": 4314 }, { "epoch": 0.6012680275900508, "grad_norm": 0.6332154870033264, "learning_rate": 3.902499776776331e-06, "loss": 0.07474136352539062, "step": 4315 }, { "epoch": 0.6014073712812652, "grad_norm": 0.6759952902793884, "learning_rate": 3.900200591181114e-06, "loss": 0.08503532409667969, "step": 4316 }, { "epoch": 0.6015467149724796, "grad_norm": 0.4836839735507965, "learning_rate": 3.897901649936215e-06, "loss": 0.06221771240234375, "step": 4317 }, { "epoch": 0.601686058663694, "grad_norm": 0.5359088778495789, "learning_rate": 3.895602953552408e-06, "loss": 0.060943603515625, "step": 4318 }, { "epoch": 0.6018254023549083, "grad_norm": 0.8783189654350281, "learning_rate": 3.8933045025404105e-06, "loss": 0.08842658996582031, "step": 4319 }, { "epoch": 0.6019647460461227, "grad_norm": 0.7382382154464722, "learning_rate": 3.891006297410887e-06, "loss": 0.07642936706542969, "step": 4320 }, { "epoch": 0.6021040897373371, "grad_norm": 1.1777639389038086, "learning_rate": 3.888708338674447e-06, "loss": 0.0913534164428711, "step": 4321 }, { "epoch": 0.6022434334285516, "grad_norm": 0.5305924415588379, "learning_rate": 3.8864106268416416e-06, "loss": 0.06847763061523438, "step": 4322 }, { "epoch": 0.602382777119766, "grad_norm": 0.6359217762947083, "learning_rate": 3.884113162422971e-06, "loss": 0.06326866149902344, "step": 4323 }, { "epoch": 0.6025221208109803, "grad_norm": 1.022457242012024, "learning_rate": 3.881815945928879e-06, "loss": 0.08957576751708984, "step": 4324 }, { "epoch": 0.6026614645021947, "grad_norm": 0.6921687126159668, "learning_rate": 3.879518977869755e-06, "loss": 0.09232139587402344, "step": 4325 }, { "epoch": 0.6028008081934091, "grad_norm": 0.8315995931625366, "learning_rate": 3.8772222587559345e-06, "loss": 0.07229423522949219, "step": 4326 }, { "epoch": 0.6029401518846235, "grad_norm": 0.5228809714317322, "learning_rate": 3.874925789097695e-06, "loss": 0.07685089111328125, "step": 4327 }, { "epoch": 0.6030794955758378, "grad_norm": 0.523419201374054, "learning_rate": 3.872629569405257e-06, "loss": 0.061944007873535156, "step": 4328 }, { "epoch": 0.6032188392670522, "grad_norm": 1.077052116394043, "learning_rate": 3.870333600188792e-06, "loss": 0.07235240936279297, "step": 4329 }, { "epoch": 0.6033581829582666, "grad_norm": 0.6459034085273743, "learning_rate": 3.86803788195841e-06, "loss": 0.08607959747314453, "step": 4330 }, { "epoch": 0.603497526649481, "grad_norm": 1.3130711317062378, "learning_rate": 3.865742415224169e-06, "loss": 0.11230850219726562, "step": 4331 }, { "epoch": 0.6036368703406954, "grad_norm": 0.65816730260849, "learning_rate": 3.863447200496065e-06, "loss": 0.08785629272460938, "step": 4332 }, { "epoch": 0.6037762140319097, "grad_norm": 0.6516565680503845, "learning_rate": 3.8611522382840476e-06, "loss": 0.0760507583618164, "step": 4333 }, { "epoch": 0.6039155577231241, "grad_norm": 0.7217513918876648, "learning_rate": 3.858857529098001e-06, "loss": 0.07089996337890625, "step": 4334 }, { "epoch": 0.6040549014143385, "grad_norm": 0.7474568486213684, "learning_rate": 3.8565630734477575e-06, "loss": 0.07926177978515625, "step": 4335 }, { "epoch": 0.6041942451055529, "grad_norm": 0.9775040149688721, "learning_rate": 3.854268871843096e-06, "loss": 0.08973121643066406, "step": 4336 }, { "epoch": 0.6043335887967672, "grad_norm": 0.8507336378097534, "learning_rate": 3.851974924793734e-06, "loss": 0.08262825012207031, "step": 4337 }, { "epoch": 0.6044729324879816, "grad_norm": 1.3194245100021362, "learning_rate": 3.8496812328093335e-06, "loss": 0.09914112091064453, "step": 4338 }, { "epoch": 0.604612276179196, "grad_norm": 1.267433524131775, "learning_rate": 3.8473877963995e-06, "loss": 0.07239389419555664, "step": 4339 }, { "epoch": 0.6047516198704104, "grad_norm": 0.5416774749755859, "learning_rate": 3.845094616073783e-06, "loss": 0.06356048583984375, "step": 4340 }, { "epoch": 0.6048909635616248, "grad_norm": 0.30907222628593445, "learning_rate": 3.8428016923416775e-06, "loss": 0.055014610290527344, "step": 4341 }, { "epoch": 0.6050303072528391, "grad_norm": 0.595300018787384, "learning_rate": 3.840509025712616e-06, "loss": 0.06793594360351562, "step": 4342 }, { "epoch": 0.6051696509440535, "grad_norm": 0.7252150774002075, "learning_rate": 3.838216616695977e-06, "loss": 0.07719039916992188, "step": 4343 }, { "epoch": 0.6053089946352679, "grad_norm": 0.6291027069091797, "learning_rate": 3.835924465801081e-06, "loss": 0.0774078369140625, "step": 4344 }, { "epoch": 0.6054483383264823, "grad_norm": 0.5901118516921997, "learning_rate": 3.833632573537193e-06, "loss": 0.09292793273925781, "step": 4345 }, { "epoch": 0.6055876820176966, "grad_norm": 0.5450825691223145, "learning_rate": 3.831340940413519e-06, "loss": 0.07536697387695312, "step": 4346 }, { "epoch": 0.605727025708911, "grad_norm": 0.8120288848876953, "learning_rate": 3.8290495669392085e-06, "loss": 0.06648063659667969, "step": 4347 }, { "epoch": 0.6058663694001254, "grad_norm": 1.2942126989364624, "learning_rate": 3.826758453623348e-06, "loss": 0.11629581451416016, "step": 4348 }, { "epoch": 0.6060057130913398, "grad_norm": 1.3100533485412598, "learning_rate": 3.8244676009749745e-06, "loss": 0.11692863702774048, "step": 4349 }, { "epoch": 0.6061450567825541, "grad_norm": 0.6337969899177551, "learning_rate": 3.8221770095030625e-06, "loss": 0.05343437194824219, "step": 4350 }, { "epoch": 0.6062844004737685, "grad_norm": 0.853819727897644, "learning_rate": 3.819886679716528e-06, "loss": 0.08650016784667969, "step": 4351 }, { "epoch": 0.6064237441649829, "grad_norm": 0.5388771295547485, "learning_rate": 3.8175966121242314e-06, "loss": 0.07145500183105469, "step": 4352 }, { "epoch": 0.6065630878561973, "grad_norm": 0.8766190409660339, "learning_rate": 3.815306807234974e-06, "loss": 0.103515625, "step": 4353 }, { "epoch": 0.6067024315474117, "grad_norm": 0.750085175037384, "learning_rate": 3.8130172655574963e-06, "loss": 0.08060646057128906, "step": 4354 }, { "epoch": 0.606841775238626, "grad_norm": 0.5588322281837463, "learning_rate": 3.810727987600482e-06, "loss": 0.07439422607421875, "step": 4355 }, { "epoch": 0.6069811189298404, "grad_norm": 0.9335166811943054, "learning_rate": 3.808438973872558e-06, "loss": 0.07511711120605469, "step": 4356 }, { "epoch": 0.6071204626210548, "grad_norm": 1.1095190048217773, "learning_rate": 3.80615022488229e-06, "loss": 0.09304046630859375, "step": 4357 }, { "epoch": 0.6072598063122692, "grad_norm": 0.7841212153434753, "learning_rate": 3.8038617411381876e-06, "loss": 0.09152793884277344, "step": 4358 }, { "epoch": 0.6073991500034835, "grad_norm": 0.6327508687973022, "learning_rate": 3.8015735231486974e-06, "loss": 0.0889749526977539, "step": 4359 }, { "epoch": 0.6075384936946979, "grad_norm": 0.649229884147644, "learning_rate": 3.799285571422208e-06, "loss": 0.06990289688110352, "step": 4360 }, { "epoch": 0.6076778373859123, "grad_norm": 0.7100394368171692, "learning_rate": 3.7969978864670527e-06, "loss": 0.06519126892089844, "step": 4361 }, { "epoch": 0.6078171810771267, "grad_norm": 0.5987770557403564, "learning_rate": 3.794710468791502e-06, "loss": 0.07960033416748047, "step": 4362 }, { "epoch": 0.6079565247683412, "grad_norm": 0.3748518228530884, "learning_rate": 3.7924233189037697e-06, "loss": 0.057244300842285156, "step": 4363 }, { "epoch": 0.6080958684595555, "grad_norm": 0.773492157459259, "learning_rate": 3.7901364373120036e-06, "loss": 0.08240127563476562, "step": 4364 }, { "epoch": 0.6082352121507699, "grad_norm": 0.7370126247406006, "learning_rate": 3.787849824524301e-06, "loss": 0.07392692565917969, "step": 4365 }, { "epoch": 0.6083745558419843, "grad_norm": 0.45733165740966797, "learning_rate": 3.7855634810486936e-06, "loss": 0.06058025360107422, "step": 4366 }, { "epoch": 0.6085138995331987, "grad_norm": 0.5427346229553223, "learning_rate": 3.7832774073931535e-06, "loss": 0.07320022583007812, "step": 4367 }, { "epoch": 0.608653243224413, "grad_norm": 0.5028555393218994, "learning_rate": 3.780991604065598e-06, "loss": 0.061187744140625, "step": 4368 }, { "epoch": 0.6087925869156274, "grad_norm": 0.5126957893371582, "learning_rate": 3.778706071573875e-06, "loss": 0.07297039031982422, "step": 4369 }, { "epoch": 0.6089319306068418, "grad_norm": 0.73592609167099, "learning_rate": 3.776420810425781e-06, "loss": 0.09452199935913086, "step": 4370 }, { "epoch": 0.6090712742980562, "grad_norm": 0.7471990585327148, "learning_rate": 3.774135821129047e-06, "loss": 0.07698726654052734, "step": 4371 }, { "epoch": 0.6092106179892706, "grad_norm": 0.7114197611808777, "learning_rate": 3.771851104191348e-06, "loss": 0.06645011901855469, "step": 4372 }, { "epoch": 0.6093499616804849, "grad_norm": 0.6423380374908447, "learning_rate": 3.7695666601202944e-06, "loss": 0.07459163665771484, "step": 4373 }, { "epoch": 0.6094893053716993, "grad_norm": 0.6441123485565186, "learning_rate": 3.7672824894234388e-06, "loss": 0.0784444808959961, "step": 4374 }, { "epoch": 0.6096286490629137, "grad_norm": 0.5723817348480225, "learning_rate": 3.7649985926082695e-06, "loss": 0.07116055488586426, "step": 4375 }, { "epoch": 0.6097679927541281, "grad_norm": 0.860230028629303, "learning_rate": 3.762714970182216e-06, "loss": 0.09092044830322266, "step": 4376 }, { "epoch": 0.6099073364453425, "grad_norm": 0.6411107778549194, "learning_rate": 3.76043162265265e-06, "loss": 0.063690185546875, "step": 4377 }, { "epoch": 0.6100466801365568, "grad_norm": 0.7385891079902649, "learning_rate": 3.758148550526877e-06, "loss": 0.07537269592285156, "step": 4378 }, { "epoch": 0.6101860238277712, "grad_norm": 0.5268428921699524, "learning_rate": 3.7558657543121456e-06, "loss": 0.059477806091308594, "step": 4379 }, { "epoch": 0.6103253675189856, "grad_norm": 0.8573254942893982, "learning_rate": 3.7535832345156376e-06, "loss": 0.08577919006347656, "step": 4380 }, { "epoch": 0.6104647112102, "grad_norm": 1.3278496265411377, "learning_rate": 3.7513009916444797e-06, "loss": 0.10027885437011719, "step": 4381 }, { "epoch": 0.6106040549014143, "grad_norm": 0.5458519458770752, "learning_rate": 3.7490190262057322e-06, "loss": 0.07077789306640625, "step": 4382 }, { "epoch": 0.6107433985926287, "grad_norm": 0.7097036242485046, "learning_rate": 3.7467373387063973e-06, "loss": 0.06625843048095703, "step": 4383 }, { "epoch": 0.6108827422838431, "grad_norm": 1.097740650177002, "learning_rate": 3.7444559296534144e-06, "loss": 0.0962228775024414, "step": 4384 }, { "epoch": 0.6110220859750575, "grad_norm": 0.8550710678100586, "learning_rate": 3.7421747995536585e-06, "loss": 0.08878564834594727, "step": 4385 }, { "epoch": 0.6111614296662718, "grad_norm": 0.7085119485855103, "learning_rate": 3.739893948913945e-06, "loss": 0.07197284698486328, "step": 4386 }, { "epoch": 0.6113007733574862, "grad_norm": 0.7569177746772766, "learning_rate": 3.7376133782410275e-06, "loss": 0.08911895751953125, "step": 4387 }, { "epoch": 0.6114401170487006, "grad_norm": 0.6299844980239868, "learning_rate": 3.7353330880415963e-06, "loss": 0.07415962219238281, "step": 4388 }, { "epoch": 0.611579460739915, "grad_norm": 1.1857609748840332, "learning_rate": 3.7330530788222807e-06, "loss": 0.08039379119873047, "step": 4389 }, { "epoch": 0.6117188044311294, "grad_norm": 0.4822901785373688, "learning_rate": 3.730773351089647e-06, "loss": 0.0688943862915039, "step": 4390 }, { "epoch": 0.6118581481223437, "grad_norm": 0.7344595193862915, "learning_rate": 3.7284939053501966e-06, "loss": 0.0712127685546875, "step": 4391 }, { "epoch": 0.6119974918135581, "grad_norm": 0.8286502361297607, "learning_rate": 3.7262147421103713e-06, "loss": 0.07763862609863281, "step": 4392 }, { "epoch": 0.6121368355047725, "grad_norm": 0.75954270362854, "learning_rate": 3.723935861876549e-06, "loss": 0.08724498748779297, "step": 4393 }, { "epoch": 0.6122761791959869, "grad_norm": 1.0407577753067017, "learning_rate": 3.7216572651550453e-06, "loss": 0.07533740997314453, "step": 4394 }, { "epoch": 0.6124155228872012, "grad_norm": 0.4924258887767792, "learning_rate": 3.7193789524521146e-06, "loss": 0.05701255798339844, "step": 4395 }, { "epoch": 0.6125548665784156, "grad_norm": 1.083828091621399, "learning_rate": 3.717100924273941e-06, "loss": 0.06853103637695312, "step": 4396 }, { "epoch": 0.61269421026963, "grad_norm": 0.5778712034225464, "learning_rate": 3.714823181126653e-06, "loss": 0.08489418029785156, "step": 4397 }, { "epoch": 0.6128335539608444, "grad_norm": 0.7301135659217834, "learning_rate": 3.7125457235163144e-06, "loss": 0.07057762145996094, "step": 4398 }, { "epoch": 0.6129728976520588, "grad_norm": 0.9094704985618591, "learning_rate": 3.710268551948921e-06, "loss": 0.09301090240478516, "step": 4399 }, { "epoch": 0.6131122413432731, "grad_norm": 0.5936276912689209, "learning_rate": 3.7079916669304127e-06, "loss": 0.07810783386230469, "step": 4400 }, { "epoch": 0.6132515850344875, "grad_norm": 0.9509102702140808, "learning_rate": 3.7057150689666577e-06, "loss": 0.07930469512939453, "step": 4401 }, { "epoch": 0.6133909287257019, "grad_norm": 0.6353742480278015, "learning_rate": 3.7034387585634656e-06, "loss": 0.07029342651367188, "step": 4402 }, { "epoch": 0.6135302724169164, "grad_norm": 0.7581635117530823, "learning_rate": 3.701162736226579e-06, "loss": 0.07931220531463623, "step": 4403 }, { "epoch": 0.6136696161081308, "grad_norm": 0.7161125540733337, "learning_rate": 3.6988870024616807e-06, "loss": 0.07434844970703125, "step": 4404 }, { "epoch": 0.6138089597993451, "grad_norm": 0.963810384273529, "learning_rate": 3.6966115577743865e-06, "loss": 0.09893417358398438, "step": 4405 }, { "epoch": 0.6139483034905595, "grad_norm": 0.5477190613746643, "learning_rate": 3.6943364026702466e-06, "loss": 0.07433295249938965, "step": 4406 }, { "epoch": 0.6140876471817739, "grad_norm": 0.5964879393577576, "learning_rate": 3.6920615376547487e-06, "loss": 0.0795888900756836, "step": 4407 }, { "epoch": 0.6142269908729883, "grad_norm": 1.9054133892059326, "learning_rate": 3.6897869632333157e-06, "loss": 0.11301231384277344, "step": 4408 }, { "epoch": 0.6143663345642026, "grad_norm": 0.7492195963859558, "learning_rate": 3.687512679911307e-06, "loss": 0.07269763946533203, "step": 4409 }, { "epoch": 0.614505678255417, "grad_norm": 0.5016090273857117, "learning_rate": 3.685238688194016e-06, "loss": 0.07129764556884766, "step": 4410 }, { "epoch": 0.6146450219466314, "grad_norm": 0.6092597246170044, "learning_rate": 3.682964988586675e-06, "loss": 0.07843589782714844, "step": 4411 }, { "epoch": 0.6147843656378458, "grad_norm": 0.7711814045906067, "learning_rate": 3.6806915815944422e-06, "loss": 0.09989356994628906, "step": 4412 }, { "epoch": 0.6149237093290602, "grad_norm": 0.3839167356491089, "learning_rate": 3.6784184677224204e-06, "loss": 0.0595775842666626, "step": 4413 }, { "epoch": 0.6150630530202745, "grad_norm": 0.49540242552757263, "learning_rate": 3.676145647475643e-06, "loss": 0.06371593475341797, "step": 4414 }, { "epoch": 0.6152023967114889, "grad_norm": 0.5842924118041992, "learning_rate": 3.673873121359077e-06, "loss": 0.06864738464355469, "step": 4415 }, { "epoch": 0.6153417404027033, "grad_norm": 0.6909725069999695, "learning_rate": 3.6716008898776306e-06, "loss": 0.08333635330200195, "step": 4416 }, { "epoch": 0.6154810840939177, "grad_norm": 0.5923606753349304, "learning_rate": 3.669328953536137e-06, "loss": 0.07332372665405273, "step": 4417 }, { "epoch": 0.615620427785132, "grad_norm": 0.5694776773452759, "learning_rate": 3.6670573128393704e-06, "loss": 0.07829093933105469, "step": 4418 }, { "epoch": 0.6157597714763464, "grad_norm": 0.5876986384391785, "learning_rate": 3.664785968292036e-06, "loss": 0.0753622055053711, "step": 4419 }, { "epoch": 0.6158991151675608, "grad_norm": 0.570625901222229, "learning_rate": 3.662514920398777e-06, "loss": 0.07889175415039062, "step": 4420 }, { "epoch": 0.6160384588587752, "grad_norm": 0.7167767286300659, "learning_rate": 3.6602441696641684e-06, "loss": 0.081390380859375, "step": 4421 }, { "epoch": 0.6161778025499896, "grad_norm": 0.7966193556785583, "learning_rate": 3.6579737165927176e-06, "loss": 0.10007381439208984, "step": 4422 }, { "epoch": 0.6163171462412039, "grad_norm": 0.5578175187110901, "learning_rate": 3.655703561688867e-06, "loss": 0.08725357055664062, "step": 4423 }, { "epoch": 0.6164564899324183, "grad_norm": 0.8497603535652161, "learning_rate": 3.653433705456994e-06, "loss": 0.10169410705566406, "step": 4424 }, { "epoch": 0.6165958336236327, "grad_norm": 0.43952277302742004, "learning_rate": 3.651164148401409e-06, "loss": 0.07293510437011719, "step": 4425 }, { "epoch": 0.6167351773148471, "grad_norm": 0.476101279258728, "learning_rate": 3.648894891026358e-06, "loss": 0.07068347930908203, "step": 4426 }, { "epoch": 0.6168745210060614, "grad_norm": 1.1239060163497925, "learning_rate": 3.646625933836015e-06, "loss": 0.08936452865600586, "step": 4427 }, { "epoch": 0.6170138646972758, "grad_norm": 0.4922407269477844, "learning_rate": 3.64435727733449e-06, "loss": 0.06692004203796387, "step": 4428 }, { "epoch": 0.6171532083884902, "grad_norm": 0.8810414671897888, "learning_rate": 3.6420889220258295e-06, "loss": 0.1155233383178711, "step": 4429 }, { "epoch": 0.6172925520797046, "grad_norm": 0.8849236965179443, "learning_rate": 3.639820868414008e-06, "loss": 0.07428216934204102, "step": 4430 }, { "epoch": 0.617431895770919, "grad_norm": 0.6023670434951782, "learning_rate": 3.6375531170029356e-06, "loss": 0.08826065063476562, "step": 4431 }, { "epoch": 0.6175712394621333, "grad_norm": 0.859091579914093, "learning_rate": 3.6352856682964576e-06, "loss": 0.09094810485839844, "step": 4432 }, { "epoch": 0.6177105831533477, "grad_norm": 0.7980836629867554, "learning_rate": 3.633018522798346e-06, "loss": 0.0778508186340332, "step": 4433 }, { "epoch": 0.6178499268445621, "grad_norm": 0.8347374200820923, "learning_rate": 3.6307516810123095e-06, "loss": 0.07433366775512695, "step": 4434 }, { "epoch": 0.6179892705357765, "grad_norm": 0.5022826194763184, "learning_rate": 3.6284851434419886e-06, "loss": 0.05412769317626953, "step": 4435 }, { "epoch": 0.6181286142269908, "grad_norm": 0.48596882820129395, "learning_rate": 3.6262189105909574e-06, "loss": 0.06184864044189453, "step": 4436 }, { "epoch": 0.6182679579182052, "grad_norm": 1.2146110534667969, "learning_rate": 3.6239529829627214e-06, "loss": 0.11786079406738281, "step": 4437 }, { "epoch": 0.6184073016094196, "grad_norm": 0.9644686579704285, "learning_rate": 3.6216873610607155e-06, "loss": 0.08608055114746094, "step": 4438 }, { "epoch": 0.618546645300634, "grad_norm": 0.5768442153930664, "learning_rate": 3.61942204538831e-06, "loss": 0.08120346069335938, "step": 4439 }, { "epoch": 0.6186859889918483, "grad_norm": 0.9865733981132507, "learning_rate": 3.6171570364488075e-06, "loss": 0.08400344848632812, "step": 4440 }, { "epoch": 0.6188253326830627, "grad_norm": 0.733873724937439, "learning_rate": 3.6148923347454413e-06, "loss": 0.07939577102661133, "step": 4441 }, { "epoch": 0.6189646763742771, "grad_norm": 0.7077635526657104, "learning_rate": 3.6126279407813765e-06, "loss": 0.09007549285888672, "step": 4442 }, { "epoch": 0.6191040200654916, "grad_norm": 0.8223574757575989, "learning_rate": 3.6103638550597074e-06, "loss": 0.08846330642700195, "step": 4443 }, { "epoch": 0.619243363756706, "grad_norm": 0.7038217186927795, "learning_rate": 3.6081000780834635e-06, "loss": 0.08640861511230469, "step": 4444 }, { "epoch": 0.6193827074479203, "grad_norm": 0.9246658682823181, "learning_rate": 3.6058366103556055e-06, "loss": 0.07788467407226562, "step": 4445 }, { "epoch": 0.6195220511391347, "grad_norm": 0.8650348782539368, "learning_rate": 3.6035734523790235e-06, "loss": 0.09811687469482422, "step": 4446 }, { "epoch": 0.6196613948303491, "grad_norm": 0.8142459988594055, "learning_rate": 3.6013106046565383e-06, "loss": 0.06706619262695312, "step": 4447 }, { "epoch": 0.6198007385215635, "grad_norm": 1.2355074882507324, "learning_rate": 3.5990480676909055e-06, "loss": 0.07927322387695312, "step": 4448 }, { "epoch": 0.6199400822127779, "grad_norm": 0.6793022155761719, "learning_rate": 3.5967858419848077e-06, "loss": 0.07587623596191406, "step": 4449 }, { "epoch": 0.6200794259039922, "grad_norm": 0.8173874020576477, "learning_rate": 3.5945239280408596e-06, "loss": 0.06467199325561523, "step": 4450 }, { "epoch": 0.6202187695952066, "grad_norm": 0.8051761984825134, "learning_rate": 3.592262326361606e-06, "loss": 0.07104682922363281, "step": 4451 }, { "epoch": 0.620358113286421, "grad_norm": 0.6002299189567566, "learning_rate": 3.5900010374495252e-06, "loss": 0.0827169418334961, "step": 4452 }, { "epoch": 0.6204974569776354, "grad_norm": 1.2069339752197266, "learning_rate": 3.587740061807024e-06, "loss": 0.08740615844726562, "step": 4453 }, { "epoch": 0.6206368006688497, "grad_norm": 0.7934018969535828, "learning_rate": 3.585479399936438e-06, "loss": 0.05861473083496094, "step": 4454 }, { "epoch": 0.6207761443600641, "grad_norm": 0.5086268782615662, "learning_rate": 3.583219052340034e-06, "loss": 0.05996990203857422, "step": 4455 }, { "epoch": 0.6209154880512785, "grad_norm": 0.8033154010772705, "learning_rate": 3.5809590195200115e-06, "loss": 0.08419227600097656, "step": 4456 }, { "epoch": 0.6210548317424929, "grad_norm": 1.00428307056427, "learning_rate": 3.578699301978499e-06, "loss": 0.08443927764892578, "step": 4457 }, { "epoch": 0.6211941754337073, "grad_norm": 0.9485251307487488, "learning_rate": 3.576439900217552e-06, "loss": 0.08469581604003906, "step": 4458 }, { "epoch": 0.6213335191249216, "grad_norm": 1.250658392906189, "learning_rate": 3.5741808147391587e-06, "loss": 0.10146045684814453, "step": 4459 }, { "epoch": 0.621472862816136, "grad_norm": 1.0106204748153687, "learning_rate": 3.571922046045235e-06, "loss": 0.07752084732055664, "step": 4460 }, { "epoch": 0.6216122065073504, "grad_norm": 0.5389053821563721, "learning_rate": 3.5696635946376305e-06, "loss": 0.0733346939086914, "step": 4461 }, { "epoch": 0.6217515501985648, "grad_norm": 0.8874285817146301, "learning_rate": 3.5674054610181203e-06, "loss": 0.08460140228271484, "step": 4462 }, { "epoch": 0.6218908938897791, "grad_norm": 0.5687423348426819, "learning_rate": 3.5651476456884103e-06, "loss": 0.07873916625976562, "step": 4463 }, { "epoch": 0.6220302375809935, "grad_norm": 0.8134894371032715, "learning_rate": 3.562890149150134e-06, "loss": 0.09578132629394531, "step": 4464 }, { "epoch": 0.6221695812722079, "grad_norm": 0.553753137588501, "learning_rate": 3.560632971904857e-06, "loss": 0.0637965202331543, "step": 4465 }, { "epoch": 0.6223089249634223, "grad_norm": 0.8119927644729614, "learning_rate": 3.558376114454073e-06, "loss": 0.0827474594116211, "step": 4466 }, { "epoch": 0.6224482686546366, "grad_norm": 0.8174152374267578, "learning_rate": 3.556119577299202e-06, "loss": 0.09004497528076172, "step": 4467 }, { "epoch": 0.622587612345851, "grad_norm": 0.6433756947517395, "learning_rate": 3.553863360941598e-06, "loss": 0.0741567611694336, "step": 4468 }, { "epoch": 0.6227269560370654, "grad_norm": 0.5756398439407349, "learning_rate": 3.55160746588254e-06, "loss": 0.07775497436523438, "step": 4469 }, { "epoch": 0.6228662997282798, "grad_norm": 1.1020463705062866, "learning_rate": 3.5493518926232352e-06, "loss": 0.09613227844238281, "step": 4470 }, { "epoch": 0.6230056434194942, "grad_norm": 1.1217286586761475, "learning_rate": 3.547096641664819e-06, "loss": 0.09298133850097656, "step": 4471 }, { "epoch": 0.6231449871107085, "grad_norm": 0.6043747067451477, "learning_rate": 3.5448417135083603e-06, "loss": 0.0864107608795166, "step": 4472 }, { "epoch": 0.6232843308019229, "grad_norm": 0.42442503571510315, "learning_rate": 3.5425871086548513e-06, "loss": 0.06357383728027344, "step": 4473 }, { "epoch": 0.6234236744931373, "grad_norm": 0.8135936260223389, "learning_rate": 3.540332827605214e-06, "loss": 0.06513214111328125, "step": 4474 }, { "epoch": 0.6235630181843517, "grad_norm": 0.6701084971427917, "learning_rate": 3.538078870860297e-06, "loss": 0.08845329284667969, "step": 4475 }, { "epoch": 0.623702361875566, "grad_norm": 0.6995745301246643, "learning_rate": 3.5358252389208777e-06, "loss": 0.08239364624023438, "step": 4476 }, { "epoch": 0.6238417055667804, "grad_norm": 0.5378878116607666, "learning_rate": 3.533571932287663e-06, "loss": 0.09020423889160156, "step": 4477 }, { "epoch": 0.6239810492579948, "grad_norm": 0.4561738967895508, "learning_rate": 3.5313189514612867e-06, "loss": 0.05769634246826172, "step": 4478 }, { "epoch": 0.6241203929492092, "grad_norm": 0.7177755236625671, "learning_rate": 3.5290662969423097e-06, "loss": 0.09323883056640625, "step": 4479 }, { "epoch": 0.6242597366404236, "grad_norm": 0.5869435667991638, "learning_rate": 3.5268139692312163e-06, "loss": 0.08171272277832031, "step": 4480 }, { "epoch": 0.6243990803316379, "grad_norm": 0.6637368202209473, "learning_rate": 3.5245619688284277e-06, "loss": 0.07150411605834961, "step": 4481 }, { "epoch": 0.6245384240228523, "grad_norm": 0.822104811668396, "learning_rate": 3.522310296234285e-06, "loss": 0.0889749526977539, "step": 4482 }, { "epoch": 0.6246777677140668, "grad_norm": 0.8044558167457581, "learning_rate": 3.520058951949056e-06, "loss": 0.08399295806884766, "step": 4483 }, { "epoch": 0.6248171114052812, "grad_norm": 1.1439744234085083, "learning_rate": 3.517807936472942e-06, "loss": 0.10489702224731445, "step": 4484 }, { "epoch": 0.6249564550964956, "grad_norm": 0.9395781755447388, "learning_rate": 3.515557250306067e-06, "loss": 0.10610198974609375, "step": 4485 }, { "epoch": 0.6250957987877099, "grad_norm": 0.6723631620407104, "learning_rate": 3.5133068939484793e-06, "loss": 0.08399009704589844, "step": 4486 }, { "epoch": 0.6252351424789243, "grad_norm": 0.8415093421936035, "learning_rate": 3.511056867900157e-06, "loss": 0.09829902648925781, "step": 4487 }, { "epoch": 0.6253744861701387, "grad_norm": 0.8338758945465088, "learning_rate": 3.508807172661006e-06, "loss": 0.0791635513305664, "step": 4488 }, { "epoch": 0.6255138298613531, "grad_norm": 1.1930330991744995, "learning_rate": 3.506557808730857e-06, "loss": 0.08724308013916016, "step": 4489 }, { "epoch": 0.6256531735525674, "grad_norm": 0.8726964592933655, "learning_rate": 3.504308776609468e-06, "loss": 0.072967529296875, "step": 4490 }, { "epoch": 0.6257925172437818, "grad_norm": 0.9745309352874756, "learning_rate": 3.502060076796521e-06, "loss": 0.0822916030883789, "step": 4491 }, { "epoch": 0.6259318609349962, "grad_norm": 0.750465452671051, "learning_rate": 3.4998117097916247e-06, "loss": 0.08515167236328125, "step": 4492 }, { "epoch": 0.6260712046262106, "grad_norm": 0.8485553860664368, "learning_rate": 3.4975636760943177e-06, "loss": 0.0722208023071289, "step": 4493 }, { "epoch": 0.626210548317425, "grad_norm": 0.770072340965271, "learning_rate": 3.49531597620406e-06, "loss": 0.07541847229003906, "step": 4494 }, { "epoch": 0.6263498920086393, "grad_norm": 0.9408285617828369, "learning_rate": 3.4930686106202428e-06, "loss": 0.08956623077392578, "step": 4495 }, { "epoch": 0.6264892356998537, "grad_norm": 0.4980471432209015, "learning_rate": 3.4908215798421737e-06, "loss": 0.07531452178955078, "step": 4496 }, { "epoch": 0.6266285793910681, "grad_norm": 0.7382539510726929, "learning_rate": 3.488574884369095e-06, "loss": 0.07655572891235352, "step": 4497 }, { "epoch": 0.6267679230822825, "grad_norm": 0.5089014172554016, "learning_rate": 3.486328524700171e-06, "loss": 0.06210756301879883, "step": 4498 }, { "epoch": 0.6269072667734968, "grad_norm": 0.6127238273620605, "learning_rate": 3.4840825013344897e-06, "loss": 0.08837699890136719, "step": 4499 }, { "epoch": 0.6270466104647112, "grad_norm": 0.8206783533096313, "learning_rate": 3.48183681477107e-06, "loss": 0.07710075378417969, "step": 4500 }, { "epoch": 0.6271859541559256, "grad_norm": 0.7969608902931213, "learning_rate": 3.4795914655088486e-06, "loss": 0.07484817504882812, "step": 4501 }, { "epoch": 0.62732529784714, "grad_norm": 0.5776578187942505, "learning_rate": 3.4773464540466917e-06, "loss": 0.0725555419921875, "step": 4502 }, { "epoch": 0.6274646415383544, "grad_norm": 0.8487640619277954, "learning_rate": 3.47510178088339e-06, "loss": 0.06137418746948242, "step": 4503 }, { "epoch": 0.6276039852295687, "grad_norm": 0.6219324469566345, "learning_rate": 3.4728574465176585e-06, "loss": 0.0672616958618164, "step": 4504 }, { "epoch": 0.6277433289207831, "grad_norm": 0.9445246458053589, "learning_rate": 3.4706134514481372e-06, "loss": 0.08728313446044922, "step": 4505 }, { "epoch": 0.6278826726119975, "grad_norm": 0.6544594168663025, "learning_rate": 3.468369796173392e-06, "loss": 0.08057212829589844, "step": 4506 }, { "epoch": 0.6280220163032119, "grad_norm": 0.9069169759750366, "learning_rate": 3.4661264811919093e-06, "loss": 0.08999824523925781, "step": 4507 }, { "epoch": 0.6281613599944262, "grad_norm": 0.567584216594696, "learning_rate": 3.4638835070021027e-06, "loss": 0.06231498718261719, "step": 4508 }, { "epoch": 0.6283007036856406, "grad_norm": 0.483320415019989, "learning_rate": 3.4616408741023113e-06, "loss": 0.06676864624023438, "step": 4509 }, { "epoch": 0.628440047376855, "grad_norm": 0.6472811102867126, "learning_rate": 3.459398582990795e-06, "loss": 0.062408447265625, "step": 4510 }, { "epoch": 0.6285793910680694, "grad_norm": 0.7628535628318787, "learning_rate": 3.4571566341657446e-06, "loss": 0.07865142822265625, "step": 4511 }, { "epoch": 0.6287187347592837, "grad_norm": 0.46493175625801086, "learning_rate": 3.4549150281252635e-06, "loss": 0.05392932891845703, "step": 4512 }, { "epoch": 0.6288580784504981, "grad_norm": 0.4801939129829407, "learning_rate": 3.452673765367389e-06, "loss": 0.06291770935058594, "step": 4513 }, { "epoch": 0.6289974221417125, "grad_norm": 0.7272585034370422, "learning_rate": 3.450432846390078e-06, "loss": 0.07953906059265137, "step": 4514 }, { "epoch": 0.6291367658329269, "grad_norm": 0.8361619710922241, "learning_rate": 3.4481922716912097e-06, "loss": 0.0769205093383789, "step": 4515 }, { "epoch": 0.6292761095241413, "grad_norm": 0.8503139615058899, "learning_rate": 3.445952041768593e-06, "loss": 0.10537052154541016, "step": 4516 }, { "epoch": 0.6294154532153556, "grad_norm": 0.5686396360397339, "learning_rate": 3.443712157119952e-06, "loss": 0.06415367126464844, "step": 4517 }, { "epoch": 0.62955479690657, "grad_norm": 0.46587935090065, "learning_rate": 3.4414726182429388e-06, "loss": 0.07218265533447266, "step": 4518 }, { "epoch": 0.6296941405977844, "grad_norm": 0.8864328265190125, "learning_rate": 3.4392334256351265e-06, "loss": 0.10408878326416016, "step": 4519 }, { "epoch": 0.6298334842889988, "grad_norm": 0.9535520076751709, "learning_rate": 3.436994579794016e-06, "loss": 0.1068267822265625, "step": 4520 }, { "epoch": 0.6299728279802131, "grad_norm": 0.6030274629592896, "learning_rate": 3.4347560812170267e-06, "loss": 0.0748748779296875, "step": 4521 }, { "epoch": 0.6301121716714275, "grad_norm": 0.8374924063682556, "learning_rate": 3.4325179304014997e-06, "loss": 0.0768280029296875, "step": 4522 }, { "epoch": 0.6302515153626419, "grad_norm": 0.9481292366981506, "learning_rate": 3.4302801278447028e-06, "loss": 0.09642982482910156, "step": 4523 }, { "epoch": 0.6303908590538564, "grad_norm": 0.7615745067596436, "learning_rate": 3.428042674043822e-06, "loss": 0.09220218658447266, "step": 4524 }, { "epoch": 0.6305302027450708, "grad_norm": 0.5220261216163635, "learning_rate": 3.425805569495973e-06, "loss": 0.07726764678955078, "step": 4525 }, { "epoch": 0.6306695464362851, "grad_norm": 0.708674430847168, "learning_rate": 3.4235688146981854e-06, "loss": 0.09016704559326172, "step": 4526 }, { "epoch": 0.6308088901274995, "grad_norm": 0.6022070646286011, "learning_rate": 3.42133241014742e-06, "loss": 0.08472537994384766, "step": 4527 }, { "epoch": 0.6309482338187139, "grad_norm": 0.7286180257797241, "learning_rate": 3.4190963563405482e-06, "loss": 0.09459304809570312, "step": 4528 }, { "epoch": 0.6310875775099283, "grad_norm": 0.7365534901618958, "learning_rate": 3.416860653774374e-06, "loss": 0.0802755355834961, "step": 4529 }, { "epoch": 0.6312269212011427, "grad_norm": 1.0185017585754395, "learning_rate": 3.4146253029456195e-06, "loss": 0.09096813201904297, "step": 4530 }, { "epoch": 0.631366264892357, "grad_norm": 0.8367666006088257, "learning_rate": 3.4123903043509267e-06, "loss": 0.08365631103515625, "step": 4531 }, { "epoch": 0.6315056085835714, "grad_norm": 0.8717544674873352, "learning_rate": 3.4101556584868646e-06, "loss": 0.0808248519897461, "step": 4532 }, { "epoch": 0.6316449522747858, "grad_norm": 0.6289443969726562, "learning_rate": 3.407921365849917e-06, "loss": 0.07989978790283203, "step": 4533 }, { "epoch": 0.6317842959660002, "grad_norm": 0.9436551928520203, "learning_rate": 3.4056874269364946e-06, "loss": 0.09184646606445312, "step": 4534 }, { "epoch": 0.6319236396572145, "grad_norm": 1.0217645168304443, "learning_rate": 3.4034538422429263e-06, "loss": 0.09129095077514648, "step": 4535 }, { "epoch": 0.6320629833484289, "grad_norm": 0.7051357626914978, "learning_rate": 3.401220612265465e-06, "loss": 0.08676624298095703, "step": 4536 }, { "epoch": 0.6322023270396433, "grad_norm": 0.5318107604980469, "learning_rate": 3.3989877375002846e-06, "loss": 0.07521247863769531, "step": 4537 }, { "epoch": 0.6323416707308577, "grad_norm": 0.6208105087280273, "learning_rate": 3.3967552184434753e-06, "loss": 0.07008647918701172, "step": 4538 }, { "epoch": 0.632481014422072, "grad_norm": 0.577799916267395, "learning_rate": 3.3945230555910534e-06, "loss": 0.0634760856628418, "step": 4539 }, { "epoch": 0.6326203581132864, "grad_norm": 1.2867236137390137, "learning_rate": 3.3922912494389554e-06, "loss": 0.10439872741699219, "step": 4540 }, { "epoch": 0.6327597018045008, "grad_norm": 0.4622885286808014, "learning_rate": 3.3900598004830377e-06, "loss": 0.06987857818603516, "step": 4541 }, { "epoch": 0.6328990454957152, "grad_norm": 0.7837492823600769, "learning_rate": 3.387828709219075e-06, "loss": 0.09596061706542969, "step": 4542 }, { "epoch": 0.6330383891869296, "grad_norm": 0.8712462782859802, "learning_rate": 3.3855979761427705e-06, "loss": 0.07824993133544922, "step": 4543 }, { "epoch": 0.6331777328781439, "grad_norm": 0.47986480593681335, "learning_rate": 3.3833676017497353e-06, "loss": 0.06654644012451172, "step": 4544 }, { "epoch": 0.6333170765693583, "grad_norm": 0.7280769348144531, "learning_rate": 3.381137586535511e-06, "loss": 0.062188148498535156, "step": 4545 }, { "epoch": 0.6334564202605727, "grad_norm": 0.7459626197814941, "learning_rate": 3.3789079309955556e-06, "loss": 0.08215522766113281, "step": 4546 }, { "epoch": 0.6335957639517871, "grad_norm": 0.6975525617599487, "learning_rate": 3.3766786356252466e-06, "loss": 0.07302379608154297, "step": 4547 }, { "epoch": 0.6337351076430014, "grad_norm": 1.8512611389160156, "learning_rate": 3.374449700919887e-06, "loss": 0.08559989929199219, "step": 4548 }, { "epoch": 0.6338744513342158, "grad_norm": 0.7531207203865051, "learning_rate": 3.37222112737469e-06, "loss": 0.078765869140625, "step": 4549 }, { "epoch": 0.6340137950254302, "grad_norm": 0.8268770575523376, "learning_rate": 3.3699929154847957e-06, "loss": 0.07749080657958984, "step": 4550 }, { "epoch": 0.6341531387166446, "grad_norm": 0.8270609974861145, "learning_rate": 3.367765065745261e-06, "loss": 0.07878303527832031, "step": 4551 }, { "epoch": 0.634292482407859, "grad_norm": 0.5603635907173157, "learning_rate": 3.365537578651065e-06, "loss": 0.06699466705322266, "step": 4552 }, { "epoch": 0.6344318260990733, "grad_norm": 0.7750372290611267, "learning_rate": 3.3633104546971052e-06, "loss": 0.07285642623901367, "step": 4553 }, { "epoch": 0.6345711697902877, "grad_norm": 1.0715336799621582, "learning_rate": 3.3610836943781945e-06, "loss": 0.07714176177978516, "step": 4554 }, { "epoch": 0.6347105134815021, "grad_norm": 0.8378635048866272, "learning_rate": 3.358857298189069e-06, "loss": 0.06884574890136719, "step": 4555 }, { "epoch": 0.6348498571727165, "grad_norm": 1.02788507938385, "learning_rate": 3.356631266624385e-06, "loss": 0.08268260955810547, "step": 4556 }, { "epoch": 0.6349892008639308, "grad_norm": 0.6457829475402832, "learning_rate": 3.3544056001787146e-06, "loss": 0.08192062377929688, "step": 4557 }, { "epoch": 0.6351285445551452, "grad_norm": 1.2942167520523071, "learning_rate": 3.3521802993465513e-06, "loss": 0.08100318908691406, "step": 4558 }, { "epoch": 0.6352678882463596, "grad_norm": 0.58180832862854, "learning_rate": 3.3499553646223037e-06, "loss": 0.07616972923278809, "step": 4559 }, { "epoch": 0.635407231937574, "grad_norm": 0.6218830347061157, "learning_rate": 3.3477307965003026e-06, "loss": 0.08229446411132812, "step": 4560 }, { "epoch": 0.6355465756287884, "grad_norm": 0.7690730690956116, "learning_rate": 3.345506595474798e-06, "loss": 0.09326887130737305, "step": 4561 }, { "epoch": 0.6356859193200027, "grad_norm": 0.6236406564712524, "learning_rate": 3.3432827620399543e-06, "loss": 0.07844829559326172, "step": 4562 }, { "epoch": 0.6358252630112171, "grad_norm": 1.250160813331604, "learning_rate": 3.3410592966898565e-06, "loss": 0.07401180267333984, "step": 4563 }, { "epoch": 0.6359646067024316, "grad_norm": 0.7540806531906128, "learning_rate": 3.3388361999185105e-06, "loss": 0.07323741912841797, "step": 4564 }, { "epoch": 0.636103950393646, "grad_norm": 1.1482592821121216, "learning_rate": 3.3366134722198352e-06, "loss": 0.10951995849609375, "step": 4565 }, { "epoch": 0.6362432940848604, "grad_norm": 1.638981580734253, "learning_rate": 3.3343911140876704e-06, "loss": 0.10675907135009766, "step": 4566 }, { "epoch": 0.6363826377760747, "grad_norm": 0.49330949783325195, "learning_rate": 3.332169126015773e-06, "loss": 0.06955528259277344, "step": 4567 }, { "epoch": 0.6365219814672891, "grad_norm": 1.1243666410446167, "learning_rate": 3.3299475084978195e-06, "loss": 0.0964040756225586, "step": 4568 }, { "epoch": 0.6366613251585035, "grad_norm": 0.673554539680481, "learning_rate": 3.3277262620274025e-06, "loss": 0.08295059204101562, "step": 4569 }, { "epoch": 0.6368006688497179, "grad_norm": 0.7230557799339294, "learning_rate": 3.3255053870980304e-06, "loss": 0.09587860107421875, "step": 4570 }, { "epoch": 0.6369400125409322, "grad_norm": 0.777535617351532, "learning_rate": 3.3232848842031306e-06, "loss": 0.08082771301269531, "step": 4571 }, { "epoch": 0.6370793562321466, "grad_norm": 0.7330438494682312, "learning_rate": 3.3210647538360514e-06, "loss": 0.09157276153564453, "step": 4572 }, { "epoch": 0.637218699923361, "grad_norm": 0.897487223148346, "learning_rate": 3.3188449964900527e-06, "loss": 0.10998129844665527, "step": 4573 }, { "epoch": 0.6373580436145754, "grad_norm": 0.5002469420433044, "learning_rate": 3.316625612658315e-06, "loss": 0.07296562194824219, "step": 4574 }, { "epoch": 0.6374973873057898, "grad_norm": 0.6869798302650452, "learning_rate": 3.314406602833933e-06, "loss": 0.07900619506835938, "step": 4575 }, { "epoch": 0.6376367309970041, "grad_norm": 0.7817549705505371, "learning_rate": 3.3121879675099205e-06, "loss": 0.08552837371826172, "step": 4576 }, { "epoch": 0.6377760746882185, "grad_norm": 0.8232018351554871, "learning_rate": 3.3099697071792093e-06, "loss": 0.08690452575683594, "step": 4577 }, { "epoch": 0.6379154183794329, "grad_norm": 1.0079045295715332, "learning_rate": 3.3077518223346448e-06, "loss": 0.08403968811035156, "step": 4578 }, { "epoch": 0.6380547620706473, "grad_norm": 0.7173486351966858, "learning_rate": 3.30553431346899e-06, "loss": 0.0726938247680664, "step": 4579 }, { "epoch": 0.6381941057618616, "grad_norm": 0.9738990664482117, "learning_rate": 3.3033171810749274e-06, "loss": 0.09298419952392578, "step": 4580 }, { "epoch": 0.638333449453076, "grad_norm": 0.9143243432044983, "learning_rate": 3.3011004256450497e-06, "loss": 0.06858634948730469, "step": 4581 }, { "epoch": 0.6384727931442904, "grad_norm": 0.5901092290878296, "learning_rate": 3.2988840476718713e-06, "loss": 0.07868766784667969, "step": 4582 }, { "epoch": 0.6386121368355048, "grad_norm": 0.7030945420265198, "learning_rate": 3.2966680476478196e-06, "loss": 0.0694742202758789, "step": 4583 }, { "epoch": 0.6387514805267192, "grad_norm": 0.8409934043884277, "learning_rate": 3.294452426065241e-06, "loss": 0.0769505500793457, "step": 4584 }, { "epoch": 0.6388908242179335, "grad_norm": 0.7648327946662903, "learning_rate": 3.2922371834163958e-06, "loss": 0.07050323486328125, "step": 4585 }, { "epoch": 0.6390301679091479, "grad_norm": 0.7341051697731018, "learning_rate": 3.2900223201934584e-06, "loss": 0.08561515808105469, "step": 4586 }, { "epoch": 0.6391695116003623, "grad_norm": 0.5763102173805237, "learning_rate": 3.287807836888521e-06, "loss": 0.08259773254394531, "step": 4587 }, { "epoch": 0.6393088552915767, "grad_norm": 0.8066757917404175, "learning_rate": 3.2855937339935933e-06, "loss": 0.08423328399658203, "step": 4588 }, { "epoch": 0.639448198982791, "grad_norm": 0.6415677070617676, "learning_rate": 3.2833800120005977e-06, "loss": 0.06317615509033203, "step": 4589 }, { "epoch": 0.6395875426740054, "grad_norm": 0.7657480835914612, "learning_rate": 3.2811666714013724e-06, "loss": 0.07350540161132812, "step": 4590 }, { "epoch": 0.6397268863652198, "grad_norm": 0.6232535243034363, "learning_rate": 3.2789537126876714e-06, "loss": 0.06757640838623047, "step": 4591 }, { "epoch": 0.6398662300564342, "grad_norm": 0.7877157926559448, "learning_rate": 3.2767411363511613e-06, "loss": 0.08202457427978516, "step": 4592 }, { "epoch": 0.6400055737476485, "grad_norm": 0.9758917689323425, "learning_rate": 3.2745289428834294e-06, "loss": 0.08697700500488281, "step": 4593 }, { "epoch": 0.6401449174388629, "grad_norm": 0.443736732006073, "learning_rate": 3.272317132775972e-06, "loss": 0.06331539154052734, "step": 4594 }, { "epoch": 0.6402842611300773, "grad_norm": 0.48531651496887207, "learning_rate": 3.270105706520207e-06, "loss": 0.07590961456298828, "step": 4595 }, { "epoch": 0.6404236048212917, "grad_norm": 0.9069935083389282, "learning_rate": 3.267894664607457e-06, "loss": 0.08100032806396484, "step": 4596 }, { "epoch": 0.6405629485125061, "grad_norm": 0.8647286295890808, "learning_rate": 3.265684007528969e-06, "loss": 0.0680398941040039, "step": 4597 }, { "epoch": 0.6407022922037204, "grad_norm": 1.5764710903167725, "learning_rate": 3.2634737357758994e-06, "loss": 0.08220577239990234, "step": 4598 }, { "epoch": 0.6408416358949348, "grad_norm": 0.49421408772468567, "learning_rate": 3.261263849839319e-06, "loss": 0.07334041595458984, "step": 4599 }, { "epoch": 0.6409809795861492, "grad_norm": 0.9871801137924194, "learning_rate": 3.2590543502102163e-06, "loss": 0.0954275131225586, "step": 4600 }, { "epoch": 0.6411203232773636, "grad_norm": 0.682732105255127, "learning_rate": 3.256845237379491e-06, "loss": 0.07327079772949219, "step": 4601 }, { "epoch": 0.641259666968578, "grad_norm": 0.42576777935028076, "learning_rate": 3.254636511837957e-06, "loss": 0.06180095672607422, "step": 4602 }, { "epoch": 0.6413990106597923, "grad_norm": 0.6286789774894714, "learning_rate": 3.252428174076341e-06, "loss": 0.07782649993896484, "step": 4603 }, { "epoch": 0.6415383543510068, "grad_norm": 0.8071266412734985, "learning_rate": 3.2502202245852887e-06, "loss": 0.08709239959716797, "step": 4604 }, { "epoch": 0.6416776980422212, "grad_norm": 0.7721019983291626, "learning_rate": 3.2480126638553533e-06, "loss": 0.07481193542480469, "step": 4605 }, { "epoch": 0.6418170417334356, "grad_norm": 1.1114107370376587, "learning_rate": 3.245805492377007e-06, "loss": 0.07958221435546875, "step": 4606 }, { "epoch": 0.64195638542465, "grad_norm": 0.9886832237243652, "learning_rate": 3.243598710640631e-06, "loss": 0.10088920593261719, "step": 4607 }, { "epoch": 0.6420957291158643, "grad_norm": 0.7818025350570679, "learning_rate": 3.2413923191365203e-06, "loss": 0.08837032318115234, "step": 4608 }, { "epoch": 0.6422350728070787, "grad_norm": 1.101312518119812, "learning_rate": 3.2391863183548877e-06, "loss": 0.09579086303710938, "step": 4609 }, { "epoch": 0.6423744164982931, "grad_norm": 0.7269907593727112, "learning_rate": 3.236980708785854e-06, "loss": 0.08610725402832031, "step": 4610 }, { "epoch": 0.6425137601895075, "grad_norm": 0.7791394591331482, "learning_rate": 3.2347754909194595e-06, "loss": 0.0671834945678711, "step": 4611 }, { "epoch": 0.6426531038807218, "grad_norm": 1.2632777690887451, "learning_rate": 3.232570665245648e-06, "loss": 0.09461355209350586, "step": 4612 }, { "epoch": 0.6427924475719362, "grad_norm": 0.677693784236908, "learning_rate": 3.2303662322542835e-06, "loss": 0.09958791732788086, "step": 4613 }, { "epoch": 0.6429317912631506, "grad_norm": 1.1211371421813965, "learning_rate": 3.2281621924351407e-06, "loss": 0.09424781799316406, "step": 4614 }, { "epoch": 0.643071134954365, "grad_norm": 0.5033317804336548, "learning_rate": 3.2259585462779063e-06, "loss": 0.07461357116699219, "step": 4615 }, { "epoch": 0.6432104786455793, "grad_norm": 1.0346908569335938, "learning_rate": 3.2237552942721832e-06, "loss": 0.08559608459472656, "step": 4616 }, { "epoch": 0.6433498223367937, "grad_norm": 0.7302229404449463, "learning_rate": 3.2215524369074802e-06, "loss": 0.11208057403564453, "step": 4617 }, { "epoch": 0.6434891660280081, "grad_norm": 0.8667827248573303, "learning_rate": 3.219349974673223e-06, "loss": 0.08230209350585938, "step": 4618 }, { "epoch": 0.6436285097192225, "grad_norm": 0.8974190950393677, "learning_rate": 3.2171479080587475e-06, "loss": 0.09215831756591797, "step": 4619 }, { "epoch": 0.6437678534104369, "grad_norm": 0.5892030596733093, "learning_rate": 3.2149462375533046e-06, "loss": 0.07623958587646484, "step": 4620 }, { "epoch": 0.6439071971016512, "grad_norm": 0.5162314176559448, "learning_rate": 3.212744963646054e-06, "loss": 0.07298946380615234, "step": 4621 }, { "epoch": 0.6440465407928656, "grad_norm": 0.8294979929924011, "learning_rate": 3.2105440868260706e-06, "loss": 0.07874917984008789, "step": 4622 }, { "epoch": 0.64418588448408, "grad_norm": 0.6707050204277039, "learning_rate": 3.2083436075823353e-06, "loss": 0.06802129745483398, "step": 4623 }, { "epoch": 0.6443252281752944, "grad_norm": 0.5140377283096313, "learning_rate": 3.2061435264037457e-06, "loss": 0.0631866455078125, "step": 4624 }, { "epoch": 0.6444645718665087, "grad_norm": 0.5361475348472595, "learning_rate": 3.2039438437791105e-06, "loss": 0.07885169982910156, "step": 4625 }, { "epoch": 0.6446039155577231, "grad_norm": 0.8027443289756775, "learning_rate": 3.2017445601971474e-06, "loss": 0.0816946029663086, "step": 4626 }, { "epoch": 0.6447432592489375, "grad_norm": 0.5218444466590881, "learning_rate": 3.199545676146492e-06, "loss": 0.0660867691040039, "step": 4627 }, { "epoch": 0.6448826029401519, "grad_norm": 0.6063755750656128, "learning_rate": 3.197347192115679e-06, "loss": 0.07160377502441406, "step": 4628 }, { "epoch": 0.6450219466313662, "grad_norm": 0.45971760153770447, "learning_rate": 3.1951491085931657e-06, "loss": 0.06571435928344727, "step": 4629 }, { "epoch": 0.6451612903225806, "grad_norm": 0.9962562918663025, "learning_rate": 3.1929514260673145e-06, "loss": 0.08324575424194336, "step": 4630 }, { "epoch": 0.645300634013795, "grad_norm": 0.5423087477684021, "learning_rate": 3.1907541450264003e-06, "loss": 0.06821155548095703, "step": 4631 }, { "epoch": 0.6454399777050094, "grad_norm": 0.8110535144805908, "learning_rate": 3.188557265958612e-06, "loss": 0.08840370178222656, "step": 4632 }, { "epoch": 0.6455793213962238, "grad_norm": 0.5956542491912842, "learning_rate": 3.186360789352041e-06, "loss": 0.06883716583251953, "step": 4633 }, { "epoch": 0.6457186650874381, "grad_norm": 0.687599241733551, "learning_rate": 3.184164715694697e-06, "loss": 0.07088041305541992, "step": 4634 }, { "epoch": 0.6458580087786525, "grad_norm": 0.6637515425682068, "learning_rate": 3.1819690454744956e-06, "loss": 0.07384395599365234, "step": 4635 }, { "epoch": 0.6459973524698669, "grad_norm": 0.8221317529678345, "learning_rate": 3.1797737791792672e-06, "loss": 0.08634376525878906, "step": 4636 }, { "epoch": 0.6461366961610813, "grad_norm": 0.6539313793182373, "learning_rate": 3.1775789172967486e-06, "loss": 0.07150840759277344, "step": 4637 }, { "epoch": 0.6462760398522956, "grad_norm": 0.7865381240844727, "learning_rate": 3.1753844603145894e-06, "loss": 0.08231925964355469, "step": 4638 }, { "epoch": 0.64641538354351, "grad_norm": 0.7801021933555603, "learning_rate": 3.1731904087203442e-06, "loss": 0.09393119812011719, "step": 4639 }, { "epoch": 0.6465547272347244, "grad_norm": 0.46035048365592957, "learning_rate": 3.1709967630014844e-06, "loss": 0.05673503875732422, "step": 4640 }, { "epoch": 0.6466940709259388, "grad_norm": 0.5458152294158936, "learning_rate": 3.168803523645387e-06, "loss": 0.07293701171875, "step": 4641 }, { "epoch": 0.6468334146171532, "grad_norm": 0.8133679032325745, "learning_rate": 3.166610691139338e-06, "loss": 0.08829498291015625, "step": 4642 }, { "epoch": 0.6469727583083675, "grad_norm": 0.680172860622406, "learning_rate": 3.1644182659705403e-06, "loss": 0.06813430786132812, "step": 4643 }, { "epoch": 0.647112101999582, "grad_norm": 0.7413206100463867, "learning_rate": 3.1622262486260936e-06, "loss": 0.09092044830322266, "step": 4644 }, { "epoch": 0.6472514456907964, "grad_norm": 0.6684130430221558, "learning_rate": 3.160034639593018e-06, "loss": 0.07948875427246094, "step": 4645 }, { "epoch": 0.6473907893820108, "grad_norm": 0.696881115436554, "learning_rate": 3.1578434393582392e-06, "loss": 0.07566976547241211, "step": 4646 }, { "epoch": 0.6475301330732252, "grad_norm": 0.8456196784973145, "learning_rate": 3.155652648408589e-06, "loss": 0.08736515045166016, "step": 4647 }, { "epoch": 0.6476694767644395, "grad_norm": 0.8708062767982483, "learning_rate": 3.1534622672308165e-06, "loss": 0.08403205871582031, "step": 4648 }, { "epoch": 0.6478088204556539, "grad_norm": 0.5972769260406494, "learning_rate": 3.1512722963115693e-06, "loss": 0.05935859680175781, "step": 4649 }, { "epoch": 0.6479481641468683, "grad_norm": 1.074509620666504, "learning_rate": 3.1490827361374105e-06, "loss": 0.08560371398925781, "step": 4650 }, { "epoch": 0.6480875078380827, "grad_norm": 0.8542039394378662, "learning_rate": 3.1468935871948096e-06, "loss": 0.10082435607910156, "step": 4651 }, { "epoch": 0.648226851529297, "grad_norm": 0.7467958331108093, "learning_rate": 3.1447048499701478e-06, "loss": 0.07959556579589844, "step": 4652 }, { "epoch": 0.6483661952205114, "grad_norm": 0.9858888387680054, "learning_rate": 3.1425165249497118e-06, "loss": 0.07726287841796875, "step": 4653 }, { "epoch": 0.6485055389117258, "grad_norm": 0.7181345224380493, "learning_rate": 3.1403286126196963e-06, "loss": 0.08127593994140625, "step": 4654 }, { "epoch": 0.6486448826029402, "grad_norm": 0.7937012910842896, "learning_rate": 3.138141113466205e-06, "loss": 0.08194351196289062, "step": 4655 }, { "epoch": 0.6487842262941546, "grad_norm": 0.5784011483192444, "learning_rate": 3.135954027975252e-06, "loss": 0.07581758499145508, "step": 4656 }, { "epoch": 0.6489235699853689, "grad_norm": 1.2351291179656982, "learning_rate": 3.1337673566327575e-06, "loss": 0.07556915283203125, "step": 4657 }, { "epoch": 0.6490629136765833, "grad_norm": 0.5352851152420044, "learning_rate": 3.1315810999245483e-06, "loss": 0.06836795806884766, "step": 4658 }, { "epoch": 0.6492022573677977, "grad_norm": 0.6333573460578918, "learning_rate": 3.1293952583363653e-06, "loss": 0.07991313934326172, "step": 4659 }, { "epoch": 0.6493416010590121, "grad_norm": 0.7636816501617432, "learning_rate": 3.127209832353846e-06, "loss": 0.062478065490722656, "step": 4660 }, { "epoch": 0.6494809447502264, "grad_norm": 0.851180911064148, "learning_rate": 3.1250248224625463e-06, "loss": 0.09170675277709961, "step": 4661 }, { "epoch": 0.6496202884414408, "grad_norm": 1.3333669900894165, "learning_rate": 3.1228402291479243e-06, "loss": 0.09803390502929688, "step": 4662 }, { "epoch": 0.6497596321326552, "grad_norm": 0.8676568269729614, "learning_rate": 3.1206560528953467e-06, "loss": 0.08541631698608398, "step": 4663 }, { "epoch": 0.6498989758238696, "grad_norm": 1.0467751026153564, "learning_rate": 3.1184722941900902e-06, "loss": 0.1039581298828125, "step": 4664 }, { "epoch": 0.650038319515084, "grad_norm": 0.532762885093689, "learning_rate": 3.1162889535173323e-06, "loss": 0.06759238243103027, "step": 4665 }, { "epoch": 0.6501776632062983, "grad_norm": 0.5439582467079163, "learning_rate": 3.1141060313621637e-06, "loss": 0.06571388244628906, "step": 4666 }, { "epoch": 0.6503170068975127, "grad_norm": 0.6444013118743896, "learning_rate": 3.111923528209577e-06, "loss": 0.07754135131835938, "step": 4667 }, { "epoch": 0.6504563505887271, "grad_norm": 1.3153947591781616, "learning_rate": 3.1097414445444796e-06, "loss": 0.0785975456237793, "step": 4668 }, { "epoch": 0.6505956942799415, "grad_norm": 0.5701818466186523, "learning_rate": 3.1075597808516776e-06, "loss": 0.07465648651123047, "step": 4669 }, { "epoch": 0.6507350379711558, "grad_norm": 1.2274922132492065, "learning_rate": 3.1053785376158865e-06, "loss": 0.08759641647338867, "step": 4670 }, { "epoch": 0.6508743816623702, "grad_norm": 1.0558767318725586, "learning_rate": 3.1031977153217286e-06, "loss": 0.086151123046875, "step": 4671 }, { "epoch": 0.6510137253535846, "grad_norm": 0.9892904162406921, "learning_rate": 3.1010173144537348e-06, "loss": 0.09698104858398438, "step": 4672 }, { "epoch": 0.651153069044799, "grad_norm": 0.6348336338996887, "learning_rate": 3.0988373354963387e-06, "loss": 0.08411741256713867, "step": 4673 }, { "epoch": 0.6512924127360133, "grad_norm": 0.6622371077537537, "learning_rate": 3.0966577789338812e-06, "loss": 0.07867050170898438, "step": 4674 }, { "epoch": 0.6514317564272277, "grad_norm": 0.5658945441246033, "learning_rate": 3.0944786452506147e-06, "loss": 0.06948089599609375, "step": 4675 }, { "epoch": 0.6515711001184421, "grad_norm": 0.6970974206924438, "learning_rate": 3.092299934930686e-06, "loss": 0.09372568130493164, "step": 4676 }, { "epoch": 0.6517104438096565, "grad_norm": 0.7939576506614685, "learning_rate": 3.0901216484581597e-06, "loss": 0.08093929290771484, "step": 4677 }, { "epoch": 0.6518497875008709, "grad_norm": 1.00933837890625, "learning_rate": 3.087943786316999e-06, "loss": 0.10720062255859375, "step": 4678 }, { "epoch": 0.6519891311920852, "grad_norm": 0.5184310674667358, "learning_rate": 3.085766348991076e-06, "loss": 0.07486915588378906, "step": 4679 }, { "epoch": 0.6521284748832996, "grad_norm": 0.6376537084579468, "learning_rate": 3.0835893369641694e-06, "loss": 0.08181095123291016, "step": 4680 }, { "epoch": 0.652267818574514, "grad_norm": 1.0717710256576538, "learning_rate": 3.0814127507199587e-06, "loss": 0.0996246337890625, "step": 4681 }, { "epoch": 0.6524071622657284, "grad_norm": 0.742996096611023, "learning_rate": 3.0792365907420323e-06, "loss": 0.07569408416748047, "step": 4682 }, { "epoch": 0.6525465059569427, "grad_norm": 0.9832045435905457, "learning_rate": 3.0770608575138825e-06, "loss": 0.0765380859375, "step": 4683 }, { "epoch": 0.6526858496481572, "grad_norm": 0.6279642581939697, "learning_rate": 3.0748855515189104e-06, "loss": 0.07491111755371094, "step": 4684 }, { "epoch": 0.6528251933393716, "grad_norm": 0.526736855506897, "learning_rate": 3.0727106732404183e-06, "loss": 0.0629129409790039, "step": 4685 }, { "epoch": 0.652964537030586, "grad_norm": 0.46050214767456055, "learning_rate": 3.0705362231616133e-06, "loss": 0.06870317459106445, "step": 4686 }, { "epoch": 0.6531038807218004, "grad_norm": 0.5908984541893005, "learning_rate": 3.0683622017656074e-06, "loss": 0.07019233703613281, "step": 4687 }, { "epoch": 0.6532432244130147, "grad_norm": 0.5043548941612244, "learning_rate": 3.066188609535421e-06, "loss": 0.06201171875, "step": 4688 }, { "epoch": 0.6533825681042291, "grad_norm": 0.48129546642303467, "learning_rate": 3.064015446953977e-06, "loss": 0.06603765487670898, "step": 4689 }, { "epoch": 0.6535219117954435, "grad_norm": 0.6587588787078857, "learning_rate": 3.0618427145041017e-06, "loss": 0.07069587707519531, "step": 4690 }, { "epoch": 0.6536612554866579, "grad_norm": 1.3413358926773071, "learning_rate": 3.059670412668525e-06, "loss": 0.10685539245605469, "step": 4691 }, { "epoch": 0.6538005991778723, "grad_norm": 0.4428390562534332, "learning_rate": 3.0574985419298843e-06, "loss": 0.061910152435302734, "step": 4692 }, { "epoch": 0.6539399428690866, "grad_norm": 1.0616799592971802, "learning_rate": 3.055327102770719e-06, "loss": 0.09261465072631836, "step": 4693 }, { "epoch": 0.654079286560301, "grad_norm": 0.7906513810157776, "learning_rate": 3.053156095673474e-06, "loss": 0.0892486572265625, "step": 4694 }, { "epoch": 0.6542186302515154, "grad_norm": 0.7658703327178955, "learning_rate": 3.0509855211204976e-06, "loss": 0.07686996459960938, "step": 4695 }, { "epoch": 0.6543579739427298, "grad_norm": 0.5407745242118835, "learning_rate": 3.048815379594043e-06, "loss": 0.06073284149169922, "step": 4696 }, { "epoch": 0.6544973176339441, "grad_norm": 0.6346362233161926, "learning_rate": 3.046645671576264e-06, "loss": 0.0758047103881836, "step": 4697 }, { "epoch": 0.6546366613251585, "grad_norm": 0.772803544998169, "learning_rate": 3.044476397549221e-06, "loss": 0.07071399688720703, "step": 4698 }, { "epoch": 0.6547760050163729, "grad_norm": 0.5477752089500427, "learning_rate": 3.0423075579948756e-06, "loss": 0.07751226425170898, "step": 4699 }, { "epoch": 0.6549153487075873, "grad_norm": 0.5497114658355713, "learning_rate": 3.0401391533950976e-06, "loss": 0.07062244415283203, "step": 4700 }, { "epoch": 0.6550546923988017, "grad_norm": 0.8265047669410706, "learning_rate": 3.037971184231655e-06, "loss": 0.0888051986694336, "step": 4701 }, { "epoch": 0.655194036090016, "grad_norm": 0.487436443567276, "learning_rate": 3.035803650986222e-06, "loss": 0.06000709533691406, "step": 4702 }, { "epoch": 0.6553333797812304, "grad_norm": 0.6181655526161194, "learning_rate": 3.0336365541403723e-06, "loss": 0.07985877990722656, "step": 4703 }, { "epoch": 0.6554727234724448, "grad_norm": 0.9031322002410889, "learning_rate": 3.0314698941755886e-06, "loss": 0.08628368377685547, "step": 4704 }, { "epoch": 0.6556120671636592, "grad_norm": 0.2957870662212372, "learning_rate": 3.0293036715732527e-06, "loss": 0.046335697174072266, "step": 4705 }, { "epoch": 0.6557514108548735, "grad_norm": 0.8476061224937439, "learning_rate": 3.0271378868146494e-06, "loss": 0.07502079010009766, "step": 4706 }, { "epoch": 0.6558907545460879, "grad_norm": 0.5899156332015991, "learning_rate": 3.024972540380966e-06, "loss": 0.09200477600097656, "step": 4707 }, { "epoch": 0.6560300982373023, "grad_norm": 0.9557761549949646, "learning_rate": 3.0228076327532925e-06, "loss": 0.07864952087402344, "step": 4708 }, { "epoch": 0.6561694419285167, "grad_norm": 1.0402708053588867, "learning_rate": 3.0206431644126234e-06, "loss": 0.09386730194091797, "step": 4709 }, { "epoch": 0.656308785619731, "grad_norm": 0.7721312046051025, "learning_rate": 3.0184791358398537e-06, "loss": 0.08732414245605469, "step": 4710 }, { "epoch": 0.6564481293109454, "grad_norm": 0.4496697783470154, "learning_rate": 3.016315547515783e-06, "loss": 0.0626535415649414, "step": 4711 }, { "epoch": 0.6565874730021598, "grad_norm": 0.8573461771011353, "learning_rate": 3.0141523999211065e-06, "loss": 0.09605979919433594, "step": 4712 }, { "epoch": 0.6567268166933742, "grad_norm": 0.6939491629600525, "learning_rate": 3.0119896935364305e-06, "loss": 0.07573890686035156, "step": 4713 }, { "epoch": 0.6568661603845886, "grad_norm": 0.7255886793136597, "learning_rate": 3.009827428842258e-06, "loss": 0.07724761962890625, "step": 4714 }, { "epoch": 0.6570055040758029, "grad_norm": 0.671631395816803, "learning_rate": 3.0076656063189926e-06, "loss": 0.08281517028808594, "step": 4715 }, { "epoch": 0.6571448477670173, "grad_norm": 0.7216387391090393, "learning_rate": 3.0055042264469447e-06, "loss": 0.09484577178955078, "step": 4716 }, { "epoch": 0.6572841914582317, "grad_norm": 0.7578354477882385, "learning_rate": 3.003343289706324e-06, "loss": 0.08173179626464844, "step": 4717 }, { "epoch": 0.6574235351494461, "grad_norm": 0.656990110874176, "learning_rate": 3.001182796577239e-06, "loss": 0.07577037811279297, "step": 4718 }, { "epoch": 0.6575628788406604, "grad_norm": 0.66335129737854, "learning_rate": 2.999022747539701e-06, "loss": 0.07244873046875, "step": 4719 }, { "epoch": 0.6577022225318748, "grad_norm": 0.5381746292114258, "learning_rate": 2.9968631430736274e-06, "loss": 0.05901837348937988, "step": 4720 }, { "epoch": 0.6578415662230892, "grad_norm": 0.758544385433197, "learning_rate": 2.99470398365883e-06, "loss": 0.06806468963623047, "step": 4721 }, { "epoch": 0.6579809099143036, "grad_norm": 0.4805281162261963, "learning_rate": 2.9925452697750275e-06, "loss": 0.06444835662841797, "step": 4722 }, { "epoch": 0.658120253605518, "grad_norm": 1.0113978385925293, "learning_rate": 2.990387001901834e-06, "loss": 0.10520267486572266, "step": 4723 }, { "epoch": 0.6582595972967323, "grad_norm": 0.6718996167182922, "learning_rate": 2.988229180518767e-06, "loss": 0.06726646423339844, "step": 4724 }, { "epoch": 0.6583989409879468, "grad_norm": 0.8499437570571899, "learning_rate": 2.9860718061052478e-06, "loss": 0.09369659423828125, "step": 4725 }, { "epoch": 0.6585382846791612, "grad_norm": 0.5140824317932129, "learning_rate": 2.9839148791405937e-06, "loss": 0.07381629943847656, "step": 4726 }, { "epoch": 0.6586776283703756, "grad_norm": 0.7570949792861938, "learning_rate": 2.981758400104028e-06, "loss": 0.0732421875, "step": 4727 }, { "epoch": 0.65881697206159, "grad_norm": 1.177418828010559, "learning_rate": 2.979602369474667e-06, "loss": 0.0896453857421875, "step": 4728 }, { "epoch": 0.6589563157528043, "grad_norm": 1.137342929840088, "learning_rate": 2.977446787731532e-06, "loss": 0.11609077453613281, "step": 4729 }, { "epoch": 0.6590956594440187, "grad_norm": 0.8058783411979675, "learning_rate": 2.975291655353546e-06, "loss": 0.0655517578125, "step": 4730 }, { "epoch": 0.6592350031352331, "grad_norm": 0.8761428594589233, "learning_rate": 2.9731369728195288e-06, "loss": 0.10872793197631836, "step": 4731 }, { "epoch": 0.6593743468264475, "grad_norm": 1.0170077085494995, "learning_rate": 2.9709827406082028e-06, "loss": 0.11317253112792969, "step": 4732 }, { "epoch": 0.6595136905176618, "grad_norm": 0.4755713939666748, "learning_rate": 2.9688289591981887e-06, "loss": 0.07014656066894531, "step": 4733 }, { "epoch": 0.6596530342088762, "grad_norm": 0.7314586639404297, "learning_rate": 2.9666756290680078e-06, "loss": 0.07852411270141602, "step": 4734 }, { "epoch": 0.6597923779000906, "grad_norm": 0.8823850154876709, "learning_rate": 2.964522750696079e-06, "loss": 0.1014261245727539, "step": 4735 }, { "epoch": 0.659931721591305, "grad_norm": 0.5471483469009399, "learning_rate": 2.962370324560725e-06, "loss": 0.060378074645996094, "step": 4736 }, { "epoch": 0.6600710652825194, "grad_norm": 0.6501719951629639, "learning_rate": 2.9602183511401656e-06, "loss": 0.07288265228271484, "step": 4737 }, { "epoch": 0.6602104089737337, "grad_norm": 0.6279168128967285, "learning_rate": 2.9580668309125203e-06, "loss": 0.09035110473632812, "step": 4738 }, { "epoch": 0.6603497526649481, "grad_norm": 0.7285110950469971, "learning_rate": 2.9559157643558046e-06, "loss": 0.0765385627746582, "step": 4739 }, { "epoch": 0.6604890963561625, "grad_norm": 0.7390869855880737, "learning_rate": 2.9537651519479403e-06, "loss": 0.06833267211914062, "step": 4740 }, { "epoch": 0.6606284400473769, "grad_norm": 0.9634250402450562, "learning_rate": 2.951614994166743e-06, "loss": 0.07674884796142578, "step": 4741 }, { "epoch": 0.6607677837385912, "grad_norm": 0.684539794921875, "learning_rate": 2.9494652914899267e-06, "loss": 0.0781550407409668, "step": 4742 }, { "epoch": 0.6609071274298056, "grad_norm": 0.790821373462677, "learning_rate": 2.947316044395112e-06, "loss": 0.08105182647705078, "step": 4743 }, { "epoch": 0.66104647112102, "grad_norm": 0.7193349003791809, "learning_rate": 2.945167253359806e-06, "loss": 0.0767364501953125, "step": 4744 }, { "epoch": 0.6611858148122344, "grad_norm": 1.0387845039367676, "learning_rate": 2.943018918861424e-06, "loss": 0.08544588088989258, "step": 4745 }, { "epoch": 0.6613251585034488, "grad_norm": 0.5270442962646484, "learning_rate": 2.940871041377277e-06, "loss": 0.061547279357910156, "step": 4746 }, { "epoch": 0.6614645021946631, "grad_norm": 0.5137205123901367, "learning_rate": 2.938723621384572e-06, "loss": 0.060150146484375, "step": 4747 }, { "epoch": 0.6616038458858775, "grad_norm": 0.6890869736671448, "learning_rate": 2.936576659360421e-06, "loss": 0.08508586883544922, "step": 4748 }, { "epoch": 0.6617431895770919, "grad_norm": 1.0088008642196655, "learning_rate": 2.9344301557818267e-06, "loss": 0.07724761962890625, "step": 4749 }, { "epoch": 0.6618825332683063, "grad_norm": 0.6488932371139526, "learning_rate": 2.9322841111256937e-06, "loss": 0.07832717895507812, "step": 4750 }, { "epoch": 0.6620218769595206, "grad_norm": 0.8076605796813965, "learning_rate": 2.930138525868824e-06, "loss": 0.08252906799316406, "step": 4751 }, { "epoch": 0.662161220650735, "grad_norm": 0.6409377455711365, "learning_rate": 2.927993400487919e-06, "loss": 0.07316780090332031, "step": 4752 }, { "epoch": 0.6623005643419494, "grad_norm": 0.9131014347076416, "learning_rate": 2.9258487354595754e-06, "loss": 0.0696096420288086, "step": 4753 }, { "epoch": 0.6624399080331638, "grad_norm": 0.7867572903633118, "learning_rate": 2.9237045312602908e-06, "loss": 0.08231115341186523, "step": 4754 }, { "epoch": 0.6625792517243781, "grad_norm": 0.6701031923294067, "learning_rate": 2.921560788366454e-06, "loss": 0.0677194595336914, "step": 4755 }, { "epoch": 0.6627185954155925, "grad_norm": 0.5571666955947876, "learning_rate": 2.9194175072543594e-06, "loss": 0.07436370849609375, "step": 4756 }, { "epoch": 0.6628579391068069, "grad_norm": 0.9367579221725464, "learning_rate": 2.9172746884001944e-06, "loss": 0.0850515365600586, "step": 4757 }, { "epoch": 0.6629972827980213, "grad_norm": 0.7226811051368713, "learning_rate": 2.9151323322800433e-06, "loss": 0.09099960327148438, "step": 4758 }, { "epoch": 0.6631366264892357, "grad_norm": 0.6841311454772949, "learning_rate": 2.9129904393698917e-06, "loss": 0.08335018157958984, "step": 4759 }, { "epoch": 0.66327597018045, "grad_norm": 0.9409292340278625, "learning_rate": 2.910849010145617e-06, "loss": 0.08399677276611328, "step": 4760 }, { "epoch": 0.6634153138716644, "grad_norm": 0.746535062789917, "learning_rate": 2.908708045082994e-06, "loss": 0.07628726959228516, "step": 4761 }, { "epoch": 0.6635546575628788, "grad_norm": 1.1043386459350586, "learning_rate": 2.906567544657699e-06, "loss": 0.10739707946777344, "step": 4762 }, { "epoch": 0.6636940012540932, "grad_norm": 1.1647064685821533, "learning_rate": 2.9044275093453034e-06, "loss": 0.07582855224609375, "step": 4763 }, { "epoch": 0.6638333449453075, "grad_norm": 1.0819408893585205, "learning_rate": 2.902287939621272e-06, "loss": 0.09046554565429688, "step": 4764 }, { "epoch": 0.663972688636522, "grad_norm": 0.5807091593742371, "learning_rate": 2.9001488359609676e-06, "loss": 0.058084189891815186, "step": 4765 }, { "epoch": 0.6641120323277364, "grad_norm": 0.9954507350921631, "learning_rate": 2.898010198839651e-06, "loss": 0.10534286499023438, "step": 4766 }, { "epoch": 0.6642513760189508, "grad_norm": 0.7988860607147217, "learning_rate": 2.895872028732481e-06, "loss": 0.07835245132446289, "step": 4767 }, { "epoch": 0.6643907197101652, "grad_norm": 0.9771533012390137, "learning_rate": 2.893734326114506e-06, "loss": 0.08289337158203125, "step": 4768 }, { "epoch": 0.6645300634013795, "grad_norm": 0.5363110303878784, "learning_rate": 2.8915970914606793e-06, "loss": 0.06630325317382812, "step": 4769 }, { "epoch": 0.6646694070925939, "grad_norm": 0.4402201175689697, "learning_rate": 2.8894603252458407e-06, "loss": 0.05459022521972656, "step": 4770 }, { "epoch": 0.6648087507838083, "grad_norm": 1.1649707555770874, "learning_rate": 2.8873240279447355e-06, "loss": 0.09512639045715332, "step": 4771 }, { "epoch": 0.6649480944750227, "grad_norm": 1.0484884977340698, "learning_rate": 2.8851882000319966e-06, "loss": 0.07346725463867188, "step": 4772 }, { "epoch": 0.665087438166237, "grad_norm": 0.8092277646064758, "learning_rate": 2.883052841982157e-06, "loss": 0.06753158569335938, "step": 4773 }, { "epoch": 0.6652267818574514, "grad_norm": 0.5097466707229614, "learning_rate": 2.8809179542696474e-06, "loss": 0.06576347351074219, "step": 4774 }, { "epoch": 0.6653661255486658, "grad_norm": 0.6322968602180481, "learning_rate": 2.878783537368789e-06, "loss": 0.05250263214111328, "step": 4775 }, { "epoch": 0.6655054692398802, "grad_norm": 0.7475113272666931, "learning_rate": 2.8766495917537985e-06, "loss": 0.07604217529296875, "step": 4776 }, { "epoch": 0.6656448129310946, "grad_norm": 0.5798941254615784, "learning_rate": 2.874516117898792e-06, "loss": 0.06212186813354492, "step": 4777 }, { "epoch": 0.6657841566223089, "grad_norm": 1.1810187101364136, "learning_rate": 2.8723831162777806e-06, "loss": 0.07565879821777344, "step": 4778 }, { "epoch": 0.6659235003135233, "grad_norm": 0.7023297548294067, "learning_rate": 2.8702505873646636e-06, "loss": 0.07116508483886719, "step": 4779 }, { "epoch": 0.6660628440047377, "grad_norm": 1.1288665533065796, "learning_rate": 2.8681185316332453e-06, "loss": 0.08238983154296875, "step": 4780 }, { "epoch": 0.6662021876959521, "grad_norm": 0.6527204513549805, "learning_rate": 2.865986949557218e-06, "loss": 0.059815406799316406, "step": 4781 }, { "epoch": 0.6663415313871665, "grad_norm": 0.7474888563156128, "learning_rate": 2.8638558416101683e-06, "loss": 0.09077930450439453, "step": 4782 }, { "epoch": 0.6664808750783808, "grad_norm": 0.8905330300331116, "learning_rate": 2.8617252082655813e-06, "loss": 0.09311294555664062, "step": 4783 }, { "epoch": 0.6666202187695952, "grad_norm": 0.8355523943901062, "learning_rate": 2.8595950499968352e-06, "loss": 0.07581329345703125, "step": 4784 }, { "epoch": 0.6667595624608096, "grad_norm": 0.6291463375091553, "learning_rate": 2.8574653672772068e-06, "loss": 0.0751180648803711, "step": 4785 }, { "epoch": 0.666898906152024, "grad_norm": 1.125510573387146, "learning_rate": 2.8553361605798545e-06, "loss": 0.0981450080871582, "step": 4786 }, { "epoch": 0.6670382498432383, "grad_norm": 0.6640368103981018, "learning_rate": 2.8532074303778446e-06, "loss": 0.07526922225952148, "step": 4787 }, { "epoch": 0.6671775935344527, "grad_norm": 1.1888357400894165, "learning_rate": 2.8510791771441327e-06, "loss": 0.09303855895996094, "step": 4788 }, { "epoch": 0.6673169372256671, "grad_norm": 0.620002269744873, "learning_rate": 2.8489514013515656e-06, "loss": 0.05739593505859375, "step": 4789 }, { "epoch": 0.6674562809168815, "grad_norm": 0.8591028451919556, "learning_rate": 2.8468241034728878e-06, "loss": 0.07378292083740234, "step": 4790 }, { "epoch": 0.6675956246080959, "grad_norm": 0.6311497688293457, "learning_rate": 2.8446972839807384e-06, "loss": 0.06592178344726562, "step": 4791 }, { "epoch": 0.6677349682993102, "grad_norm": 0.9189333915710449, "learning_rate": 2.8425709433476455e-06, "loss": 0.08618545532226562, "step": 4792 }, { "epoch": 0.6678743119905246, "grad_norm": 0.7135248780250549, "learning_rate": 2.8404450820460326e-06, "loss": 0.08885574340820312, "step": 4793 }, { "epoch": 0.668013655681739, "grad_norm": 0.6133647561073303, "learning_rate": 2.8383197005482187e-06, "loss": 0.06476306915283203, "step": 4794 }, { "epoch": 0.6681529993729534, "grad_norm": 0.6278826594352722, "learning_rate": 2.8361947993264185e-06, "loss": 0.07917308807373047, "step": 4795 }, { "epoch": 0.6682923430641677, "grad_norm": 0.65871661901474, "learning_rate": 2.834070378852732e-06, "loss": 0.08893013000488281, "step": 4796 }, { "epoch": 0.6684316867553821, "grad_norm": 0.6556334495544434, "learning_rate": 2.8319464395991567e-06, "loss": 0.09268569946289062, "step": 4797 }, { "epoch": 0.6685710304465965, "grad_norm": 0.6022775769233704, "learning_rate": 2.829822982037585e-06, "loss": 0.06997323036193848, "step": 4798 }, { "epoch": 0.6687103741378109, "grad_norm": 0.9773203134536743, "learning_rate": 2.8277000066398032e-06, "loss": 0.09850692749023438, "step": 4799 }, { "epoch": 0.6688497178290252, "grad_norm": 0.9707552790641785, "learning_rate": 2.8255775138774827e-06, "loss": 0.08495235443115234, "step": 4800 }, { "epoch": 0.6689890615202396, "grad_norm": 1.0020146369934082, "learning_rate": 2.823455504222198e-06, "loss": 0.09802818298339844, "step": 4801 }, { "epoch": 0.669128405211454, "grad_norm": 0.5384095311164856, "learning_rate": 2.821333978145407e-06, "loss": 0.07407093048095703, "step": 4802 }, { "epoch": 0.6692677489026684, "grad_norm": 0.6178154349327087, "learning_rate": 2.8192129361184685e-06, "loss": 0.06932210922241211, "step": 4803 }, { "epoch": 0.6694070925938828, "grad_norm": 0.5279080867767334, "learning_rate": 2.817092378612625e-06, "loss": 0.0710287094116211, "step": 4804 }, { "epoch": 0.6695464362850972, "grad_norm": 0.5189700126647949, "learning_rate": 2.814972306099018e-06, "loss": 0.07199668884277344, "step": 4805 }, { "epoch": 0.6696857799763116, "grad_norm": 0.7895768880844116, "learning_rate": 2.8128527190486823e-06, "loss": 0.09394073486328125, "step": 4806 }, { "epoch": 0.669825123667526, "grad_norm": 0.8558720946311951, "learning_rate": 2.8107336179325383e-06, "loss": 0.08262348175048828, "step": 4807 }, { "epoch": 0.6699644673587404, "grad_norm": 0.9447706937789917, "learning_rate": 2.808615003221401e-06, "loss": 0.09731054306030273, "step": 4808 }, { "epoch": 0.6701038110499548, "grad_norm": 0.5555028915405273, "learning_rate": 2.80649687538598e-06, "loss": 0.07040023803710938, "step": 4809 }, { "epoch": 0.6702431547411691, "grad_norm": 1.2739218473434448, "learning_rate": 2.8043792348968767e-06, "loss": 0.08822250366210938, "step": 4810 }, { "epoch": 0.6703824984323835, "grad_norm": 0.7982473969459534, "learning_rate": 2.8022620822245782e-06, "loss": 0.07746315002441406, "step": 4811 }, { "epoch": 0.6705218421235979, "grad_norm": 0.9752997756004333, "learning_rate": 2.8001454178394715e-06, "loss": 0.07863521575927734, "step": 4812 }, { "epoch": 0.6706611858148123, "grad_norm": 0.7542577385902405, "learning_rate": 2.7980292422118282e-06, "loss": 0.0788869857788086, "step": 4813 }, { "epoch": 0.6708005295060266, "grad_norm": 0.8728532195091248, "learning_rate": 2.795913555811817e-06, "loss": 0.09906768798828125, "step": 4814 }, { "epoch": 0.670939873197241, "grad_norm": 0.8514273762702942, "learning_rate": 2.793798359109492e-06, "loss": 0.08830022811889648, "step": 4815 }, { "epoch": 0.6710792168884554, "grad_norm": 0.83533775806427, "learning_rate": 2.7916836525748024e-06, "loss": 0.08717632293701172, "step": 4816 }, { "epoch": 0.6712185605796698, "grad_norm": 0.9864582419395447, "learning_rate": 2.7895694366775934e-06, "loss": 0.08794307708740234, "step": 4817 }, { "epoch": 0.6713579042708842, "grad_norm": 0.9130792021751404, "learning_rate": 2.7874557118875863e-06, "loss": 0.0849609375, "step": 4818 }, { "epoch": 0.6714972479620985, "grad_norm": 0.8019022941589355, "learning_rate": 2.7853424786744068e-06, "loss": 0.09687232971191406, "step": 4819 }, { "epoch": 0.6716365916533129, "grad_norm": 0.4434168040752411, "learning_rate": 2.7832297375075685e-06, "loss": 0.061992645263671875, "step": 4820 }, { "epoch": 0.6717759353445273, "grad_norm": 0.5472846031188965, "learning_rate": 2.7811174888564713e-06, "loss": 0.06109142303466797, "step": 4821 }, { "epoch": 0.6719152790357417, "grad_norm": 1.1573587656021118, "learning_rate": 2.779005733190412e-06, "loss": 0.08820152282714844, "step": 4822 }, { "epoch": 0.672054622726956, "grad_norm": 0.5803015232086182, "learning_rate": 2.7768944709785705e-06, "loss": 0.06644725799560547, "step": 4823 }, { "epoch": 0.6721939664181704, "grad_norm": 0.964099109172821, "learning_rate": 2.774783702690025e-06, "loss": 0.08291053771972656, "step": 4824 }, { "epoch": 0.6723333101093848, "grad_norm": 1.1006685495376587, "learning_rate": 2.7726734287937367e-06, "loss": 0.07661151885986328, "step": 4825 }, { "epoch": 0.6724726538005992, "grad_norm": 0.7445982694625854, "learning_rate": 2.770563649758562e-06, "loss": 0.09573841094970703, "step": 4826 }, { "epoch": 0.6726119974918136, "grad_norm": 0.6403898000717163, "learning_rate": 2.768454366053247e-06, "loss": 0.0702981948852539, "step": 4827 }, { "epoch": 0.6727513411830279, "grad_norm": 0.6944738626480103, "learning_rate": 2.7663455781464245e-06, "loss": 0.07555866241455078, "step": 4828 }, { "epoch": 0.6728906848742423, "grad_norm": 0.973394513130188, "learning_rate": 2.764237286506618e-06, "loss": 0.08865928649902344, "step": 4829 }, { "epoch": 0.6730300285654567, "grad_norm": 0.6542709469795227, "learning_rate": 2.7621294916022423e-06, "loss": 0.06624412536621094, "step": 4830 }, { "epoch": 0.6731693722566711, "grad_norm": 0.6680191159248352, "learning_rate": 2.760022193901605e-06, "loss": 0.06847667694091797, "step": 4831 }, { "epoch": 0.6733087159478854, "grad_norm": 0.8215093612670898, "learning_rate": 2.7579153938728943e-06, "loss": 0.08397817611694336, "step": 4832 }, { "epoch": 0.6734480596390998, "grad_norm": 1.8218713998794556, "learning_rate": 2.7558090919841972e-06, "loss": 0.09445571899414062, "step": 4833 }, { "epoch": 0.6735874033303142, "grad_norm": 1.1267009973526, "learning_rate": 2.753703288703482e-06, "loss": 0.08374595642089844, "step": 4834 }, { "epoch": 0.6737267470215286, "grad_norm": 0.6399454474449158, "learning_rate": 2.7515979844986148e-06, "loss": 0.08162498474121094, "step": 4835 }, { "epoch": 0.673866090712743, "grad_norm": 0.7021914124488831, "learning_rate": 2.749493179837341e-06, "loss": 0.0769338607788086, "step": 4836 }, { "epoch": 0.6740054344039573, "grad_norm": 0.6594703793525696, "learning_rate": 2.747388875187303e-06, "loss": 0.08766460418701172, "step": 4837 }, { "epoch": 0.6741447780951717, "grad_norm": 0.6624341011047363, "learning_rate": 2.7452850710160305e-06, "loss": 0.07073593139648438, "step": 4838 }, { "epoch": 0.6742841217863861, "grad_norm": 0.7427977919578552, "learning_rate": 2.74318176779094e-06, "loss": 0.07331991195678711, "step": 4839 }, { "epoch": 0.6744234654776005, "grad_norm": 0.6304770112037659, "learning_rate": 2.741078965979334e-06, "loss": 0.068878173828125, "step": 4840 }, { "epoch": 0.6745628091688148, "grad_norm": 0.899601936340332, "learning_rate": 2.7389766660484103e-06, "loss": 0.10225296020507812, "step": 4841 }, { "epoch": 0.6747021528600292, "grad_norm": 0.9182080030441284, "learning_rate": 2.736874868465253e-06, "loss": 0.09189701080322266, "step": 4842 }, { "epoch": 0.6748414965512436, "grad_norm": 0.7287792563438416, "learning_rate": 2.7347735736968318e-06, "loss": 0.06841182708740234, "step": 4843 }, { "epoch": 0.674980840242458, "grad_norm": 0.436069130897522, "learning_rate": 2.7326727822100047e-06, "loss": 0.06770086288452148, "step": 4844 }, { "epoch": 0.6751201839336725, "grad_norm": 0.5721732974052429, "learning_rate": 2.7305724944715218e-06, "loss": 0.07808971405029297, "step": 4845 }, { "epoch": 0.6752595276248868, "grad_norm": 0.7659233212471008, "learning_rate": 2.72847271094802e-06, "loss": 0.06397581100463867, "step": 4846 }, { "epoch": 0.6753988713161012, "grad_norm": 0.6519746780395508, "learning_rate": 2.7263734321060198e-06, "loss": 0.07262992858886719, "step": 4847 }, { "epoch": 0.6755382150073156, "grad_norm": 1.1542526483535767, "learning_rate": 2.7242746584119364e-06, "loss": 0.08105754852294922, "step": 4848 }, { "epoch": 0.67567755869853, "grad_norm": 0.819861888885498, "learning_rate": 2.722176390332071e-06, "loss": 0.07599449157714844, "step": 4849 }, { "epoch": 0.6758169023897443, "grad_norm": 0.914944589138031, "learning_rate": 2.720078628332605e-06, "loss": 0.07410383224487305, "step": 4850 }, { "epoch": 0.6759562460809587, "grad_norm": 0.6791440844535828, "learning_rate": 2.7179813728796156e-06, "loss": 0.07814884185791016, "step": 4851 }, { "epoch": 0.6760955897721731, "grad_norm": 0.5568680167198181, "learning_rate": 2.7158846244390657e-06, "loss": 0.07388496398925781, "step": 4852 }, { "epoch": 0.6762349334633875, "grad_norm": 0.8066065311431885, "learning_rate": 2.7137883834768076e-06, "loss": 0.08572864532470703, "step": 4853 }, { "epoch": 0.6763742771546019, "grad_norm": 0.6778931617736816, "learning_rate": 2.7116926504585756e-06, "loss": 0.07989501953125, "step": 4854 }, { "epoch": 0.6765136208458162, "grad_norm": 0.6440497040748596, "learning_rate": 2.7095974258499914e-06, "loss": 0.07705974578857422, "step": 4855 }, { "epoch": 0.6766529645370306, "grad_norm": 0.638755738735199, "learning_rate": 2.7075027101165706e-06, "loss": 0.07655572891235352, "step": 4856 }, { "epoch": 0.676792308228245, "grad_norm": 0.6857638359069824, "learning_rate": 2.7054085037237066e-06, "loss": 0.06293058395385742, "step": 4857 }, { "epoch": 0.6769316519194594, "grad_norm": 1.0941046476364136, "learning_rate": 2.7033148071366866e-06, "loss": 0.1018218994140625, "step": 4858 }, { "epoch": 0.6770709956106737, "grad_norm": 0.6131015419960022, "learning_rate": 2.701221620820685e-06, "loss": 0.07126808166503906, "step": 4859 }, { "epoch": 0.6772103393018881, "grad_norm": 0.7176694869995117, "learning_rate": 2.6991289452407564e-06, "loss": 0.07762432098388672, "step": 4860 }, { "epoch": 0.6773496829931025, "grad_norm": 0.9410369396209717, "learning_rate": 2.697036780861845e-06, "loss": 0.08652806282043457, "step": 4861 }, { "epoch": 0.6774890266843169, "grad_norm": 0.5854491591453552, "learning_rate": 2.694945128148784e-06, "loss": 0.07158279418945312, "step": 4862 }, { "epoch": 0.6776283703755313, "grad_norm": 0.6582684516906738, "learning_rate": 2.692853987566291e-06, "loss": 0.07160007953643799, "step": 4863 }, { "epoch": 0.6777677140667456, "grad_norm": 0.8859679698944092, "learning_rate": 2.690763359578969e-06, "loss": 0.0990753173828125, "step": 4864 }, { "epoch": 0.67790705775796, "grad_norm": 0.8882613778114319, "learning_rate": 2.6886732446513066e-06, "loss": 0.10949325561523438, "step": 4865 }, { "epoch": 0.6780464014491744, "grad_norm": 0.634056568145752, "learning_rate": 2.68658364324768e-06, "loss": 0.07311105728149414, "step": 4866 }, { "epoch": 0.6781857451403888, "grad_norm": 0.44184690713882446, "learning_rate": 2.684494555832353e-06, "loss": 0.05925440788269043, "step": 4867 }, { "epoch": 0.6783250888316031, "grad_norm": 1.4666439294815063, "learning_rate": 2.6824059828694715e-06, "loss": 0.11319446563720703, "step": 4868 }, { "epoch": 0.6784644325228175, "grad_norm": 0.7792767882347107, "learning_rate": 2.680317924823068e-06, "loss": 0.07078409194946289, "step": 4869 }, { "epoch": 0.6786037762140319, "grad_norm": 0.6378635764122009, "learning_rate": 2.6782303821570644e-06, "loss": 0.07538509368896484, "step": 4870 }, { "epoch": 0.6787431199052463, "grad_norm": 0.7801036834716797, "learning_rate": 2.676143355335263e-06, "loss": 0.09062767028808594, "step": 4871 }, { "epoch": 0.6788824635964607, "grad_norm": 0.5783160328865051, "learning_rate": 2.6740568448213523e-06, "loss": 0.0782175064086914, "step": 4872 }, { "epoch": 0.679021807287675, "grad_norm": 0.7343774437904358, "learning_rate": 2.6719708510789077e-06, "loss": 0.07630538940429688, "step": 4873 }, { "epoch": 0.6791611509788894, "grad_norm": 0.6973897814750671, "learning_rate": 2.669885374571392e-06, "loss": 0.09107017517089844, "step": 4874 }, { "epoch": 0.6793004946701038, "grad_norm": 1.0378090143203735, "learning_rate": 2.667800415762149e-06, "loss": 0.09180212020874023, "step": 4875 }, { "epoch": 0.6794398383613182, "grad_norm": 0.8976541757583618, "learning_rate": 2.665715975114407e-06, "loss": 0.07365608215332031, "step": 4876 }, { "epoch": 0.6795791820525325, "grad_norm": 0.6381053924560547, "learning_rate": 2.6636320530912817e-06, "loss": 0.07148551940917969, "step": 4877 }, { "epoch": 0.6797185257437469, "grad_norm": 0.701673150062561, "learning_rate": 2.6615486501557765e-06, "loss": 0.07841157913208008, "step": 4878 }, { "epoch": 0.6798578694349613, "grad_norm": 0.7985884547233582, "learning_rate": 2.659465766770772e-06, "loss": 0.09531974792480469, "step": 4879 }, { "epoch": 0.6799972131261757, "grad_norm": 0.906596302986145, "learning_rate": 2.6573834033990404e-06, "loss": 0.08939361572265625, "step": 4880 }, { "epoch": 0.68013655681739, "grad_norm": 0.9837411046028137, "learning_rate": 2.655301560503234e-06, "loss": 0.07359790802001953, "step": 4881 }, { "epoch": 0.6802759005086044, "grad_norm": 0.8636924028396606, "learning_rate": 2.6532202385458875e-06, "loss": 0.05872488021850586, "step": 4882 }, { "epoch": 0.6804152441998188, "grad_norm": 1.097930908203125, "learning_rate": 2.6511394379894274e-06, "loss": 0.07512855529785156, "step": 4883 }, { "epoch": 0.6805545878910332, "grad_norm": 0.9488765597343445, "learning_rate": 2.649059159296158e-06, "loss": 0.09237480163574219, "step": 4884 }, { "epoch": 0.6806939315822477, "grad_norm": 0.5223754644393921, "learning_rate": 2.6469794029282726e-06, "loss": 0.0740804672241211, "step": 4885 }, { "epoch": 0.680833275273462, "grad_norm": 0.5311262607574463, "learning_rate": 2.6449001693478438e-06, "loss": 0.06439781188964844, "step": 4886 }, { "epoch": 0.6809726189646764, "grad_norm": 1.1591298580169678, "learning_rate": 2.642821459016827e-06, "loss": 0.10725116729736328, "step": 4887 }, { "epoch": 0.6811119626558908, "grad_norm": 1.2355256080627441, "learning_rate": 2.6407432723970694e-06, "loss": 0.07263469696044922, "step": 4888 }, { "epoch": 0.6812513063471052, "grad_norm": 1.475519061088562, "learning_rate": 2.6386656099502917e-06, "loss": 0.08706474304199219, "step": 4889 }, { "epoch": 0.6813906500383196, "grad_norm": 0.5665653944015503, "learning_rate": 2.6365884721381045e-06, "loss": 0.07009029388427734, "step": 4890 }, { "epoch": 0.6815299937295339, "grad_norm": 0.7416821122169495, "learning_rate": 2.6345118594220044e-06, "loss": 0.07688617706298828, "step": 4891 }, { "epoch": 0.6816693374207483, "grad_norm": 0.6312206387519836, "learning_rate": 2.632435772263363e-06, "loss": 0.07283544540405273, "step": 4892 }, { "epoch": 0.6818086811119627, "grad_norm": 0.5775114893913269, "learning_rate": 2.6303602111234394e-06, "loss": 0.07384586334228516, "step": 4893 }, { "epoch": 0.6819480248031771, "grad_norm": 0.6407498717308044, "learning_rate": 2.6282851764633765e-06, "loss": 0.07850456237792969, "step": 4894 }, { "epoch": 0.6820873684943914, "grad_norm": 0.7421622276306152, "learning_rate": 2.626210668744203e-06, "loss": 0.08410501480102539, "step": 4895 }, { "epoch": 0.6822267121856058, "grad_norm": 0.808902382850647, "learning_rate": 2.624136688426824e-06, "loss": 0.06528162956237793, "step": 4896 }, { "epoch": 0.6823660558768202, "grad_norm": 1.1577380895614624, "learning_rate": 2.6220632359720287e-06, "loss": 0.08661079406738281, "step": 4897 }, { "epoch": 0.6825053995680346, "grad_norm": 0.6199420094490051, "learning_rate": 2.6199903118404934e-06, "loss": 0.06301307678222656, "step": 4898 }, { "epoch": 0.682644743259249, "grad_norm": 0.4785623252391815, "learning_rate": 2.617917916492776e-06, "loss": 0.062206268310546875, "step": 4899 }, { "epoch": 0.6827840869504633, "grad_norm": 0.7820248007774353, "learning_rate": 2.615846050389312e-06, "loss": 0.07366657257080078, "step": 4900 }, { "epoch": 0.6829234306416777, "grad_norm": 0.8929054737091064, "learning_rate": 2.6137747139904262e-06, "loss": 0.08824825286865234, "step": 4901 }, { "epoch": 0.6830627743328921, "grad_norm": 1.2001209259033203, "learning_rate": 2.611703907756319e-06, "loss": 0.08940601348876953, "step": 4902 }, { "epoch": 0.6832021180241065, "grad_norm": 0.7080310583114624, "learning_rate": 2.6096336321470796e-06, "loss": 0.0793447494506836, "step": 4903 }, { "epoch": 0.6833414617153208, "grad_norm": 0.530719518661499, "learning_rate": 2.6075638876226715e-06, "loss": 0.06835031509399414, "step": 4904 }, { "epoch": 0.6834808054065352, "grad_norm": 0.5200464725494385, "learning_rate": 2.605494674642948e-06, "loss": 0.07025527954101562, "step": 4905 }, { "epoch": 0.6836201490977496, "grad_norm": 0.7371552586555481, "learning_rate": 2.603425993667642e-06, "loss": 0.09336090087890625, "step": 4906 }, { "epoch": 0.683759492788964, "grad_norm": 0.7960427403450012, "learning_rate": 2.6013578451563653e-06, "loss": 0.06412887573242188, "step": 4907 }, { "epoch": 0.6838988364801784, "grad_norm": 0.4674857258796692, "learning_rate": 2.599290229568612e-06, "loss": 0.06019735336303711, "step": 4908 }, { "epoch": 0.6840381801713927, "grad_norm": 0.7579083442687988, "learning_rate": 2.59722314736376e-06, "loss": 0.06678032875061035, "step": 4909 }, { "epoch": 0.6841775238626071, "grad_norm": 0.8493714928627014, "learning_rate": 2.5951565990010706e-06, "loss": 0.09011515974998474, "step": 4910 }, { "epoch": 0.6843168675538215, "grad_norm": 0.48588916659355164, "learning_rate": 2.5930905849396792e-06, "loss": 0.06059837341308594, "step": 4911 }, { "epoch": 0.6844562112450359, "grad_norm": 0.5657280087471008, "learning_rate": 2.5910251056386113e-06, "loss": 0.07557064294815063, "step": 4912 }, { "epoch": 0.6845955549362502, "grad_norm": 1.0910576581954956, "learning_rate": 2.5889601615567657e-06, "loss": 0.10955333709716797, "step": 4913 }, { "epoch": 0.6847348986274646, "grad_norm": 0.7332345247268677, "learning_rate": 2.5868957531529283e-06, "loss": 0.07726573944091797, "step": 4914 }, { "epoch": 0.684874242318679, "grad_norm": 1.0123811960220337, "learning_rate": 2.584831880885761e-06, "loss": 0.10002326965332031, "step": 4915 }, { "epoch": 0.6850135860098934, "grad_norm": 0.6682595610618591, "learning_rate": 2.582768545213811e-06, "loss": 0.0966334342956543, "step": 4916 }, { "epoch": 0.6851529297011077, "grad_norm": 0.6016536951065063, "learning_rate": 2.5807057465955065e-06, "loss": 0.0668020248413086, "step": 4917 }, { "epoch": 0.6852922733923221, "grad_norm": 1.291725516319275, "learning_rate": 2.5786434854891482e-06, "loss": 0.07283306121826172, "step": 4918 }, { "epoch": 0.6854316170835365, "grad_norm": 0.7192546129226685, "learning_rate": 2.576581762352928e-06, "loss": 0.06951332092285156, "step": 4919 }, { "epoch": 0.6855709607747509, "grad_norm": 0.586175262928009, "learning_rate": 2.574520577644913e-06, "loss": 0.07421684265136719, "step": 4920 }, { "epoch": 0.6857103044659653, "grad_norm": 0.5808423757553101, "learning_rate": 2.5724599318230504e-06, "loss": 0.07414817810058594, "step": 4921 }, { "epoch": 0.6858496481571796, "grad_norm": 0.7048020362854004, "learning_rate": 2.570399825345169e-06, "loss": 0.06136751174926758, "step": 4922 }, { "epoch": 0.685988991848394, "grad_norm": 0.5365787148475647, "learning_rate": 2.5683402586689788e-06, "loss": 0.06704330444335938, "step": 4923 }, { "epoch": 0.6861283355396084, "grad_norm": 0.6267285943031311, "learning_rate": 2.566281232252068e-06, "loss": 0.07829952239990234, "step": 4924 }, { "epoch": 0.6862676792308228, "grad_norm": 0.656515896320343, "learning_rate": 2.564222746551903e-06, "loss": 0.07372379302978516, "step": 4925 }, { "epoch": 0.6864070229220373, "grad_norm": 0.764385998249054, "learning_rate": 2.562164802025834e-06, "loss": 0.0767812728881836, "step": 4926 }, { "epoch": 0.6865463666132516, "grad_norm": 1.2372572422027588, "learning_rate": 2.5601073991310903e-06, "loss": 0.10622310638427734, "step": 4927 }, { "epoch": 0.686685710304466, "grad_norm": 0.7399458289146423, "learning_rate": 2.5580505383247796e-06, "loss": 0.09082317352294922, "step": 4928 }, { "epoch": 0.6868250539956804, "grad_norm": 0.8017516136169434, "learning_rate": 2.5559942200638866e-06, "loss": 0.0887908935546875, "step": 4929 }, { "epoch": 0.6869643976868948, "grad_norm": 0.6398032903671265, "learning_rate": 2.5539384448052797e-06, "loss": 0.06929779052734375, "step": 4930 }, { "epoch": 0.6871037413781091, "grad_norm": 0.8000969290733337, "learning_rate": 2.5518832130057082e-06, "loss": 0.07397317886352539, "step": 4931 }, { "epoch": 0.6872430850693235, "grad_norm": 0.8464467525482178, "learning_rate": 2.5498285251217938e-06, "loss": 0.07519149780273438, "step": 4932 }, { "epoch": 0.6873824287605379, "grad_norm": 0.7232553958892822, "learning_rate": 2.5477743816100443e-06, "loss": 0.08406639099121094, "step": 4933 }, { "epoch": 0.6875217724517523, "grad_norm": 1.1190226078033447, "learning_rate": 2.5457207829268394e-06, "loss": 0.07541322708129883, "step": 4934 }, { "epoch": 0.6876611161429667, "grad_norm": 0.7709450125694275, "learning_rate": 2.5436677295284474e-06, "loss": 0.07140779495239258, "step": 4935 }, { "epoch": 0.687800459834181, "grad_norm": 0.6379772424697876, "learning_rate": 2.5416152218710044e-06, "loss": 0.06641769409179688, "step": 4936 }, { "epoch": 0.6879398035253954, "grad_norm": 0.8516718149185181, "learning_rate": 2.539563260410533e-06, "loss": 0.08568191528320312, "step": 4937 }, { "epoch": 0.6880791472166098, "grad_norm": 0.9993354678153992, "learning_rate": 2.5375118456029345e-06, "loss": 0.09974861145019531, "step": 4938 }, { "epoch": 0.6882184909078242, "grad_norm": 0.6646740436553955, "learning_rate": 2.5354609779039844e-06, "loss": 0.08319282531738281, "step": 4939 }, { "epoch": 0.6883578345990385, "grad_norm": 0.731066107749939, "learning_rate": 2.533410657769337e-06, "loss": 0.09739112854003906, "step": 4940 }, { "epoch": 0.6884971782902529, "grad_norm": 0.6202459931373596, "learning_rate": 2.531360885654528e-06, "loss": 0.08149433135986328, "step": 4941 }, { "epoch": 0.6886365219814673, "grad_norm": 0.7505131363868713, "learning_rate": 2.529311662014972e-06, "loss": 0.08032608032226562, "step": 4942 }, { "epoch": 0.6887758656726817, "grad_norm": 0.5546845197677612, "learning_rate": 2.5272629873059564e-06, "loss": 0.0692899227142334, "step": 4943 }, { "epoch": 0.688915209363896, "grad_norm": 0.5773563385009766, "learning_rate": 2.5252148619826535e-06, "loss": 0.058190345764160156, "step": 4944 }, { "epoch": 0.6890545530551104, "grad_norm": 1.1080988645553589, "learning_rate": 2.5231672865001056e-06, "loss": 0.09089002758264542, "step": 4945 }, { "epoch": 0.6891938967463248, "grad_norm": 0.5941068530082703, "learning_rate": 2.5211202613132413e-06, "loss": 0.06522083282470703, "step": 4946 }, { "epoch": 0.6893332404375392, "grad_norm": 0.7674636244773865, "learning_rate": 2.5190737868768592e-06, "loss": 0.09406089782714844, "step": 4947 }, { "epoch": 0.6894725841287536, "grad_norm": 0.8982937932014465, "learning_rate": 2.5170278636456413e-06, "loss": 0.0819559097290039, "step": 4948 }, { "epoch": 0.6896119278199679, "grad_norm": 0.5149781107902527, "learning_rate": 2.5149824920741493e-06, "loss": 0.06679534912109375, "step": 4949 }, { "epoch": 0.6897512715111823, "grad_norm": 0.7049599885940552, "learning_rate": 2.51293767261681e-06, "loss": 0.07785892486572266, "step": 4950 }, { "epoch": 0.6898906152023967, "grad_norm": 0.7393026351928711, "learning_rate": 2.5108934057279376e-06, "loss": 0.07782459259033203, "step": 4951 }, { "epoch": 0.6900299588936111, "grad_norm": 0.6996793150901794, "learning_rate": 2.5088496918617243e-06, "loss": 0.06612181663513184, "step": 4952 }, { "epoch": 0.6901693025848255, "grad_norm": 0.7382692098617554, "learning_rate": 2.5068065314722378e-06, "loss": 0.06548786163330078, "step": 4953 }, { "epoch": 0.6903086462760398, "grad_norm": 0.9655173420906067, "learning_rate": 2.504763925013419e-06, "loss": 0.06908988952636719, "step": 4954 }, { "epoch": 0.6904479899672542, "grad_norm": 0.6626636981964111, "learning_rate": 2.5027218729390867e-06, "loss": 0.07415008544921875, "step": 4955 }, { "epoch": 0.6905873336584686, "grad_norm": 0.5325929522514343, "learning_rate": 2.500680375702943e-06, "loss": 0.08561229705810547, "step": 4956 }, { "epoch": 0.690726677349683, "grad_norm": 0.5407887697219849, "learning_rate": 2.498639433758557e-06, "loss": 0.06831169128417969, "step": 4957 }, { "epoch": 0.6908660210408973, "grad_norm": 0.577828049659729, "learning_rate": 2.4965990475593814e-06, "loss": 0.05533647537231445, "step": 4958 }, { "epoch": 0.6910053647321117, "grad_norm": 1.1502997875213623, "learning_rate": 2.494559217558746e-06, "loss": 0.09941864013671875, "step": 4959 }, { "epoch": 0.6911447084233261, "grad_norm": 0.6986594796180725, "learning_rate": 2.492519944209853e-06, "loss": 0.06971168518066406, "step": 4960 }, { "epoch": 0.6912840521145405, "grad_norm": 0.4591493010520935, "learning_rate": 2.4904812279657792e-06, "loss": 0.07363414764404297, "step": 4961 }, { "epoch": 0.6914233958057548, "grad_norm": 0.986337423324585, "learning_rate": 2.488443069279483e-06, "loss": 0.10074329376220703, "step": 4962 }, { "epoch": 0.6915627394969692, "grad_norm": 0.6251879334449768, "learning_rate": 2.4864054686037993e-06, "loss": 0.0802927017211914, "step": 4963 }, { "epoch": 0.6917020831881836, "grad_norm": 0.5217515826225281, "learning_rate": 2.484368426391432e-06, "loss": 0.0657501220703125, "step": 4964 }, { "epoch": 0.691841426879398, "grad_norm": 0.8034390807151794, "learning_rate": 2.482331943094969e-06, "loss": 0.09066104888916016, "step": 4965 }, { "epoch": 0.6919807705706125, "grad_norm": 1.093108892440796, "learning_rate": 2.480296019166868e-06, "loss": 0.08470726013183594, "step": 4966 }, { "epoch": 0.6921201142618268, "grad_norm": 0.38151445984840393, "learning_rate": 2.478260655059467e-06, "loss": 0.06820297241210938, "step": 4967 }, { "epoch": 0.6922594579530412, "grad_norm": 0.7238003015518188, "learning_rate": 2.4762258512249745e-06, "loss": 0.08366203308105469, "step": 4968 }, { "epoch": 0.6923988016442556, "grad_norm": 0.5528267621994019, "learning_rate": 2.4741916081154786e-06, "loss": 0.06007814407348633, "step": 4969 }, { "epoch": 0.69253814533547, "grad_norm": 0.6158427596092224, "learning_rate": 2.472157926182945e-06, "loss": 0.07504749298095703, "step": 4970 }, { "epoch": 0.6926774890266844, "grad_norm": 0.6197431683540344, "learning_rate": 2.470124805879208e-06, "loss": 0.05970573425292969, "step": 4971 }, { "epoch": 0.6928168327178987, "grad_norm": 0.6542043685913086, "learning_rate": 2.468092247655979e-06, "loss": 0.07324409484863281, "step": 4972 }, { "epoch": 0.6929561764091131, "grad_norm": 0.6810263395309448, "learning_rate": 2.466060251964848e-06, "loss": 0.07908439636230469, "step": 4973 }, { "epoch": 0.6930955201003275, "grad_norm": 0.7735973596572876, "learning_rate": 2.464028819257281e-06, "loss": 0.0769186019897461, "step": 4974 }, { "epoch": 0.6932348637915419, "grad_norm": 0.5601685047149658, "learning_rate": 2.4619979499846127e-06, "loss": 0.07577037811279297, "step": 4975 }, { "epoch": 0.6933742074827562, "grad_norm": 0.6798213124275208, "learning_rate": 2.459967644598054e-06, "loss": 0.08671379089355469, "step": 4976 }, { "epoch": 0.6935135511739706, "grad_norm": 0.7448952794075012, "learning_rate": 2.457937903548695e-06, "loss": 0.07680892944335938, "step": 4977 }, { "epoch": 0.693652894865185, "grad_norm": 0.6103763580322266, "learning_rate": 2.4559087272875e-06, "loss": 0.07143783569335938, "step": 4978 }, { "epoch": 0.6937922385563994, "grad_norm": 0.4857337772846222, "learning_rate": 2.4538801162653002e-06, "loss": 0.06417274475097656, "step": 4979 }, { "epoch": 0.6939315822476138, "grad_norm": 1.0423004627227783, "learning_rate": 2.451852070932811e-06, "loss": 0.09778594970703125, "step": 4980 }, { "epoch": 0.6940709259388281, "grad_norm": 0.7380645871162415, "learning_rate": 2.4498245917406195e-06, "loss": 0.059833526611328125, "step": 4981 }, { "epoch": 0.6942102696300425, "grad_norm": 0.547625720500946, "learning_rate": 2.4477976791391784e-06, "loss": 0.071624755859375, "step": 4982 }, { "epoch": 0.6943496133212569, "grad_norm": 1.319290041923523, "learning_rate": 2.445771333578825e-06, "loss": 0.08810234069824219, "step": 4983 }, { "epoch": 0.6944889570124713, "grad_norm": 1.3813869953155518, "learning_rate": 2.443745555509768e-06, "loss": 0.08684635162353516, "step": 4984 }, { "epoch": 0.6946283007036856, "grad_norm": 0.6743261218070984, "learning_rate": 2.4417203453820892e-06, "loss": 0.07236194610595703, "step": 4985 }, { "epoch": 0.6947676443949, "grad_norm": 0.5896543264389038, "learning_rate": 2.4396957036457443e-06, "loss": 0.07660579681396484, "step": 4986 }, { "epoch": 0.6949069880861144, "grad_norm": 1.044400691986084, "learning_rate": 2.437671630750558e-06, "loss": 0.07458209991455078, "step": 4987 }, { "epoch": 0.6950463317773288, "grad_norm": 0.7609124779701233, "learning_rate": 2.4356481271462396e-06, "loss": 0.07419967651367188, "step": 4988 }, { "epoch": 0.6951856754685432, "grad_norm": 0.5235055088996887, "learning_rate": 2.4336251932823594e-06, "loss": 0.0600428581237793, "step": 4989 }, { "epoch": 0.6953250191597575, "grad_norm": 1.2009656429290771, "learning_rate": 2.4316028296083705e-06, "loss": 0.08404922485351562, "step": 4990 }, { "epoch": 0.6954643628509719, "grad_norm": 0.6991730332374573, "learning_rate": 2.4295810365735974e-06, "loss": 0.0653066635131836, "step": 4991 }, { "epoch": 0.6956037065421863, "grad_norm": 0.6916912198066711, "learning_rate": 2.427559814627234e-06, "loss": 0.07151317596435547, "step": 4992 }, { "epoch": 0.6957430502334007, "grad_norm": 0.7973744869232178, "learning_rate": 2.425539164218348e-06, "loss": 0.07274055480957031, "step": 4993 }, { "epoch": 0.695882393924615, "grad_norm": 0.6779767274856567, "learning_rate": 2.4235190857958834e-06, "loss": 0.07550573348999023, "step": 4994 }, { "epoch": 0.6960217376158294, "grad_norm": 0.8051292300224304, "learning_rate": 2.4214995798086584e-06, "loss": 0.08510494232177734, "step": 4995 }, { "epoch": 0.6961610813070438, "grad_norm": 1.1079829931259155, "learning_rate": 2.4194806467053584e-06, "loss": 0.13220953941345215, "step": 4996 }, { "epoch": 0.6963004249982582, "grad_norm": 0.6698353290557861, "learning_rate": 2.417462286934543e-06, "loss": 0.08054161071777344, "step": 4997 }, { "epoch": 0.6964397686894725, "grad_norm": 0.49504226446151733, "learning_rate": 2.4154445009446457e-06, "loss": 0.05707430839538574, "step": 4998 }, { "epoch": 0.6965791123806869, "grad_norm": 0.7488148808479309, "learning_rate": 2.413427289183977e-06, "loss": 0.08514571189880371, "step": 4999 }, { "epoch": 0.6967184560719013, "grad_norm": 0.8232452273368835, "learning_rate": 2.41141065210071e-06, "loss": 0.07708024978637695, "step": 5000 }, { "epoch": 0.6968577997631157, "grad_norm": 0.8727852702140808, "learning_rate": 2.4093945901428977e-06, "loss": 0.09502410888671875, "step": 5001 }, { "epoch": 0.6969971434543301, "grad_norm": 0.5922801494598389, "learning_rate": 2.4073791037584648e-06, "loss": 0.0636170506477356, "step": 5002 }, { "epoch": 0.6971364871455444, "grad_norm": 0.7754873037338257, "learning_rate": 2.4053641933952043e-06, "loss": 0.07500696182250977, "step": 5003 }, { "epoch": 0.6972758308367588, "grad_norm": 0.6714415550231934, "learning_rate": 2.403349859500782e-06, "loss": 0.08232593536376953, "step": 5004 }, { "epoch": 0.6974151745279732, "grad_norm": 0.7911499738693237, "learning_rate": 2.4013361025227384e-06, "loss": 0.07716035842895508, "step": 5005 }, { "epoch": 0.6975545182191877, "grad_norm": 1.1770853996276855, "learning_rate": 2.3993229229084856e-06, "loss": 0.08669757843017578, "step": 5006 }, { "epoch": 0.6976938619104021, "grad_norm": 0.6126809120178223, "learning_rate": 2.3973103211053052e-06, "loss": 0.06753826141357422, "step": 5007 }, { "epoch": 0.6978332056016164, "grad_norm": 0.8455753922462463, "learning_rate": 2.3952982975603494e-06, "loss": 0.092376708984375, "step": 5008 }, { "epoch": 0.6979725492928308, "grad_norm": 1.4154075384140015, "learning_rate": 2.393286852720645e-06, "loss": 0.10021305084228516, "step": 5009 }, { "epoch": 0.6981118929840452, "grad_norm": 0.5173611640930176, "learning_rate": 2.391275987033092e-06, "loss": 0.07270526885986328, "step": 5010 }, { "epoch": 0.6982512366752596, "grad_norm": 0.687738835811615, "learning_rate": 2.3892657009444543e-06, "loss": 0.0734701156616211, "step": 5011 }, { "epoch": 0.698390580366474, "grad_norm": 0.4640931785106659, "learning_rate": 2.387255994901376e-06, "loss": 0.0665121078491211, "step": 5012 }, { "epoch": 0.6985299240576883, "grad_norm": 0.8065513968467712, "learning_rate": 2.3852468693503635e-06, "loss": 0.08030509948730469, "step": 5013 }, { "epoch": 0.6986692677489027, "grad_norm": 0.9133215546607971, "learning_rate": 2.3832383247378025e-06, "loss": 0.08206367492675781, "step": 5014 }, { "epoch": 0.6988086114401171, "grad_norm": 0.5823646783828735, "learning_rate": 2.3812303615099423e-06, "loss": 0.06655263900756836, "step": 5015 }, { "epoch": 0.6989479551313315, "grad_norm": 1.0497411489486694, "learning_rate": 2.3792229801129086e-06, "loss": 0.08654356002807617, "step": 5016 }, { "epoch": 0.6990872988225458, "grad_norm": 0.7701457142829895, "learning_rate": 2.3772161809926973e-06, "loss": 0.08896112442016602, "step": 5017 }, { "epoch": 0.6992266425137602, "grad_norm": 0.8329918384552002, "learning_rate": 2.375209964595171e-06, "loss": 0.07497406005859375, "step": 5018 }, { "epoch": 0.6993659862049746, "grad_norm": 0.7662839293479919, "learning_rate": 2.373204331366064e-06, "loss": 0.08717036247253418, "step": 5019 }, { "epoch": 0.699505329896189, "grad_norm": 0.9800378680229187, "learning_rate": 2.3711992817509854e-06, "loss": 0.08510589599609375, "step": 5020 }, { "epoch": 0.6996446735874033, "grad_norm": 0.49187248945236206, "learning_rate": 2.3691948161954083e-06, "loss": 0.07027244567871094, "step": 5021 }, { "epoch": 0.6997840172786177, "grad_norm": 0.6782600283622742, "learning_rate": 2.3671909351446802e-06, "loss": 0.07218742370605469, "step": 5022 }, { "epoch": 0.6999233609698321, "grad_norm": 0.8755800127983093, "learning_rate": 2.365187639044021e-06, "loss": 0.08626461029052734, "step": 5023 }, { "epoch": 0.7000627046610465, "grad_norm": 0.5596309900283813, "learning_rate": 2.363184928338514e-06, "loss": 0.07570314407348633, "step": 5024 }, { "epoch": 0.7002020483522609, "grad_norm": 0.897939920425415, "learning_rate": 2.3611828034731144e-06, "loss": 0.08351802825927734, "step": 5025 }, { "epoch": 0.7003413920434752, "grad_norm": 1.1741889715194702, "learning_rate": 2.359181264892651e-06, "loss": 0.08896446228027344, "step": 5026 }, { "epoch": 0.7004807357346896, "grad_norm": 0.9147673845291138, "learning_rate": 2.3571803130418215e-06, "loss": 0.09344673156738281, "step": 5027 }, { "epoch": 0.700620079425904, "grad_norm": 0.9150894284248352, "learning_rate": 2.3551799483651894e-06, "loss": 0.10679960250854492, "step": 5028 }, { "epoch": 0.7007594231171184, "grad_norm": 0.7194359302520752, "learning_rate": 2.3531801713071887e-06, "loss": 0.10103225708007812, "step": 5029 }, { "epoch": 0.7008987668083327, "grad_norm": 0.7594836950302124, "learning_rate": 2.351180982312127e-06, "loss": 0.06959342956542969, "step": 5030 }, { "epoch": 0.7010381104995471, "grad_norm": 0.5858476758003235, "learning_rate": 2.349182381824178e-06, "loss": 0.07091617584228516, "step": 5031 }, { "epoch": 0.7011774541907615, "grad_norm": 0.5034453272819519, "learning_rate": 2.3471843702873835e-06, "loss": 0.07396078109741211, "step": 5032 }, { "epoch": 0.7013167978819759, "grad_norm": 0.7123540043830872, "learning_rate": 2.345186948145659e-06, "loss": 0.07798576354980469, "step": 5033 }, { "epoch": 0.7014561415731903, "grad_norm": 0.6680231690406799, "learning_rate": 2.343190115842782e-06, "loss": 0.07949256896972656, "step": 5034 }, { "epoch": 0.7015954852644046, "grad_norm": 1.1238791942596436, "learning_rate": 2.341193873822407e-06, "loss": 0.08264875411987305, "step": 5035 }, { "epoch": 0.701734828955619, "grad_norm": 0.9825141429901123, "learning_rate": 2.33919822252805e-06, "loss": 0.08173942565917969, "step": 5036 }, { "epoch": 0.7018741726468334, "grad_norm": 1.0910449028015137, "learning_rate": 2.337203162403101e-06, "loss": 0.0747365951538086, "step": 5037 }, { "epoch": 0.7020135163380478, "grad_norm": 1.147268533706665, "learning_rate": 2.335208693890819e-06, "loss": 0.08745384216308594, "step": 5038 }, { "epoch": 0.7021528600292621, "grad_norm": 0.6066377758979797, "learning_rate": 2.3332148174343257e-06, "loss": 0.07668304443359375, "step": 5039 }, { "epoch": 0.7022922037204765, "grad_norm": 0.6057076454162598, "learning_rate": 2.331221533476615e-06, "loss": 0.07159614562988281, "step": 5040 }, { "epoch": 0.7024315474116909, "grad_norm": 0.40638959407806396, "learning_rate": 2.3292288424605503e-06, "loss": 0.054500430822372437, "step": 5041 }, { "epoch": 0.7025708911029053, "grad_norm": 0.8548810482025146, "learning_rate": 2.327236744828864e-06, "loss": 0.07377052307128906, "step": 5042 }, { "epoch": 0.7027102347941196, "grad_norm": 0.7697225213050842, "learning_rate": 2.325245241024151e-06, "loss": 0.0712733268737793, "step": 5043 }, { "epoch": 0.702849578485334, "grad_norm": 0.7750255465507507, "learning_rate": 2.323254331488881e-06, "loss": 0.07221508026123047, "step": 5044 }, { "epoch": 0.7029889221765484, "grad_norm": 1.4849246740341187, "learning_rate": 2.3212640166653868e-06, "loss": 0.10118246078491211, "step": 5045 }, { "epoch": 0.7031282658677629, "grad_norm": 0.6185187101364136, "learning_rate": 2.319274296995872e-06, "loss": 0.06544113159179688, "step": 5046 }, { "epoch": 0.7032676095589773, "grad_norm": 0.5850864052772522, "learning_rate": 2.3172851729224056e-06, "loss": 0.0694875717163086, "step": 5047 }, { "epoch": 0.7034069532501916, "grad_norm": 0.7382546663284302, "learning_rate": 2.315296644886926e-06, "loss": 0.07178020477294922, "step": 5048 }, { "epoch": 0.703546296941406, "grad_norm": 0.6795833110809326, "learning_rate": 2.313308713331242e-06, "loss": 0.09590530395507812, "step": 5049 }, { "epoch": 0.7036856406326204, "grad_norm": 0.8324989676475525, "learning_rate": 2.3113213786970205e-06, "loss": 0.0819540023803711, "step": 5050 }, { "epoch": 0.7038249843238348, "grad_norm": 0.6832403540611267, "learning_rate": 2.3093346414258054e-06, "loss": 0.08266639709472656, "step": 5051 }, { "epoch": 0.7039643280150492, "grad_norm": 1.0886107683181763, "learning_rate": 2.3073485019590043e-06, "loss": 0.08622074127197266, "step": 5052 }, { "epoch": 0.7041036717062635, "grad_norm": 0.5172960162162781, "learning_rate": 2.305362960737893e-06, "loss": 0.08098793029785156, "step": 5053 }, { "epoch": 0.7042430153974779, "grad_norm": 0.8383650183677673, "learning_rate": 2.3033780182036127e-06, "loss": 0.09078693389892578, "step": 5054 }, { "epoch": 0.7043823590886923, "grad_norm": 0.685387134552002, "learning_rate": 2.301393674797169e-06, "loss": 0.08271217346191406, "step": 5055 }, { "epoch": 0.7045217027799067, "grad_norm": 0.6872876882553101, "learning_rate": 2.2994099309594437e-06, "loss": 0.07474136352539062, "step": 5056 }, { "epoch": 0.704661046471121, "grad_norm": 0.42583614587783813, "learning_rate": 2.297426787131174e-06, "loss": 0.054739952087402344, "step": 5057 }, { "epoch": 0.7048003901623354, "grad_norm": 1.3297932147979736, "learning_rate": 2.2954442437529705e-06, "loss": 0.09438753128051758, "step": 5058 }, { "epoch": 0.7049397338535498, "grad_norm": 0.6224335432052612, "learning_rate": 2.293462301265313e-06, "loss": 0.06738042831420898, "step": 5059 }, { "epoch": 0.7050790775447642, "grad_norm": 0.4671897888183594, "learning_rate": 2.2914809601085405e-06, "loss": 0.0677499771118164, "step": 5060 }, { "epoch": 0.7052184212359786, "grad_norm": 0.9772655367851257, "learning_rate": 2.28950022072286e-06, "loss": 0.09838390350341797, "step": 5061 }, { "epoch": 0.7053577649271929, "grad_norm": 0.54212486743927, "learning_rate": 2.2875200835483486e-06, "loss": 0.07729530334472656, "step": 5062 }, { "epoch": 0.7054971086184073, "grad_norm": 0.7272135615348816, "learning_rate": 2.2855405490249498e-06, "loss": 0.0648651123046875, "step": 5063 }, { "epoch": 0.7056364523096217, "grad_norm": 0.8903102278709412, "learning_rate": 2.283561617592467e-06, "loss": 0.08003473281860352, "step": 5064 }, { "epoch": 0.7057757960008361, "grad_norm": 0.6263367533683777, "learning_rate": 2.2815832896905772e-06, "loss": 0.07523918151855469, "step": 5065 }, { "epoch": 0.7059151396920504, "grad_norm": 0.6473862528800964, "learning_rate": 2.279605565758816e-06, "loss": 0.08211565017700195, "step": 5066 }, { "epoch": 0.7060544833832648, "grad_norm": 0.7095730304718018, "learning_rate": 2.277628446236592e-06, "loss": 0.0789637565612793, "step": 5067 }, { "epoch": 0.7061938270744792, "grad_norm": 0.7045423984527588, "learning_rate": 2.275651931563173e-06, "loss": 0.0708470344543457, "step": 5068 }, { "epoch": 0.7063331707656936, "grad_norm": 0.7163954973220825, "learning_rate": 2.273676022177697e-06, "loss": 0.07802772521972656, "step": 5069 }, { "epoch": 0.706472514456908, "grad_norm": 0.6267457008361816, "learning_rate": 2.2717007185191673e-06, "loss": 0.06781649589538574, "step": 5070 }, { "epoch": 0.7066118581481223, "grad_norm": 1.0365331172943115, "learning_rate": 2.2697260210264506e-06, "loss": 0.09778594970703125, "step": 5071 }, { "epoch": 0.7067512018393367, "grad_norm": 0.9056538343429565, "learning_rate": 2.267751930138276e-06, "loss": 0.07895469665527344, "step": 5072 }, { "epoch": 0.7068905455305511, "grad_norm": 0.4719226360321045, "learning_rate": 2.265778446293245e-06, "loss": 0.06172382831573486, "step": 5073 }, { "epoch": 0.7070298892217655, "grad_norm": 0.6098629832267761, "learning_rate": 2.263805569929821e-06, "loss": 0.06330013275146484, "step": 5074 }, { "epoch": 0.7071692329129798, "grad_norm": 0.6602234840393066, "learning_rate": 2.2618333014863296e-06, "loss": 0.08481407165527344, "step": 5075 }, { "epoch": 0.7073085766041942, "grad_norm": 0.9873190522193909, "learning_rate": 2.259861641400967e-06, "loss": 0.0705876350402832, "step": 5076 }, { "epoch": 0.7074479202954086, "grad_norm": 0.7207472324371338, "learning_rate": 2.2578905901117876e-06, "loss": 0.06882953643798828, "step": 5077 }, { "epoch": 0.707587263986623, "grad_norm": 0.55045086145401, "learning_rate": 2.255920148056717e-06, "loss": 0.0568537712097168, "step": 5078 }, { "epoch": 0.7077266076778373, "grad_norm": 0.5871217846870422, "learning_rate": 2.2539503156735392e-06, "loss": 0.08258533477783203, "step": 5079 }, { "epoch": 0.7078659513690517, "grad_norm": 0.6011961102485657, "learning_rate": 2.2519810933999085e-06, "loss": 0.07513904571533203, "step": 5080 }, { "epoch": 0.7080052950602661, "grad_norm": 0.4893859922885895, "learning_rate": 2.2500124816733437e-06, "loss": 0.060578346252441406, "step": 5081 }, { "epoch": 0.7081446387514805, "grad_norm": 0.8115606904029846, "learning_rate": 2.248044480931219e-06, "loss": 0.09014606475830078, "step": 5082 }, { "epoch": 0.7082839824426949, "grad_norm": 0.7138438820838928, "learning_rate": 2.2460770916107823e-06, "loss": 0.09473133087158203, "step": 5083 }, { "epoch": 0.7084233261339092, "grad_norm": 0.6839733719825745, "learning_rate": 2.2441103141491424e-06, "loss": 0.07600975036621094, "step": 5084 }, { "epoch": 0.7085626698251236, "grad_norm": 0.717239499092102, "learning_rate": 2.2421441489832745e-06, "loss": 0.07453346252441406, "step": 5085 }, { "epoch": 0.7087020135163381, "grad_norm": 0.6945443153381348, "learning_rate": 2.240178596550014e-06, "loss": 0.08083534240722656, "step": 5086 }, { "epoch": 0.7088413572075525, "grad_norm": 0.7778720855712891, "learning_rate": 2.23821365728606e-06, "loss": 0.09115409851074219, "step": 5087 }, { "epoch": 0.7089807008987669, "grad_norm": 0.5759051442146301, "learning_rate": 2.23624933162798e-06, "loss": 0.07467079162597656, "step": 5088 }, { "epoch": 0.7091200445899812, "grad_norm": 0.5050042271614075, "learning_rate": 2.2342856200121993e-06, "loss": 0.06402397155761719, "step": 5089 }, { "epoch": 0.7092593882811956, "grad_norm": 0.9979532957077026, "learning_rate": 2.2323225228750113e-06, "loss": 0.08699417114257812, "step": 5090 }, { "epoch": 0.70939873197241, "grad_norm": 1.1153578758239746, "learning_rate": 2.230360040652574e-06, "loss": 0.07062530517578125, "step": 5091 }, { "epoch": 0.7095380756636244, "grad_norm": 1.164331078529358, "learning_rate": 2.228398173780903e-06, "loss": 0.08322763442993164, "step": 5092 }, { "epoch": 0.7096774193548387, "grad_norm": 0.6075308322906494, "learning_rate": 2.2264369226958794e-06, "loss": 0.08084964752197266, "step": 5093 }, { "epoch": 0.7098167630460531, "grad_norm": 0.7443380355834961, "learning_rate": 2.2244762878332506e-06, "loss": 0.06692981719970703, "step": 5094 }, { "epoch": 0.7099561067372675, "grad_norm": 0.514479398727417, "learning_rate": 2.222516269628626e-06, "loss": 0.06979751586914062, "step": 5095 }, { "epoch": 0.7100954504284819, "grad_norm": 0.6592992544174194, "learning_rate": 2.220556868517473e-06, "loss": 0.07066917419433594, "step": 5096 }, { "epoch": 0.7102347941196963, "grad_norm": 0.5566838383674622, "learning_rate": 2.2185980849351295e-06, "loss": 0.07706069946289062, "step": 5097 }, { "epoch": 0.7103741378109106, "grad_norm": 0.6287621855735779, "learning_rate": 2.2166399193167905e-06, "loss": 0.07137775421142578, "step": 5098 }, { "epoch": 0.710513481502125, "grad_norm": 0.5408386588096619, "learning_rate": 2.214682372097517e-06, "loss": 0.07205009460449219, "step": 5099 }, { "epoch": 0.7106528251933394, "grad_norm": 0.681794285774231, "learning_rate": 2.212725443712229e-06, "loss": 0.07756900787353516, "step": 5100 }, { "epoch": 0.7107921688845538, "grad_norm": 0.55609130859375, "learning_rate": 2.2107691345957133e-06, "loss": 0.06550025939941406, "step": 5101 }, { "epoch": 0.7109315125757681, "grad_norm": 0.7501285076141357, "learning_rate": 2.208813445182618e-06, "loss": 0.08239173889160156, "step": 5102 }, { "epoch": 0.7110708562669825, "grad_norm": 0.5456214547157288, "learning_rate": 2.2068583759074513e-06, "loss": 0.0666046142578125, "step": 5103 }, { "epoch": 0.7112101999581969, "grad_norm": 0.7748869061470032, "learning_rate": 2.2049039272045837e-06, "loss": 0.08148860931396484, "step": 5104 }, { "epoch": 0.7113495436494113, "grad_norm": 0.7165400385856628, "learning_rate": 2.2029500995082497e-06, "loss": 0.06931447982788086, "step": 5105 }, { "epoch": 0.7114888873406257, "grad_norm": 0.3578547239303589, "learning_rate": 2.2009968932525478e-06, "loss": 0.057981014251708984, "step": 5106 }, { "epoch": 0.71162823103184, "grad_norm": 0.8599287867546082, "learning_rate": 2.199044308871434e-06, "loss": 0.09072399139404297, "step": 5107 }, { "epoch": 0.7117675747230544, "grad_norm": 0.6825665831565857, "learning_rate": 2.197092346798726e-06, "loss": 0.08896780014038086, "step": 5108 }, { "epoch": 0.7119069184142688, "grad_norm": 0.7959466576576233, "learning_rate": 2.1951410074681074e-06, "loss": 0.08881759643554688, "step": 5109 }, { "epoch": 0.7120462621054832, "grad_norm": 0.571627140045166, "learning_rate": 2.193190291313122e-06, "loss": 0.056890010833740234, "step": 5110 }, { "epoch": 0.7121856057966975, "grad_norm": 0.6130637526512146, "learning_rate": 2.1912401987671724e-06, "loss": 0.07161903381347656, "step": 5111 }, { "epoch": 0.7123249494879119, "grad_norm": 0.930518627166748, "learning_rate": 2.1892907302635246e-06, "loss": 0.08450603485107422, "step": 5112 }, { "epoch": 0.7124642931791263, "grad_norm": 0.8857187032699585, "learning_rate": 2.1873418862353095e-06, "loss": 0.0814962387084961, "step": 5113 }, { "epoch": 0.7126036368703407, "grad_norm": 0.4790021479129791, "learning_rate": 2.185393667115513e-06, "loss": 0.0568079948425293, "step": 5114 }, { "epoch": 0.712742980561555, "grad_norm": 1.318100929260254, "learning_rate": 2.1834460733369835e-06, "loss": 0.09166216850280762, "step": 5115 }, { "epoch": 0.7128823242527694, "grad_norm": 0.5252153873443604, "learning_rate": 2.181499105332433e-06, "loss": 0.06474590301513672, "step": 5116 }, { "epoch": 0.7130216679439838, "grad_norm": 0.7695261240005493, "learning_rate": 2.179552763534436e-06, "loss": 0.07808494567871094, "step": 5117 }, { "epoch": 0.7131610116351982, "grad_norm": 1.1458104848861694, "learning_rate": 2.177607048375423e-06, "loss": 0.08203208446502686, "step": 5118 }, { "epoch": 0.7133003553264126, "grad_norm": 0.9107378125190735, "learning_rate": 2.1756619602876857e-06, "loss": 0.07567262649536133, "step": 5119 }, { "epoch": 0.7134396990176269, "grad_norm": 0.7180405855178833, "learning_rate": 2.1737174997033818e-06, "loss": 0.07896232604980469, "step": 5120 }, { "epoch": 0.7135790427088413, "grad_norm": 0.619117259979248, "learning_rate": 2.1717736670545226e-06, "loss": 0.06701540946960449, "step": 5121 }, { "epoch": 0.7137183864000557, "grad_norm": 0.732549250125885, "learning_rate": 2.169830462772985e-06, "loss": 0.07686519622802734, "step": 5122 }, { "epoch": 0.7138577300912701, "grad_norm": 1.1756985187530518, "learning_rate": 2.1678878872905063e-06, "loss": 0.09335041046142578, "step": 5123 }, { "epoch": 0.7139970737824844, "grad_norm": 1.3020256757736206, "learning_rate": 2.1659459410386814e-06, "loss": 0.06687259674072266, "step": 5124 }, { "epoch": 0.7141364174736988, "grad_norm": 0.8114030957221985, "learning_rate": 2.1640046244489637e-06, "loss": 0.07512378692626953, "step": 5125 }, { "epoch": 0.7142757611649132, "grad_norm": 0.900363028049469, "learning_rate": 2.1620639379526715e-06, "loss": 0.0861978530883789, "step": 5126 }, { "epoch": 0.7144151048561277, "grad_norm": 0.8511925935745239, "learning_rate": 2.1601238819809827e-06, "loss": 0.054779052734375, "step": 5127 }, { "epoch": 0.7145544485473421, "grad_norm": 0.6678187251091003, "learning_rate": 2.158184456964932e-06, "loss": 0.06159210205078125, "step": 5128 }, { "epoch": 0.7146937922385564, "grad_norm": 0.7189115881919861, "learning_rate": 2.156245663335414e-06, "loss": 0.07290124893188477, "step": 5129 }, { "epoch": 0.7148331359297708, "grad_norm": 0.6700384616851807, "learning_rate": 2.154307501523185e-06, "loss": 0.07533073425292969, "step": 5130 }, { "epoch": 0.7149724796209852, "grad_norm": 0.9749722480773926, "learning_rate": 2.1523699719588633e-06, "loss": 0.09690189361572266, "step": 5131 }, { "epoch": 0.7151118233121996, "grad_norm": 0.5686851739883423, "learning_rate": 2.1504330750729185e-06, "loss": 0.06591415405273438, "step": 5132 }, { "epoch": 0.715251167003414, "grad_norm": 1.1475361585617065, "learning_rate": 2.1484968112956884e-06, "loss": 0.09487724304199219, "step": 5133 }, { "epoch": 0.7153905106946283, "grad_norm": 0.7970849275588989, "learning_rate": 2.146561181057368e-06, "loss": 0.08602547645568848, "step": 5134 }, { "epoch": 0.7155298543858427, "grad_norm": 0.8175115585327148, "learning_rate": 2.1446261847880073e-06, "loss": 0.06679916381835938, "step": 5135 }, { "epoch": 0.7156691980770571, "grad_norm": 0.5782576203346252, "learning_rate": 2.1426918229175175e-06, "loss": 0.06033802032470703, "step": 5136 }, { "epoch": 0.7158085417682715, "grad_norm": 0.9028175473213196, "learning_rate": 2.140758095875671e-06, "loss": 0.10062789916992188, "step": 5137 }, { "epoch": 0.7159478854594858, "grad_norm": 1.1845576763153076, "learning_rate": 2.1388250040921007e-06, "loss": 0.11495018005371094, "step": 5138 }, { "epoch": 0.7160872291507002, "grad_norm": 0.613135814666748, "learning_rate": 2.136892547996292e-06, "loss": 0.06890153884887695, "step": 5139 }, { "epoch": 0.7162265728419146, "grad_norm": 0.6924743056297302, "learning_rate": 2.1349607280175918e-06, "loss": 0.06663322448730469, "step": 5140 }, { "epoch": 0.716365916533129, "grad_norm": 0.6457023620605469, "learning_rate": 2.133029544585207e-06, "loss": 0.0731954574584961, "step": 5141 }, { "epoch": 0.7165052602243434, "grad_norm": 0.9164565801620483, "learning_rate": 2.1310989981282067e-06, "loss": 0.07378578186035156, "step": 5142 }, { "epoch": 0.7166446039155577, "grad_norm": 0.6856523752212524, "learning_rate": 2.1291690890755078e-06, "loss": 0.086181640625, "step": 5143 }, { "epoch": 0.7167839476067721, "grad_norm": 0.7898237109184265, "learning_rate": 2.127239817855897e-06, "loss": 0.08046150207519531, "step": 5144 }, { "epoch": 0.7169232912979865, "grad_norm": 0.7211244702339172, "learning_rate": 2.1253111848980113e-06, "loss": 0.08494091033935547, "step": 5145 }, { "epoch": 0.7170626349892009, "grad_norm": 0.7861214876174927, "learning_rate": 2.1233831906303514e-06, "loss": 0.0788421630859375, "step": 5146 }, { "epoch": 0.7172019786804152, "grad_norm": 0.9694390296936035, "learning_rate": 2.121455835481271e-06, "loss": 0.07440423965454102, "step": 5147 }, { "epoch": 0.7173413223716296, "grad_norm": 0.47766971588134766, "learning_rate": 2.119529119878985e-06, "loss": 0.06980705261230469, "step": 5148 }, { "epoch": 0.717480666062844, "grad_norm": 0.9177723526954651, "learning_rate": 2.1176030442515704e-06, "loss": 0.07938003540039062, "step": 5149 }, { "epoch": 0.7176200097540584, "grad_norm": 0.819714367389679, "learning_rate": 2.115677609026949e-06, "loss": 0.08336544036865234, "step": 5150 }, { "epoch": 0.7177593534452728, "grad_norm": 0.9512181282043457, "learning_rate": 2.1137528146329133e-06, "loss": 0.08234500885009766, "step": 5151 }, { "epoch": 0.7178986971364871, "grad_norm": 0.4677058458328247, "learning_rate": 2.1118286614971075e-06, "loss": 0.06125450134277344, "step": 5152 }, { "epoch": 0.7180380408277015, "grad_norm": 0.9568625688552856, "learning_rate": 2.1099051500470368e-06, "loss": 0.1151266098022461, "step": 5153 }, { "epoch": 0.7181773845189159, "grad_norm": 1.1605676412582397, "learning_rate": 2.1079822807100585e-06, "loss": 0.10472679138183594, "step": 5154 }, { "epoch": 0.7183167282101303, "grad_norm": 0.5310987830162048, "learning_rate": 2.1060600539133928e-06, "loss": 0.0705709457397461, "step": 5155 }, { "epoch": 0.7184560719013446, "grad_norm": 0.8235752582550049, "learning_rate": 2.104138470084114e-06, "loss": 0.07430100440979004, "step": 5156 }, { "epoch": 0.718595415592559, "grad_norm": 0.672135591506958, "learning_rate": 2.1022175296491516e-06, "loss": 0.07207536697387695, "step": 5157 }, { "epoch": 0.7187347592837734, "grad_norm": 1.0364563465118408, "learning_rate": 2.100297233035296e-06, "loss": 0.09513092041015625, "step": 5158 }, { "epoch": 0.7188741029749878, "grad_norm": 0.6769238114356995, "learning_rate": 2.098377580669196e-06, "loss": 0.07277631759643555, "step": 5159 }, { "epoch": 0.7190134466662021, "grad_norm": 0.8232162594795227, "learning_rate": 2.096458572977352e-06, "loss": 0.0810014009475708, "step": 5160 }, { "epoch": 0.7191527903574165, "grad_norm": 0.7313199043273926, "learning_rate": 2.0945402103861233e-06, "loss": 0.07398366928100586, "step": 5161 }, { "epoch": 0.7192921340486309, "grad_norm": 0.8248984217643738, "learning_rate": 2.0926224933217267e-06, "loss": 0.07781696319580078, "step": 5162 }, { "epoch": 0.7194314777398453, "grad_norm": 0.9786498546600342, "learning_rate": 2.0907054222102367e-06, "loss": 0.09546089172363281, "step": 5163 }, { "epoch": 0.7195708214310597, "grad_norm": 0.7977331280708313, "learning_rate": 2.0887889974775805e-06, "loss": 0.08189010620117188, "step": 5164 }, { "epoch": 0.719710165122274, "grad_norm": 0.6470556259155273, "learning_rate": 2.0868732195495463e-06, "loss": 0.07039642333984375, "step": 5165 }, { "epoch": 0.7198495088134884, "grad_norm": 0.6071632504463196, "learning_rate": 2.0849580888517733e-06, "loss": 0.06719112396240234, "step": 5166 }, { "epoch": 0.7199888525047029, "grad_norm": 0.8822236061096191, "learning_rate": 2.083043605809763e-06, "loss": 0.07242250442504883, "step": 5167 }, { "epoch": 0.7201281961959173, "grad_norm": 0.8038433194160461, "learning_rate": 2.081129770848867e-06, "loss": 0.07877683639526367, "step": 5168 }, { "epoch": 0.7202675398871317, "grad_norm": 0.5185533761978149, "learning_rate": 2.0792165843942963e-06, "loss": 0.06498050689697266, "step": 5169 }, { "epoch": 0.720406883578346, "grad_norm": 0.8913354277610779, "learning_rate": 2.0773040468711205e-06, "loss": 0.10609054565429688, "step": 5170 }, { "epoch": 0.7205462272695604, "grad_norm": 0.6736248135566711, "learning_rate": 2.0753921587042586e-06, "loss": 0.06643486022949219, "step": 5171 }, { "epoch": 0.7206855709607748, "grad_norm": 0.519349217414856, "learning_rate": 2.0734809203184873e-06, "loss": 0.0626068115234375, "step": 5172 }, { "epoch": 0.7208249146519892, "grad_norm": 1.1007920503616333, "learning_rate": 2.071570332138442e-06, "loss": 0.10321998596191406, "step": 5173 }, { "epoch": 0.7209642583432035, "grad_norm": 1.116965413093567, "learning_rate": 2.0696603945886133e-06, "loss": 0.07759952545166016, "step": 5174 }, { "epoch": 0.7211036020344179, "grad_norm": 0.5594321489334106, "learning_rate": 2.067751108093343e-06, "loss": 0.07616138458251953, "step": 5175 }, { "epoch": 0.7212429457256323, "grad_norm": 0.5546557307243347, "learning_rate": 2.0658424730768335e-06, "loss": 0.07009220123291016, "step": 5176 }, { "epoch": 0.7213822894168467, "grad_norm": 0.6522272825241089, "learning_rate": 2.063934489963137e-06, "loss": 0.08553886413574219, "step": 5177 }, { "epoch": 0.7215216331080611, "grad_norm": 1.2297638654708862, "learning_rate": 2.0620271591761666e-06, "loss": 0.10276222229003906, "step": 5178 }, { "epoch": 0.7216609767992754, "grad_norm": 0.5885260701179504, "learning_rate": 2.0601204811396847e-06, "loss": 0.08806705474853516, "step": 5179 }, { "epoch": 0.7218003204904898, "grad_norm": 0.7321394681930542, "learning_rate": 2.058214456277314e-06, "loss": 0.0725107192993164, "step": 5180 }, { "epoch": 0.7219396641817042, "grad_norm": 0.8782188892364502, "learning_rate": 2.0563090850125318e-06, "loss": 0.06830024719238281, "step": 5181 }, { "epoch": 0.7220790078729186, "grad_norm": 0.9207737445831299, "learning_rate": 2.054404367768662e-06, "loss": 0.07893753051757812, "step": 5182 }, { "epoch": 0.7222183515641329, "grad_norm": 0.7761697769165039, "learning_rate": 2.0525003049688923e-06, "loss": 0.08711814880371094, "step": 5183 }, { "epoch": 0.7223576952553473, "grad_norm": 0.8777004480361938, "learning_rate": 2.0505968970362627e-06, "loss": 0.07983112335205078, "step": 5184 }, { "epoch": 0.7224970389465617, "grad_norm": 0.5784822106361389, "learning_rate": 2.048694144393668e-06, "loss": 0.06024026870727539, "step": 5185 }, { "epoch": 0.7226363826377761, "grad_norm": 0.7159757614135742, "learning_rate": 2.0467920474638552e-06, "loss": 0.08015060424804688, "step": 5186 }, { "epoch": 0.7227757263289905, "grad_norm": 0.8516541123390198, "learning_rate": 2.0448906066694247e-06, "loss": 0.06927108764648438, "step": 5187 }, { "epoch": 0.7229150700202048, "grad_norm": 0.5745388865470886, "learning_rate": 2.042989822432837e-06, "loss": 0.06835746765136719, "step": 5188 }, { "epoch": 0.7230544137114192, "grad_norm": 1.4469634294509888, "learning_rate": 2.041089695176399e-06, "loss": 0.09374761581420898, "step": 5189 }, { "epoch": 0.7231937574026336, "grad_norm": 0.7879353761672974, "learning_rate": 2.0391902253222777e-06, "loss": 0.07640933990478516, "step": 5190 }, { "epoch": 0.723333101093848, "grad_norm": 0.795656144618988, "learning_rate": 2.037291413292494e-06, "loss": 0.08205962181091309, "step": 5191 }, { "epoch": 0.7234724447850623, "grad_norm": 0.8395958542823792, "learning_rate": 2.035393259508919e-06, "loss": 0.06978750228881836, "step": 5192 }, { "epoch": 0.7236117884762767, "grad_norm": 1.3370085954666138, "learning_rate": 2.0334957643932757e-06, "loss": 0.11274385452270508, "step": 5193 }, { "epoch": 0.7237511321674911, "grad_norm": 0.8128964304924011, "learning_rate": 2.0315989283671474e-06, "loss": 0.08853912353515625, "step": 5194 }, { "epoch": 0.7238904758587055, "grad_norm": 0.6799702048301697, "learning_rate": 2.0297027518519696e-06, "loss": 0.05575275421142578, "step": 5195 }, { "epoch": 0.7240298195499199, "grad_norm": 0.8779780864715576, "learning_rate": 2.0278072352690253e-06, "loss": 0.07776451110839844, "step": 5196 }, { "epoch": 0.7241691632411342, "grad_norm": 0.744530200958252, "learning_rate": 2.0259123790394587e-06, "loss": 0.07366180419921875, "step": 5197 }, { "epoch": 0.7243085069323486, "grad_norm": 0.8401985168457031, "learning_rate": 2.0240181835842605e-06, "loss": 0.07909059524536133, "step": 5198 }, { "epoch": 0.724447850623563, "grad_norm": 0.9877487421035767, "learning_rate": 2.0221246493242802e-06, "loss": 0.07600021362304688, "step": 5199 }, { "epoch": 0.7245871943147774, "grad_norm": 0.6440902352333069, "learning_rate": 2.0202317766802155e-06, "loss": 0.06343746185302734, "step": 5200 }, { "epoch": 0.7247265380059917, "grad_norm": 0.8542218208312988, "learning_rate": 2.0183395660726208e-06, "loss": 0.07917404174804688, "step": 5201 }, { "epoch": 0.7248658816972061, "grad_norm": 0.8861979246139526, "learning_rate": 2.0164480179219038e-06, "loss": 0.0884695053100586, "step": 5202 }, { "epoch": 0.7250052253884205, "grad_norm": 0.8677837252616882, "learning_rate": 2.014557132648321e-06, "loss": 0.07785606384277344, "step": 5203 }, { "epoch": 0.7251445690796349, "grad_norm": 0.9154777526855469, "learning_rate": 2.0126669106719833e-06, "loss": 0.09208869934082031, "step": 5204 }, { "epoch": 0.7252839127708492, "grad_norm": 1.0653607845306396, "learning_rate": 2.010777352412856e-06, "loss": 0.0754995346069336, "step": 5205 }, { "epoch": 0.7254232564620636, "grad_norm": 0.7388483285903931, "learning_rate": 2.0088884582907574e-06, "loss": 0.07474613189697266, "step": 5206 }, { "epoch": 0.7255626001532781, "grad_norm": 1.4050755500793457, "learning_rate": 2.0070002287253554e-06, "loss": 0.09065628051757812, "step": 5207 }, { "epoch": 0.7257019438444925, "grad_norm": 0.6824972033500671, "learning_rate": 2.0051126641361697e-06, "loss": 0.0777730941772461, "step": 5208 }, { "epoch": 0.7258412875357069, "grad_norm": 0.45474159717559814, "learning_rate": 2.0032257649425753e-06, "loss": 0.0673208236694336, "step": 5209 }, { "epoch": 0.7259806312269212, "grad_norm": 0.8460320830345154, "learning_rate": 2.0013395315637997e-06, "loss": 0.06763553619384766, "step": 5210 }, { "epoch": 0.7261199749181356, "grad_norm": 0.5100473761558533, "learning_rate": 1.9994539644189183e-06, "loss": 0.06905269622802734, "step": 5211 }, { "epoch": 0.72625931860935, "grad_norm": 0.9940385818481445, "learning_rate": 1.9975690639268623e-06, "loss": 0.08768844604492188, "step": 5212 }, { "epoch": 0.7263986623005644, "grad_norm": 1.123044490814209, "learning_rate": 1.9956848305064156e-06, "loss": 0.07864522933959961, "step": 5213 }, { "epoch": 0.7265380059917788, "grad_norm": 0.6928408741950989, "learning_rate": 1.99380126457621e-06, "loss": 0.0717172622680664, "step": 5214 }, { "epoch": 0.7266773496829931, "grad_norm": 0.9795722365379333, "learning_rate": 1.9919183665547285e-06, "loss": 0.086761474609375, "step": 5215 }, { "epoch": 0.7268166933742075, "grad_norm": 0.7096619606018066, "learning_rate": 1.9900361368603104e-06, "loss": 0.07659244537353516, "step": 5216 }, { "epoch": 0.7269560370654219, "grad_norm": 0.6946134567260742, "learning_rate": 1.988154575911146e-06, "loss": 0.07440948486328125, "step": 5217 }, { "epoch": 0.7270953807566363, "grad_norm": 0.5131800770759583, "learning_rate": 1.9862736841252734e-06, "loss": 0.06337881088256836, "step": 5218 }, { "epoch": 0.7272347244478506, "grad_norm": 0.7807865738868713, "learning_rate": 1.984393461920581e-06, "loss": 0.07405519485473633, "step": 5219 }, { "epoch": 0.727374068139065, "grad_norm": 0.99346524477005, "learning_rate": 1.9825139097148166e-06, "loss": 0.0915517807006836, "step": 5220 }, { "epoch": 0.7275134118302794, "grad_norm": 1.0852832794189453, "learning_rate": 1.980635027925569e-06, "loss": 0.07904052734375, "step": 5221 }, { "epoch": 0.7276527555214938, "grad_norm": 0.8425616025924683, "learning_rate": 1.9787568169702848e-06, "loss": 0.09030342102050781, "step": 5222 }, { "epoch": 0.7277920992127082, "grad_norm": 0.7062049508094788, "learning_rate": 1.9768792772662616e-06, "loss": 0.07764625549316406, "step": 5223 }, { "epoch": 0.7279314429039225, "grad_norm": 0.46922266483306885, "learning_rate": 1.975002409230644e-06, "loss": 0.06424713134765625, "step": 5224 }, { "epoch": 0.7280707865951369, "grad_norm": 0.8529178500175476, "learning_rate": 1.9731262132804275e-06, "loss": 0.0899209976196289, "step": 5225 }, { "epoch": 0.7282101302863513, "grad_norm": 0.9627546072006226, "learning_rate": 1.9712506898324613e-06, "loss": 0.08994913101196289, "step": 5226 }, { "epoch": 0.7283494739775657, "grad_norm": 0.48653483390808105, "learning_rate": 1.969375839303447e-06, "loss": 0.06661510467529297, "step": 5227 }, { "epoch": 0.72848881766878, "grad_norm": 0.8298969864845276, "learning_rate": 1.967501662109928e-06, "loss": 0.09796142578125, "step": 5228 }, { "epoch": 0.7286281613599944, "grad_norm": 0.5067328214645386, "learning_rate": 1.965628158668309e-06, "loss": 0.0687875747680664, "step": 5229 }, { "epoch": 0.7287675050512088, "grad_norm": 0.5709315538406372, "learning_rate": 1.9637553293948353e-06, "loss": 0.06057262420654297, "step": 5230 }, { "epoch": 0.7289068487424232, "grad_norm": 0.8084141612052917, "learning_rate": 1.9618831747056106e-06, "loss": 0.07098722457885742, "step": 5231 }, { "epoch": 0.7290461924336376, "grad_norm": 0.6963660717010498, "learning_rate": 1.960011695016581e-06, "loss": 0.06359481811523438, "step": 5232 }, { "epoch": 0.7291855361248519, "grad_norm": 0.556850016117096, "learning_rate": 1.958140890743549e-06, "loss": 0.06591987609863281, "step": 5233 }, { "epoch": 0.7293248798160663, "grad_norm": 0.7611872553825378, "learning_rate": 1.956270762302166e-06, "loss": 0.07463645935058594, "step": 5234 }, { "epoch": 0.7294642235072807, "grad_norm": 0.6645079851150513, "learning_rate": 1.9544013101079295e-06, "loss": 0.07600593566894531, "step": 5235 }, { "epoch": 0.7296035671984951, "grad_norm": 0.6080265045166016, "learning_rate": 1.9525325345761887e-06, "loss": 0.07919168472290039, "step": 5236 }, { "epoch": 0.7297429108897094, "grad_norm": 0.6299012899398804, "learning_rate": 1.950664436122144e-06, "loss": 0.06905555725097656, "step": 5237 }, { "epoch": 0.7298822545809238, "grad_norm": 1.2722679376602173, "learning_rate": 1.948797015160845e-06, "loss": 0.10290908813476562, "step": 5238 }, { "epoch": 0.7300215982721382, "grad_norm": 0.5834717750549316, "learning_rate": 1.94693027210719e-06, "loss": 0.0629873275756836, "step": 5239 }, { "epoch": 0.7301609419633526, "grad_norm": 0.6101067662239075, "learning_rate": 1.945064207375923e-06, "loss": 0.07509422302246094, "step": 5240 }, { "epoch": 0.730300285654567, "grad_norm": 0.7059484124183655, "learning_rate": 1.9431988213816444e-06, "loss": 0.07497119903564453, "step": 5241 }, { "epoch": 0.7304396293457813, "grad_norm": 0.8549653887748718, "learning_rate": 1.9413341145388013e-06, "loss": 0.08809661865234375, "step": 5242 }, { "epoch": 0.7305789730369957, "grad_norm": 0.6935258507728577, "learning_rate": 1.9394700872616856e-06, "loss": 0.07514476776123047, "step": 5243 }, { "epoch": 0.7307183167282101, "grad_norm": 0.8581128716468811, "learning_rate": 1.9376067399644456e-06, "loss": 0.08811569213867188, "step": 5244 }, { "epoch": 0.7308576604194245, "grad_norm": 0.5690997838973999, "learning_rate": 1.93574407306107e-06, "loss": 0.07335662841796875, "step": 5245 }, { "epoch": 0.7309970041106388, "grad_norm": 1.4525443315505981, "learning_rate": 1.9338820869654056e-06, "loss": 0.10049057006835938, "step": 5246 }, { "epoch": 0.7311363478018533, "grad_norm": 0.8847129344940186, "learning_rate": 1.9320207820911387e-06, "loss": 0.07506608963012695, "step": 5247 }, { "epoch": 0.7312756914930677, "grad_norm": 1.0900800228118896, "learning_rate": 1.930160158851811e-06, "loss": 0.09274101257324219, "step": 5248 }, { "epoch": 0.7314150351842821, "grad_norm": 0.6257441639900208, "learning_rate": 1.9283002176608116e-06, "loss": 0.07686901092529297, "step": 5249 }, { "epoch": 0.7315543788754965, "grad_norm": 1.0266711711883545, "learning_rate": 1.9264409589313767e-06, "loss": 0.09475421905517578, "step": 5250 }, { "epoch": 0.7316937225667108, "grad_norm": 0.6887300610542297, "learning_rate": 1.9245823830765874e-06, "loss": 0.08835601806640625, "step": 5251 }, { "epoch": 0.7318330662579252, "grad_norm": 1.070387363433838, "learning_rate": 1.92272449050938e-06, "loss": 0.0916748046875, "step": 5252 }, { "epoch": 0.7319724099491396, "grad_norm": 0.7181086540222168, "learning_rate": 1.920867281642538e-06, "loss": 0.08893394470214844, "step": 5253 }, { "epoch": 0.732111753640354, "grad_norm": 0.8883050084114075, "learning_rate": 1.919010756888685e-06, "loss": 0.08328723907470703, "step": 5254 }, { "epoch": 0.7322510973315683, "grad_norm": 1.4101618528366089, "learning_rate": 1.917154916660304e-06, "loss": 0.09936237335205078, "step": 5255 }, { "epoch": 0.7323904410227827, "grad_norm": 0.6505153179168701, "learning_rate": 1.9152997613697184e-06, "loss": 0.07171154022216797, "step": 5256 }, { "epoch": 0.7325297847139971, "grad_norm": 0.44508323073387146, "learning_rate": 1.913445291429099e-06, "loss": 0.05508708953857422, "step": 5257 }, { "epoch": 0.7326691284052115, "grad_norm": 0.7163666486740112, "learning_rate": 1.9115915072504683e-06, "loss": 0.07636451721191406, "step": 5258 }, { "epoch": 0.7328084720964259, "grad_norm": 0.9520854353904724, "learning_rate": 1.909738409245697e-06, "loss": 0.10010528564453125, "step": 5259 }, { "epoch": 0.7329478157876402, "grad_norm": 0.721971333026886, "learning_rate": 1.9078859978264995e-06, "loss": 0.07092428207397461, "step": 5260 }, { "epoch": 0.7330871594788546, "grad_norm": 0.7445341944694519, "learning_rate": 1.9060342734044374e-06, "loss": 0.06640625, "step": 5261 }, { "epoch": 0.733226503170069, "grad_norm": 0.6803990006446838, "learning_rate": 1.904183236390923e-06, "loss": 0.07613152265548706, "step": 5262 }, { "epoch": 0.7333658468612834, "grad_norm": 0.5634192228317261, "learning_rate": 1.9023328871972163e-06, "loss": 0.07564020156860352, "step": 5263 }, { "epoch": 0.7335051905524977, "grad_norm": 1.0191148519515991, "learning_rate": 1.9004832262344197e-06, "loss": 0.08163070678710938, "step": 5264 }, { "epoch": 0.7336445342437121, "grad_norm": 0.7221540808677673, "learning_rate": 1.8986342539134873e-06, "loss": 0.08271980285644531, "step": 5265 }, { "epoch": 0.7337838779349265, "grad_norm": 0.6529603600502014, "learning_rate": 1.8967859706452196e-06, "loss": 0.0777583122253418, "step": 5266 }, { "epoch": 0.7339232216261409, "grad_norm": 0.7650427222251892, "learning_rate": 1.894938376840262e-06, "loss": 0.08367538452148438, "step": 5267 }, { "epoch": 0.7340625653173553, "grad_norm": 1.4720429182052612, "learning_rate": 1.8930914729091055e-06, "loss": 0.07941102981567383, "step": 5268 }, { "epoch": 0.7342019090085696, "grad_norm": 0.6071799397468567, "learning_rate": 1.8912452592620916e-06, "loss": 0.07720470428466797, "step": 5269 }, { "epoch": 0.734341252699784, "grad_norm": 0.8457346558570862, "learning_rate": 1.8893997363094086e-06, "loss": 0.08475303649902344, "step": 5270 }, { "epoch": 0.7344805963909984, "grad_norm": 0.8348662853240967, "learning_rate": 1.8875549044610886e-06, "loss": 0.07118797302246094, "step": 5271 }, { "epoch": 0.7346199400822128, "grad_norm": 1.6042633056640625, "learning_rate": 1.8857107641270084e-06, "loss": 0.11431312561035156, "step": 5272 }, { "epoch": 0.7347592837734271, "grad_norm": 0.4381614327430725, "learning_rate": 1.8838673157168956e-06, "loss": 0.060436248779296875, "step": 5273 }, { "epoch": 0.7348986274646415, "grad_norm": 0.8302417397499084, "learning_rate": 1.8820245596403253e-06, "loss": 0.0889129638671875, "step": 5274 }, { "epoch": 0.7350379711558559, "grad_norm": 0.48153364658355713, "learning_rate": 1.8801824963067105e-06, "loss": 0.070098876953125, "step": 5275 }, { "epoch": 0.7351773148470703, "grad_norm": 0.7849619388580322, "learning_rate": 1.8783411261253208e-06, "loss": 0.09626150131225586, "step": 5276 }, { "epoch": 0.7353166585382847, "grad_norm": 0.7199448347091675, "learning_rate": 1.8765004495052623e-06, "loss": 0.07741165161132812, "step": 5277 }, { "epoch": 0.735456002229499, "grad_norm": 0.7581368684768677, "learning_rate": 1.8746604668554952e-06, "loss": 0.06898117065429688, "step": 5278 }, { "epoch": 0.7355953459207134, "grad_norm": 0.5271490216255188, "learning_rate": 1.8728211785848176e-06, "loss": 0.07287120819091797, "step": 5279 }, { "epoch": 0.7357346896119278, "grad_norm": 1.0914976596832275, "learning_rate": 1.8709825851018798e-06, "loss": 0.08255195617675781, "step": 5280 }, { "epoch": 0.7358740333031422, "grad_norm": 0.6737596392631531, "learning_rate": 1.869144686815178e-06, "loss": 0.08785724639892578, "step": 5281 }, { "epoch": 0.7360133769943565, "grad_norm": 0.7577081918716431, "learning_rate": 1.8673074841330447e-06, "loss": 0.08176136016845703, "step": 5282 }, { "epoch": 0.7361527206855709, "grad_norm": 0.8266630172729492, "learning_rate": 1.8654709774636676e-06, "loss": 0.09761714935302734, "step": 5283 }, { "epoch": 0.7362920643767853, "grad_norm": 0.6013491749763489, "learning_rate": 1.8636351672150771e-06, "loss": 0.06823158264160156, "step": 5284 }, { "epoch": 0.7364314080679997, "grad_norm": 0.47974294424057007, "learning_rate": 1.8618000537951496e-06, "loss": 0.06605720520019531, "step": 5285 }, { "epoch": 0.736570751759214, "grad_norm": 0.6568519473075867, "learning_rate": 1.8599656376116026e-06, "loss": 0.0758066177368164, "step": 5286 }, { "epoch": 0.7367100954504285, "grad_norm": 0.8087177276611328, "learning_rate": 1.8581319190720038e-06, "loss": 0.10533618927001953, "step": 5287 }, { "epoch": 0.7368494391416429, "grad_norm": 1.1225932836532593, "learning_rate": 1.8562988985837632e-06, "loss": 0.08178138732910156, "step": 5288 }, { "epoch": 0.7369887828328573, "grad_norm": 0.5744412541389465, "learning_rate": 1.854466576554133e-06, "loss": 0.060712337493896484, "step": 5289 }, { "epoch": 0.7371281265240717, "grad_norm": 0.7374123930931091, "learning_rate": 1.8526349533902161e-06, "loss": 0.06438255310058594, "step": 5290 }, { "epoch": 0.737267470215286, "grad_norm": 0.9233078956604004, "learning_rate": 1.8508040294989588e-06, "loss": 0.06924247741699219, "step": 5291 }, { "epoch": 0.7374068139065004, "grad_norm": 0.7500972151756287, "learning_rate": 1.8489738052871486e-06, "loss": 0.08576583862304688, "step": 5292 }, { "epoch": 0.7375461575977148, "grad_norm": 0.8607221841812134, "learning_rate": 1.8471442811614177e-06, "loss": 0.10438251495361328, "step": 5293 }, { "epoch": 0.7376855012889292, "grad_norm": 0.5596999526023865, "learning_rate": 1.8453154575282472e-06, "loss": 0.06301212310791016, "step": 5294 }, { "epoch": 0.7378248449801436, "grad_norm": 0.7370082139968872, "learning_rate": 1.8434873347939608e-06, "loss": 0.08961963653564453, "step": 5295 }, { "epoch": 0.7379641886713579, "grad_norm": 0.7023324966430664, "learning_rate": 1.8416599133647223e-06, "loss": 0.08848285675048828, "step": 5296 }, { "epoch": 0.7381035323625723, "grad_norm": 0.9943371415138245, "learning_rate": 1.839833193646547e-06, "loss": 0.08826065063476562, "step": 5297 }, { "epoch": 0.7382428760537867, "grad_norm": 0.8632526397705078, "learning_rate": 1.8380071760452862e-06, "loss": 0.07086420059204102, "step": 5298 }, { "epoch": 0.7383822197450011, "grad_norm": 0.6404529809951782, "learning_rate": 1.8361818609666433e-06, "loss": 0.08499336242675781, "step": 5299 }, { "epoch": 0.7385215634362154, "grad_norm": 0.6983682513237, "learning_rate": 1.8343572488161576e-06, "loss": 0.07665443420410156, "step": 5300 }, { "epoch": 0.7386609071274298, "grad_norm": 1.2652875185012817, "learning_rate": 1.832533339999219e-06, "loss": 0.08543205261230469, "step": 5301 }, { "epoch": 0.7388002508186442, "grad_norm": 0.7680785655975342, "learning_rate": 1.8307101349210588e-06, "loss": 0.07882881164550781, "step": 5302 }, { "epoch": 0.7389395945098586, "grad_norm": 0.7569695115089417, "learning_rate": 1.8288876339867511e-06, "loss": 0.07976818084716797, "step": 5303 }, { "epoch": 0.739078938201073, "grad_norm": 0.6617798209190369, "learning_rate": 1.8270658376012112e-06, "loss": 0.08415794372558594, "step": 5304 }, { "epoch": 0.7392182818922873, "grad_norm": 0.9209913015365601, "learning_rate": 1.8252447461692029e-06, "loss": 0.09212493896484375, "step": 5305 }, { "epoch": 0.7393576255835017, "grad_norm": 0.9317858815193176, "learning_rate": 1.8234243600953334e-06, "loss": 0.08815145492553711, "step": 5306 }, { "epoch": 0.7394969692747161, "grad_norm": 0.6595840454101562, "learning_rate": 1.8216046797840465e-06, "loss": 0.06444740295410156, "step": 5307 }, { "epoch": 0.7396363129659305, "grad_norm": 0.6028188467025757, "learning_rate": 1.8197857056396372e-06, "loss": 0.07033014297485352, "step": 5308 }, { "epoch": 0.7397756566571448, "grad_norm": 0.686127245426178, "learning_rate": 1.8179674380662372e-06, "loss": 0.09537029266357422, "step": 5309 }, { "epoch": 0.7399150003483592, "grad_norm": 0.7929590344429016, "learning_rate": 1.8161498774678271e-06, "loss": 0.08162498474121094, "step": 5310 }, { "epoch": 0.7400543440395736, "grad_norm": 0.8970034718513489, "learning_rate": 1.8143330242482244e-06, "loss": 0.09120559692382812, "step": 5311 }, { "epoch": 0.740193687730788, "grad_norm": 0.8569045066833496, "learning_rate": 1.8125168788110932e-06, "loss": 0.07605934143066406, "step": 5312 }, { "epoch": 0.7403330314220024, "grad_norm": 0.4637259244918823, "learning_rate": 1.8107014415599416e-06, "loss": 0.06938552856445312, "step": 5313 }, { "epoch": 0.7404723751132167, "grad_norm": 0.5541813373565674, "learning_rate": 1.808886712898117e-06, "loss": 0.06484031677246094, "step": 5314 }, { "epoch": 0.7406117188044311, "grad_norm": 0.7357626557350159, "learning_rate": 1.8070726932288086e-06, "loss": 0.08716106414794922, "step": 5315 }, { "epoch": 0.7407510624956455, "grad_norm": 0.3723352551460266, "learning_rate": 1.8052593829550525e-06, "loss": 0.04551982879638672, "step": 5316 }, { "epoch": 0.7408904061868599, "grad_norm": 1.0658557415008545, "learning_rate": 1.8034467824797252e-06, "loss": 0.08445978164672852, "step": 5317 }, { "epoch": 0.7410297498780742, "grad_norm": 0.5294279456138611, "learning_rate": 1.8016348922055448e-06, "loss": 0.06351947784423828, "step": 5318 }, { "epoch": 0.7411690935692886, "grad_norm": 0.488825261592865, "learning_rate": 1.7998237125350698e-06, "loss": 0.0655355453491211, "step": 5319 }, { "epoch": 0.741308437260503, "grad_norm": 0.7869157791137695, "learning_rate": 1.7980132438707059e-06, "loss": 0.09374332427978516, "step": 5320 }, { "epoch": 0.7414477809517174, "grad_norm": 0.6874285936355591, "learning_rate": 1.7962034866146954e-06, "loss": 0.09006786346435547, "step": 5321 }, { "epoch": 0.7415871246429317, "grad_norm": 0.7263461351394653, "learning_rate": 1.794394441169126e-06, "loss": 0.07755851745605469, "step": 5322 }, { "epoch": 0.7417264683341461, "grad_norm": 0.6759600043296814, "learning_rate": 1.7925861079359268e-06, "loss": 0.08373737335205078, "step": 5323 }, { "epoch": 0.7418658120253605, "grad_norm": 1.036482334136963, "learning_rate": 1.790778487316871e-06, "loss": 0.08386707305908203, "step": 5324 }, { "epoch": 0.7420051557165749, "grad_norm": 1.181027889251709, "learning_rate": 1.7889715797135643e-06, "loss": 0.08929777145385742, "step": 5325 }, { "epoch": 0.7421444994077893, "grad_norm": 0.7630329132080078, "learning_rate": 1.7871653855274634e-06, "loss": 0.07691764831542969, "step": 5326 }, { "epoch": 0.7422838430990036, "grad_norm": 0.4799972176551819, "learning_rate": 1.7853599051598658e-06, "loss": 0.06904029846191406, "step": 5327 }, { "epoch": 0.7424231867902181, "grad_norm": 0.5792919993400574, "learning_rate": 1.7835551390119033e-06, "loss": 0.07098674774169922, "step": 5328 }, { "epoch": 0.7425625304814325, "grad_norm": 1.2042237520217896, "learning_rate": 1.7817510874845585e-06, "loss": 0.09065818786621094, "step": 5329 }, { "epoch": 0.7427018741726469, "grad_norm": 0.6323380470275879, "learning_rate": 1.779947750978646e-06, "loss": 0.07615280151367188, "step": 5330 }, { "epoch": 0.7428412178638613, "grad_norm": 0.7122809886932373, "learning_rate": 1.7781451298948305e-06, "loss": 0.07347846031188965, "step": 5331 }, { "epoch": 0.7429805615550756, "grad_norm": 1.2734991312026978, "learning_rate": 1.7763432246336087e-06, "loss": 0.07513618469238281, "step": 5332 }, { "epoch": 0.74311990524629, "grad_norm": 0.9046555757522583, "learning_rate": 1.7745420355953253e-06, "loss": 0.08147454261779785, "step": 5333 }, { "epoch": 0.7432592489375044, "grad_norm": 0.7214418053627014, "learning_rate": 1.7727415631801648e-06, "loss": 0.08396148681640625, "step": 5334 }, { "epoch": 0.7433985926287188, "grad_norm": 0.5695154070854187, "learning_rate": 1.7709418077881495e-06, "loss": 0.0866708755493164, "step": 5335 }, { "epoch": 0.7435379363199331, "grad_norm": 1.1027989387512207, "learning_rate": 1.7691427698191422e-06, "loss": 0.09825515747070312, "step": 5336 }, { "epoch": 0.7436772800111475, "grad_norm": 1.260471224784851, "learning_rate": 1.7673444496728493e-06, "loss": 0.09075546264648438, "step": 5337 }, { "epoch": 0.7438166237023619, "grad_norm": 0.6252058148384094, "learning_rate": 1.7655468477488191e-06, "loss": 0.09205245971679688, "step": 5338 }, { "epoch": 0.7439559673935763, "grad_norm": 1.0925887823104858, "learning_rate": 1.763749964446435e-06, "loss": 0.09119415283203125, "step": 5339 }, { "epoch": 0.7440953110847907, "grad_norm": 0.6535592675209045, "learning_rate": 1.7619538001649228e-06, "loss": 0.07076835632324219, "step": 5340 }, { "epoch": 0.744234654776005, "grad_norm": 0.7159900665283203, "learning_rate": 1.7601583553033502e-06, "loss": 0.08323287963867188, "step": 5341 }, { "epoch": 0.7443739984672194, "grad_norm": 0.6801682710647583, "learning_rate": 1.7583636302606254e-06, "loss": 0.08505439758300781, "step": 5342 }, { "epoch": 0.7445133421584338, "grad_norm": 0.7063788175582886, "learning_rate": 1.756569625435493e-06, "loss": 0.07925891876220703, "step": 5343 }, { "epoch": 0.7446526858496482, "grad_norm": 0.6885898113250732, "learning_rate": 1.7547763412265412e-06, "loss": 0.08620262145996094, "step": 5344 }, { "epoch": 0.7447920295408625, "grad_norm": 0.6675667762756348, "learning_rate": 1.7529837780321979e-06, "loss": 0.09669113159179688, "step": 5345 }, { "epoch": 0.7449313732320769, "grad_norm": 0.5147799253463745, "learning_rate": 1.751191936250729e-06, "loss": 0.06805133819580078, "step": 5346 }, { "epoch": 0.7450707169232913, "grad_norm": 0.6870030164718628, "learning_rate": 1.7494008162802378e-06, "loss": 0.08678245544433594, "step": 5347 }, { "epoch": 0.7452100606145057, "grad_norm": 0.7117569446563721, "learning_rate": 1.7476104185186737e-06, "loss": 0.0718531608581543, "step": 5348 }, { "epoch": 0.74534940430572, "grad_norm": 0.6348384618759155, "learning_rate": 1.7458207433638225e-06, "loss": 0.073486328125, "step": 5349 }, { "epoch": 0.7454887479969344, "grad_norm": 0.9481159448623657, "learning_rate": 1.7440317912133076e-06, "loss": 0.08497810363769531, "step": 5350 }, { "epoch": 0.7456280916881488, "grad_norm": 0.9762957692146301, "learning_rate": 1.7422435624645928e-06, "loss": 0.08754730224609375, "step": 5351 }, { "epoch": 0.7457674353793632, "grad_norm": 0.8646100759506226, "learning_rate": 1.7404560575149821e-06, "loss": 0.07516956329345703, "step": 5352 }, { "epoch": 0.7459067790705776, "grad_norm": 0.5640968084335327, "learning_rate": 1.7386692767616204e-06, "loss": 0.05797410011291504, "step": 5353 }, { "epoch": 0.7460461227617919, "grad_norm": 0.9755336046218872, "learning_rate": 1.7368832206014863e-06, "loss": 0.06988716125488281, "step": 5354 }, { "epoch": 0.7461854664530063, "grad_norm": 0.6217721104621887, "learning_rate": 1.735097889431404e-06, "loss": 0.0738973617553711, "step": 5355 }, { "epoch": 0.7463248101442207, "grad_norm": 0.8870625495910645, "learning_rate": 1.733313283648032e-06, "loss": 0.07809925079345703, "step": 5356 }, { "epoch": 0.7464641538354351, "grad_norm": 0.944270670413971, "learning_rate": 1.7315294036478664e-06, "loss": 0.07453727722167969, "step": 5357 }, { "epoch": 0.7466034975266495, "grad_norm": 0.8770756125450134, "learning_rate": 1.7297462498272476e-06, "loss": 0.07903575897216797, "step": 5358 }, { "epoch": 0.7467428412178638, "grad_norm": 0.8459649085998535, "learning_rate": 1.727963822582352e-06, "loss": 0.08582592010498047, "step": 5359 }, { "epoch": 0.7468821849090782, "grad_norm": 0.7135209441184998, "learning_rate": 1.7261821223091918e-06, "loss": 0.08327770233154297, "step": 5360 }, { "epoch": 0.7470215286002926, "grad_norm": 0.7562574744224548, "learning_rate": 1.7244011494036228e-06, "loss": 0.0888814926147461, "step": 5361 }, { "epoch": 0.747160872291507, "grad_norm": 0.9370782375335693, "learning_rate": 1.722620904261334e-06, "loss": 0.0897974967956543, "step": 5362 }, { "epoch": 0.7473002159827213, "grad_norm": 0.7400057911872864, "learning_rate": 1.720841387277858e-06, "loss": 0.0903172492980957, "step": 5363 }, { "epoch": 0.7474395596739357, "grad_norm": 0.6870237588882446, "learning_rate": 1.7190625988485593e-06, "loss": 0.07556629180908203, "step": 5364 }, { "epoch": 0.7475789033651501, "grad_norm": 0.8932193517684937, "learning_rate": 1.7172845393686465e-06, "loss": 0.08887290954589844, "step": 5365 }, { "epoch": 0.7477182470563645, "grad_norm": 0.5666433572769165, "learning_rate": 1.7155072092331648e-06, "loss": 0.0718069076538086, "step": 5366 }, { "epoch": 0.7478575907475788, "grad_norm": 0.9590744972229004, "learning_rate": 1.7137306088369948e-06, "loss": 0.09004926681518555, "step": 5367 }, { "epoch": 0.7479969344387933, "grad_norm": 1.5508246421813965, "learning_rate": 1.7119547385748552e-06, "loss": 0.1152353286743164, "step": 5368 }, { "epoch": 0.7481362781300077, "grad_norm": 0.8467909693717957, "learning_rate": 1.7101795988413056e-06, "loss": 0.09342193603515625, "step": 5369 }, { "epoch": 0.7482756218212221, "grad_norm": 1.1556273698806763, "learning_rate": 1.708405190030743e-06, "loss": 0.07467317581176758, "step": 5370 }, { "epoch": 0.7484149655124365, "grad_norm": 0.9892171621322632, "learning_rate": 1.7066315125373984e-06, "loss": 0.08554267883300781, "step": 5371 }, { "epoch": 0.7485543092036508, "grad_norm": 1.1914973258972168, "learning_rate": 1.7048585667553414e-06, "loss": 0.07409095764160156, "step": 5372 }, { "epoch": 0.7486936528948652, "grad_norm": 0.9252586960792542, "learning_rate": 1.7030863530784814e-06, "loss": 0.073638916015625, "step": 5373 }, { "epoch": 0.7488329965860796, "grad_norm": 0.6498211026191711, "learning_rate": 1.7013148719005652e-06, "loss": 0.07578849792480469, "step": 5374 }, { "epoch": 0.748972340277294, "grad_norm": 0.883478581905365, "learning_rate": 1.6995441236151732e-06, "loss": 0.07159852981567383, "step": 5375 }, { "epoch": 0.7491116839685084, "grad_norm": 1.1690351963043213, "learning_rate": 1.6977741086157273e-06, "loss": 0.0783548355102539, "step": 5376 }, { "epoch": 0.7492510276597227, "grad_norm": 0.4050062894821167, "learning_rate": 1.6960048272954821e-06, "loss": 0.060441017150878906, "step": 5377 }, { "epoch": 0.7493903713509371, "grad_norm": 0.800403356552124, "learning_rate": 1.6942362800475343e-06, "loss": 0.07361888885498047, "step": 5378 }, { "epoch": 0.7495297150421515, "grad_norm": 0.44339272379875183, "learning_rate": 1.6924684672648117e-06, "loss": 0.0599365234375, "step": 5379 }, { "epoch": 0.7496690587333659, "grad_norm": 1.4408897161483765, "learning_rate": 1.6907013893400838e-06, "loss": 0.10446453094482422, "step": 5380 }, { "epoch": 0.7498084024245802, "grad_norm": 0.5637768507003784, "learning_rate": 1.6889350466659554e-06, "loss": 0.05686616897583008, "step": 5381 }, { "epoch": 0.7499477461157946, "grad_norm": 0.6986386775970459, "learning_rate": 1.687169439634867e-06, "loss": 0.06074643135070801, "step": 5382 }, { "epoch": 0.750087089807009, "grad_norm": 1.3718018531799316, "learning_rate": 1.6854045686390947e-06, "loss": 0.10126781463623047, "step": 5383 }, { "epoch": 0.7502264334982234, "grad_norm": 0.5786252021789551, "learning_rate": 1.6836404340707535e-06, "loss": 0.06777572631835938, "step": 5384 }, { "epoch": 0.7503657771894378, "grad_norm": 0.8883484601974487, "learning_rate": 1.6818770363217957e-06, "loss": 0.06701946258544922, "step": 5385 }, { "epoch": 0.7505051208806521, "grad_norm": 0.764915406703949, "learning_rate": 1.6801143757840043e-06, "loss": 0.08094024658203125, "step": 5386 }, { "epoch": 0.7506444645718665, "grad_norm": 0.695231020450592, "learning_rate": 1.678352452849007e-06, "loss": 0.07287216186523438, "step": 5387 }, { "epoch": 0.7507838082630809, "grad_norm": 0.7798178195953369, "learning_rate": 1.6765912679082592e-06, "loss": 0.07577061653137207, "step": 5388 }, { "epoch": 0.7509231519542953, "grad_norm": 0.9045630097389221, "learning_rate": 1.6748308213530555e-06, "loss": 0.08624649047851562, "step": 5389 }, { "epoch": 0.7510624956455096, "grad_norm": 1.2539021968841553, "learning_rate": 1.6730711135745287e-06, "loss": 0.07962799072265625, "step": 5390 }, { "epoch": 0.751201839336724, "grad_norm": 0.7841467261314392, "learning_rate": 1.6713121449636471e-06, "loss": 0.0778360366821289, "step": 5391 }, { "epoch": 0.7513411830279384, "grad_norm": 1.5277153253555298, "learning_rate": 1.6695539159112112e-06, "loss": 0.11208248138427734, "step": 5392 }, { "epoch": 0.7514805267191528, "grad_norm": 0.9416011571884155, "learning_rate": 1.6677964268078584e-06, "loss": 0.07082176208496094, "step": 5393 }, { "epoch": 0.7516198704103672, "grad_norm": 0.7270908951759338, "learning_rate": 1.666039678044064e-06, "loss": 0.0639495849609375, "step": 5394 }, { "epoch": 0.7517592141015815, "grad_norm": 0.7024804949760437, "learning_rate": 1.6642836700101396e-06, "loss": 0.08158111572265625, "step": 5395 }, { "epoch": 0.7518985577927959, "grad_norm": 0.9121468663215637, "learning_rate": 1.6625284030962257e-06, "loss": 0.07968640327453613, "step": 5396 }, { "epoch": 0.7520379014840103, "grad_norm": 0.9779682159423828, "learning_rate": 1.6607738776923072e-06, "loss": 0.0857095718383789, "step": 5397 }, { "epoch": 0.7521772451752247, "grad_norm": 1.5255991220474243, "learning_rate": 1.659020094188195e-06, "loss": 0.09823751449584961, "step": 5398 }, { "epoch": 0.752316588866439, "grad_norm": 0.547919511795044, "learning_rate": 1.657267052973544e-06, "loss": 0.06450891494750977, "step": 5399 }, { "epoch": 0.7524559325576534, "grad_norm": 0.6086112856864929, "learning_rate": 1.6555147544378364e-06, "loss": 0.07391548156738281, "step": 5400 }, { "epoch": 0.7525952762488678, "grad_norm": 0.8720802664756775, "learning_rate": 1.653763198970394e-06, "loss": 0.0710296630859375, "step": 5401 }, { "epoch": 0.7527346199400822, "grad_norm": 1.2632702589035034, "learning_rate": 1.652012386960375e-06, "loss": 0.09189510345458984, "step": 5402 }, { "epoch": 0.7528739636312965, "grad_norm": 0.7921751141548157, "learning_rate": 1.6502623187967675e-06, "loss": 0.06004154682159424, "step": 5403 }, { "epoch": 0.7530133073225109, "grad_norm": 1.1965363025665283, "learning_rate": 1.6485129948683954e-06, "loss": 0.08304882049560547, "step": 5404 }, { "epoch": 0.7531526510137253, "grad_norm": 0.6786020994186401, "learning_rate": 1.64676441556392e-06, "loss": 0.074005126953125, "step": 5405 }, { "epoch": 0.7532919947049397, "grad_norm": 0.6650339365005493, "learning_rate": 1.6450165812718377e-06, "loss": 0.07676506042480469, "step": 5406 }, { "epoch": 0.7534313383961541, "grad_norm": 0.7911581993103027, "learning_rate": 1.643269492380473e-06, "loss": 0.07053279876708984, "step": 5407 }, { "epoch": 0.7535706820873685, "grad_norm": 0.4627954661846161, "learning_rate": 1.6415231492779942e-06, "loss": 0.0654287338256836, "step": 5408 }, { "epoch": 0.7537100257785829, "grad_norm": 0.85662442445755, "learning_rate": 1.6397775523523946e-06, "loss": 0.07838153839111328, "step": 5409 }, { "epoch": 0.7538493694697973, "grad_norm": 0.7961012721061707, "learning_rate": 1.6380327019915088e-06, "loss": 0.08984851837158203, "step": 5410 }, { "epoch": 0.7539887131610117, "grad_norm": 0.6504292488098145, "learning_rate": 1.6362885985830001e-06, "loss": 0.07286834716796875, "step": 5411 }, { "epoch": 0.7541280568522261, "grad_norm": 0.9400845766067505, "learning_rate": 1.6345452425143705e-06, "loss": 0.0670919418334961, "step": 5412 }, { "epoch": 0.7542674005434404, "grad_norm": 0.8960081934928894, "learning_rate": 1.6328026341729547e-06, "loss": 0.08479881286621094, "step": 5413 }, { "epoch": 0.7544067442346548, "grad_norm": 0.7338082790374756, "learning_rate": 1.6310607739459188e-06, "loss": 0.08023881912231445, "step": 5414 }, { "epoch": 0.7545460879258692, "grad_norm": 1.4327468872070312, "learning_rate": 1.6293196622202635e-06, "loss": 0.10980606079101562, "step": 5415 }, { "epoch": 0.7546854316170836, "grad_norm": 0.6983835101127625, "learning_rate": 1.6275792993828249e-06, "loss": 0.07468152046203613, "step": 5416 }, { "epoch": 0.754824775308298, "grad_norm": 0.7708532810211182, "learning_rate": 1.6258396858202746e-06, "loss": 0.08162307739257812, "step": 5417 }, { "epoch": 0.7549641189995123, "grad_norm": 0.6649733781814575, "learning_rate": 1.6241008219191107e-06, "loss": 0.08260917663574219, "step": 5418 }, { "epoch": 0.7551034626907267, "grad_norm": 0.6131110191345215, "learning_rate": 1.622362708065673e-06, "loss": 0.06805610656738281, "step": 5419 }, { "epoch": 0.7552428063819411, "grad_norm": 1.749404788017273, "learning_rate": 1.6206253446461278e-06, "loss": 0.08353233337402344, "step": 5420 }, { "epoch": 0.7553821500731555, "grad_norm": 1.5894368886947632, "learning_rate": 1.618888732046478e-06, "loss": 0.06904983520507812, "step": 5421 }, { "epoch": 0.7555214937643698, "grad_norm": 1.051837682723999, "learning_rate": 1.6171528706525596e-06, "loss": 0.082733154296875, "step": 5422 }, { "epoch": 0.7556608374555842, "grad_norm": 0.8079087138175964, "learning_rate": 1.6154177608500415e-06, "loss": 0.05904436111450195, "step": 5423 }, { "epoch": 0.7558001811467986, "grad_norm": 1.325400471687317, "learning_rate": 1.6136834030244292e-06, "loss": 0.11074447631835938, "step": 5424 }, { "epoch": 0.755939524838013, "grad_norm": 0.7657250761985779, "learning_rate": 1.61194979756105e-06, "loss": 0.08086252212524414, "step": 5425 }, { "epoch": 0.7560788685292273, "grad_norm": 0.8742676377296448, "learning_rate": 1.6102169448450756e-06, "loss": 0.09025859832763672, "step": 5426 }, { "epoch": 0.7562182122204417, "grad_norm": 0.4493084251880646, "learning_rate": 1.6084848452615076e-06, "loss": 0.06139373779296875, "step": 5427 }, { "epoch": 0.7563575559116561, "grad_norm": 0.641385555267334, "learning_rate": 1.6067534991951754e-06, "loss": 0.07577991485595703, "step": 5428 }, { "epoch": 0.7564968996028705, "grad_norm": 1.5955272912979126, "learning_rate": 1.6050229070307488e-06, "loss": 0.09850502014160156, "step": 5429 }, { "epoch": 0.7566362432940849, "grad_norm": 1.063133955001831, "learning_rate": 1.6032930691527214e-06, "loss": 0.08810901641845703, "step": 5430 }, { "epoch": 0.7567755869852992, "grad_norm": 0.9708982706069946, "learning_rate": 1.6015639859454278e-06, "loss": 0.09096813201904297, "step": 5431 }, { "epoch": 0.7569149306765136, "grad_norm": 0.8698615431785583, "learning_rate": 1.5998356577930274e-06, "loss": 0.09027957916259766, "step": 5432 }, { "epoch": 0.757054274367728, "grad_norm": 1.243910551071167, "learning_rate": 1.5981080850795171e-06, "loss": 0.11212921142578125, "step": 5433 }, { "epoch": 0.7571936180589424, "grad_norm": 0.5480848550796509, "learning_rate": 1.5963812681887248e-06, "loss": 0.0676727294921875, "step": 5434 }, { "epoch": 0.7573329617501567, "grad_norm": 0.584712028503418, "learning_rate": 1.5946552075043092e-06, "loss": 0.06450319290161133, "step": 5435 }, { "epoch": 0.7574723054413711, "grad_norm": 0.606443464756012, "learning_rate": 1.592929903409759e-06, "loss": 0.0848989486694336, "step": 5436 }, { "epoch": 0.7576116491325855, "grad_norm": 1.0780861377716064, "learning_rate": 1.5912053562884e-06, "loss": 0.0938577651977539, "step": 5437 }, { "epoch": 0.7577509928237999, "grad_norm": 0.6889510154724121, "learning_rate": 1.589481566523388e-06, "loss": 0.07850503921508789, "step": 5438 }, { "epoch": 0.7578903365150143, "grad_norm": 0.6282086372375488, "learning_rate": 1.587758534497707e-06, "loss": 0.07030630111694336, "step": 5439 }, { "epoch": 0.7580296802062286, "grad_norm": 0.4450542628765106, "learning_rate": 1.5860362605941788e-06, "loss": 0.05533170700073242, "step": 5440 }, { "epoch": 0.758169023897443, "grad_norm": 1.2603176832199097, "learning_rate": 1.5843147451954493e-06, "loss": 0.09086894989013672, "step": 5441 }, { "epoch": 0.7583083675886574, "grad_norm": 1.4828388690948486, "learning_rate": 1.5825939886840036e-06, "loss": 0.11525535583496094, "step": 5442 }, { "epoch": 0.7584477112798718, "grad_norm": 0.9874989986419678, "learning_rate": 1.5808739914421512e-06, "loss": 0.09733390808105469, "step": 5443 }, { "epoch": 0.7585870549710861, "grad_norm": 0.7575240731239319, "learning_rate": 1.5791547538520386e-06, "loss": 0.0625619888305664, "step": 5444 }, { "epoch": 0.7587263986623005, "grad_norm": 0.6570984721183777, "learning_rate": 1.5774362762956414e-06, "loss": 0.07207584381103516, "step": 5445 }, { "epoch": 0.7588657423535149, "grad_norm": 0.704861044883728, "learning_rate": 1.5757185591547653e-06, "loss": 0.08788347244262695, "step": 5446 }, { "epoch": 0.7590050860447293, "grad_norm": 0.9386518597602844, "learning_rate": 1.574001602811046e-06, "loss": 0.08117914199829102, "step": 5447 }, { "epoch": 0.7591444297359438, "grad_norm": 0.6034562587738037, "learning_rate": 1.5722854076459538e-06, "loss": 0.054886817932128906, "step": 5448 }, { "epoch": 0.7592837734271581, "grad_norm": 1.1677322387695312, "learning_rate": 1.57056997404079e-06, "loss": 0.10263919830322266, "step": 5449 }, { "epoch": 0.7594231171183725, "grad_norm": 0.7753821015357971, "learning_rate": 1.5688553023766823e-06, "loss": 0.10129261016845703, "step": 5450 }, { "epoch": 0.7595624608095869, "grad_norm": 1.1200876235961914, "learning_rate": 1.5671413930345902e-06, "loss": 0.08739566802978516, "step": 5451 }, { "epoch": 0.7597018045008013, "grad_norm": 0.5246706008911133, "learning_rate": 1.5654282463953074e-06, "loss": 0.07287108898162842, "step": 5452 }, { "epoch": 0.7598411481920156, "grad_norm": 0.7671683430671692, "learning_rate": 1.5637158628394572e-06, "loss": 0.0747365951538086, "step": 5453 }, { "epoch": 0.75998049188323, "grad_norm": 0.878143310546875, "learning_rate": 1.5620042427474892e-06, "loss": 0.09493207931518555, "step": 5454 }, { "epoch": 0.7601198355744444, "grad_norm": 0.8434204459190369, "learning_rate": 1.5602933864996872e-06, "loss": 0.077667236328125, "step": 5455 }, { "epoch": 0.7602591792656588, "grad_norm": 0.64545077085495, "learning_rate": 1.5585832944761686e-06, "loss": 0.07540225982666016, "step": 5456 }, { "epoch": 0.7603985229568732, "grad_norm": 0.9615194201469421, "learning_rate": 1.5568739670568693e-06, "loss": 0.07315444946289062, "step": 5457 }, { "epoch": 0.7605378666480875, "grad_norm": 0.48759496212005615, "learning_rate": 1.555165404621567e-06, "loss": 0.06615400314331055, "step": 5458 }, { "epoch": 0.7606772103393019, "grad_norm": 0.6043574810028076, "learning_rate": 1.5534576075498664e-06, "loss": 0.06662178039550781, "step": 5459 }, { "epoch": 0.7608165540305163, "grad_norm": 0.45682621002197266, "learning_rate": 1.5517505762211982e-06, "loss": 0.0659942626953125, "step": 5460 }, { "epoch": 0.7609558977217307, "grad_norm": 0.6305111050605774, "learning_rate": 1.5500443110148283e-06, "loss": 0.07354736328125, "step": 5461 }, { "epoch": 0.761095241412945, "grad_norm": 0.9498682618141174, "learning_rate": 1.5483388123098474e-06, "loss": 0.09638094902038574, "step": 5462 }, { "epoch": 0.7612345851041594, "grad_norm": 0.6697848439216614, "learning_rate": 1.546634080485181e-06, "loss": 0.07432794570922852, "step": 5463 }, { "epoch": 0.7613739287953738, "grad_norm": 0.69011390209198, "learning_rate": 1.5449301159195785e-06, "loss": 0.08796882629394531, "step": 5464 }, { "epoch": 0.7615132724865882, "grad_norm": 1.0576244592666626, "learning_rate": 1.5432269189916237e-06, "loss": 0.06450843811035156, "step": 5465 }, { "epoch": 0.7616526161778026, "grad_norm": 0.878635823726654, "learning_rate": 1.54152449007973e-06, "loss": 0.057404518127441406, "step": 5466 }, { "epoch": 0.7617919598690169, "grad_norm": 0.5158424377441406, "learning_rate": 1.539822829562136e-06, "loss": 0.06584739685058594, "step": 5467 }, { "epoch": 0.7619313035602313, "grad_norm": 0.4972582161426544, "learning_rate": 1.5381219378169103e-06, "loss": 0.06873035430908203, "step": 5468 }, { "epoch": 0.7620706472514457, "grad_norm": 1.1650470495224, "learning_rate": 1.5364218152219545e-06, "loss": 0.10799407958984375, "step": 5469 }, { "epoch": 0.7622099909426601, "grad_norm": 0.74337238073349, "learning_rate": 1.5347224621549978e-06, "loss": 0.0691986083984375, "step": 5470 }, { "epoch": 0.7623493346338744, "grad_norm": 0.5622007250785828, "learning_rate": 1.5330238789935963e-06, "loss": 0.0741434097290039, "step": 5471 }, { "epoch": 0.7624886783250888, "grad_norm": 0.6179566979408264, "learning_rate": 1.5313260661151352e-06, "loss": 0.07543563842773438, "step": 5472 }, { "epoch": 0.7626280220163032, "grad_norm": 0.5428455471992493, "learning_rate": 1.5296290238968303e-06, "loss": 0.06842708587646484, "step": 5473 }, { "epoch": 0.7627673657075176, "grad_norm": 0.649770200252533, "learning_rate": 1.5279327527157289e-06, "loss": 0.07130241394042969, "step": 5474 }, { "epoch": 0.762906709398732, "grad_norm": 0.8420248627662659, "learning_rate": 1.526237252948699e-06, "loss": 0.07235336303710938, "step": 5475 }, { "epoch": 0.7630460530899463, "grad_norm": 0.7437911629676819, "learning_rate": 1.5245425249724443e-06, "loss": 0.07953262329101562, "step": 5476 }, { "epoch": 0.7631853967811607, "grad_norm": 0.6992809176445007, "learning_rate": 1.5228485691634964e-06, "loss": 0.07670354843139648, "step": 5477 }, { "epoch": 0.7633247404723751, "grad_norm": 0.9761107563972473, "learning_rate": 1.5211553858982115e-06, "loss": 0.08527374267578125, "step": 5478 }, { "epoch": 0.7634640841635895, "grad_norm": 0.6819570660591125, "learning_rate": 1.5194629755527746e-06, "loss": 0.077392578125, "step": 5479 }, { "epoch": 0.7636034278548038, "grad_norm": 0.5612483620643616, "learning_rate": 1.517771338503203e-06, "loss": 0.061896324157714844, "step": 5480 }, { "epoch": 0.7637427715460182, "grad_norm": 0.6560696959495544, "learning_rate": 1.5160804751253405e-06, "loss": 0.06989765167236328, "step": 5481 }, { "epoch": 0.7638821152372326, "grad_norm": 0.9008868932723999, "learning_rate": 1.5143903857948572e-06, "loss": 0.07537078857421875, "step": 5482 }, { "epoch": 0.764021458928447, "grad_norm": 1.0392088890075684, "learning_rate": 1.5127010708872513e-06, "loss": 0.08380317687988281, "step": 5483 }, { "epoch": 0.7641608026196614, "grad_norm": 0.7253733277320862, "learning_rate": 1.5110125307778506e-06, "loss": 0.0658731460571289, "step": 5484 }, { "epoch": 0.7643001463108757, "grad_norm": 0.8842921853065491, "learning_rate": 1.5093247658418125e-06, "loss": 0.08081722259521484, "step": 5485 }, { "epoch": 0.7644394900020901, "grad_norm": 0.6207011342048645, "learning_rate": 1.5076377764541162e-06, "loss": 0.08814048767089844, "step": 5486 }, { "epoch": 0.7645788336933045, "grad_norm": 0.9361530542373657, "learning_rate": 1.5059515629895754e-06, "loss": 0.08267498016357422, "step": 5487 }, { "epoch": 0.764718177384519, "grad_norm": 0.5880579948425293, "learning_rate": 1.5042661258228268e-06, "loss": 0.07523918151855469, "step": 5488 }, { "epoch": 0.7648575210757333, "grad_norm": 0.8625887632369995, "learning_rate": 1.502581465328335e-06, "loss": 0.08504962921142578, "step": 5489 }, { "epoch": 0.7649968647669477, "grad_norm": 0.8528299331665039, "learning_rate": 1.5008975818803939e-06, "loss": 0.08020305633544922, "step": 5490 }, { "epoch": 0.7651362084581621, "grad_norm": 0.5879879593849182, "learning_rate": 1.4992144758531257e-06, "loss": 0.06398963928222656, "step": 5491 }, { "epoch": 0.7652755521493765, "grad_norm": 0.6148210167884827, "learning_rate": 1.4975321476204767e-06, "loss": 0.0827484130859375, "step": 5492 }, { "epoch": 0.7654148958405909, "grad_norm": 0.6536329388618469, "learning_rate": 1.4958505975562205e-06, "loss": 0.0747976303100586, "step": 5493 }, { "epoch": 0.7655542395318052, "grad_norm": 0.5910786986351013, "learning_rate": 1.49416982603396e-06, "loss": 0.06325101852416992, "step": 5494 }, { "epoch": 0.7656935832230196, "grad_norm": 0.6908110976219177, "learning_rate": 1.4924898334271265e-06, "loss": 0.06612777709960938, "step": 5495 }, { "epoch": 0.765832926914234, "grad_norm": 0.6873926520347595, "learning_rate": 1.4908106201089722e-06, "loss": 0.07343697547912598, "step": 5496 }, { "epoch": 0.7659722706054484, "grad_norm": 0.8231342434883118, "learning_rate": 1.4891321864525826e-06, "loss": 0.07654190063476562, "step": 5497 }, { "epoch": 0.7661116142966627, "grad_norm": 0.6304823756217957, "learning_rate": 1.4874545328308681e-06, "loss": 0.06420707702636719, "step": 5498 }, { "epoch": 0.7662509579878771, "grad_norm": 0.638878345489502, "learning_rate": 1.4857776596165635e-06, "loss": 0.07215023040771484, "step": 5499 }, { "epoch": 0.7663903016790915, "grad_norm": 0.46722427010536194, "learning_rate": 1.4841015671822306e-06, "loss": 0.05544424057006836, "step": 5500 }, { "epoch": 0.7665296453703059, "grad_norm": 0.6888946890830994, "learning_rate": 1.4824262559002595e-06, "loss": 0.07258224487304688, "step": 5501 }, { "epoch": 0.7666689890615203, "grad_norm": 1.5694738626480103, "learning_rate": 1.480751726142869e-06, "loss": 0.09118938446044922, "step": 5502 }, { "epoch": 0.7668083327527346, "grad_norm": 0.4504850506782532, "learning_rate": 1.4790779782820991e-06, "loss": 0.06135368347167969, "step": 5503 }, { "epoch": 0.766947676443949, "grad_norm": 1.0905364751815796, "learning_rate": 1.4774050126898164e-06, "loss": 0.08256244659423828, "step": 5504 }, { "epoch": 0.7670870201351634, "grad_norm": 0.613170862197876, "learning_rate": 1.4757328297377177e-06, "loss": 0.06838703155517578, "step": 5505 }, { "epoch": 0.7672263638263778, "grad_norm": 0.8209344744682312, "learning_rate": 1.474061429797326e-06, "loss": 0.07941532135009766, "step": 5506 }, { "epoch": 0.7673657075175921, "grad_norm": 0.5806111693382263, "learning_rate": 1.4723908132399838e-06, "loss": 0.06545066833496094, "step": 5507 }, { "epoch": 0.7675050512088065, "grad_norm": 0.8085459470748901, "learning_rate": 1.4707209804368683e-06, "loss": 0.06988143920898438, "step": 5508 }, { "epoch": 0.7676443949000209, "grad_norm": 0.5705884099006653, "learning_rate": 1.4690519317589742e-06, "loss": 0.06772232055664062, "step": 5509 }, { "epoch": 0.7677837385912353, "grad_norm": 0.8117413520812988, "learning_rate": 1.4673836675771298e-06, "loss": 0.07218360900878906, "step": 5510 }, { "epoch": 0.7679230822824497, "grad_norm": 0.7104166150093079, "learning_rate": 1.4657161882619814e-06, "loss": 0.06166362762451172, "step": 5511 }, { "epoch": 0.768062425973664, "grad_norm": 0.9967710971832275, "learning_rate": 1.4640494941840072e-06, "loss": 0.08384490013122559, "step": 5512 }, { "epoch": 0.7682017696648784, "grad_norm": 1.249976396560669, "learning_rate": 1.4623835857135099e-06, "loss": 0.09251594543457031, "step": 5513 }, { "epoch": 0.7683411133560928, "grad_norm": 1.2481447458267212, "learning_rate": 1.460718463220615e-06, "loss": 0.07989501953125, "step": 5514 }, { "epoch": 0.7684804570473072, "grad_norm": 1.2475547790527344, "learning_rate": 1.4590541270752723e-06, "loss": 0.09002876281738281, "step": 5515 }, { "epoch": 0.7686198007385215, "grad_norm": 0.8538187146186829, "learning_rate": 1.457390577647262e-06, "loss": 0.075592041015625, "step": 5516 }, { "epoch": 0.7687591444297359, "grad_norm": 0.828806459903717, "learning_rate": 1.455727815306187e-06, "loss": 0.07321548461914062, "step": 5517 }, { "epoch": 0.7688984881209503, "grad_norm": 0.9813975691795349, "learning_rate": 1.454065840421473e-06, "loss": 0.08318328857421875, "step": 5518 }, { "epoch": 0.7690378318121647, "grad_norm": 0.7683306932449341, "learning_rate": 1.4524046533623758e-06, "loss": 0.07528209686279297, "step": 5519 }, { "epoch": 0.769177175503379, "grad_norm": 1.3298691511154175, "learning_rate": 1.450744254497972e-06, "loss": 0.08551406860351562, "step": 5520 }, { "epoch": 0.7693165191945934, "grad_norm": 0.6465832591056824, "learning_rate": 1.4490846441971624e-06, "loss": 0.07350397109985352, "step": 5521 }, { "epoch": 0.7694558628858078, "grad_norm": 1.0112065076828003, "learning_rate": 1.4474258228286758e-06, "loss": 0.09887123107910156, "step": 5522 }, { "epoch": 0.7695952065770222, "grad_norm": 0.8770616054534912, "learning_rate": 1.4457677907610646e-06, "loss": 0.06743049621582031, "step": 5523 }, { "epoch": 0.7697345502682366, "grad_norm": 0.5549059510231018, "learning_rate": 1.4441105483627088e-06, "loss": 0.07323729991912842, "step": 5524 }, { "epoch": 0.7698738939594509, "grad_norm": 0.44200795888900757, "learning_rate": 1.442454096001804e-06, "loss": 0.05499744415283203, "step": 5525 }, { "epoch": 0.7700132376506653, "grad_norm": 1.3215759992599487, "learning_rate": 1.4407984340463794e-06, "loss": 0.11599063873291016, "step": 5526 }, { "epoch": 0.7701525813418797, "grad_norm": 0.8113332986831665, "learning_rate": 1.4391435628642853e-06, "loss": 0.07814979553222656, "step": 5527 }, { "epoch": 0.7702919250330941, "grad_norm": 0.7055542469024658, "learning_rate": 1.437489482823195e-06, "loss": 0.08121871948242188, "step": 5528 }, { "epoch": 0.7704312687243086, "grad_norm": 0.7424703240394592, "learning_rate": 1.4358361942906097e-06, "loss": 0.08468961715698242, "step": 5529 }, { "epoch": 0.7705706124155229, "grad_norm": 1.0887314081192017, "learning_rate": 1.4341836976338485e-06, "loss": 0.08884429931640625, "step": 5530 }, { "epoch": 0.7707099561067373, "grad_norm": 1.1112290620803833, "learning_rate": 1.4325319932200631e-06, "loss": 0.07688426971435547, "step": 5531 }, { "epoch": 0.7708492997979517, "grad_norm": 0.7900729179382324, "learning_rate": 1.43088108141622e-06, "loss": 0.08228540420532227, "step": 5532 }, { "epoch": 0.7709886434891661, "grad_norm": 0.8013310432434082, "learning_rate": 1.4292309625891166e-06, "loss": 0.06756591796875, "step": 5533 }, { "epoch": 0.7711279871803804, "grad_norm": 0.5787621736526489, "learning_rate": 1.4275816371053725e-06, "loss": 0.0775146484375, "step": 5534 }, { "epoch": 0.7712673308715948, "grad_norm": 0.737527072429657, "learning_rate": 1.425933105331429e-06, "loss": 0.07218503952026367, "step": 5535 }, { "epoch": 0.7714066745628092, "grad_norm": 0.8020791411399841, "learning_rate": 1.424285367633551e-06, "loss": 0.08576583862304688, "step": 5536 }, { "epoch": 0.7715460182540236, "grad_norm": 0.8344311118125916, "learning_rate": 1.422638424377829e-06, "loss": 0.08893966674804688, "step": 5537 }, { "epoch": 0.771685361945238, "grad_norm": 0.719127893447876, "learning_rate": 1.420992275930178e-06, "loss": 0.08255767822265625, "step": 5538 }, { "epoch": 0.7718247056364523, "grad_norm": 1.2515283823013306, "learning_rate": 1.4193469226563322e-06, "loss": 0.09572315216064453, "step": 5539 }, { "epoch": 0.7719640493276667, "grad_norm": 0.9607320427894592, "learning_rate": 1.4177023649218536e-06, "loss": 0.07938289642333984, "step": 5540 }, { "epoch": 0.7721033930188811, "grad_norm": 0.4378115236759186, "learning_rate": 1.4160586030921224e-06, "loss": 0.05416393280029297, "step": 5541 }, { "epoch": 0.7722427367100955, "grad_norm": 0.6225910782814026, "learning_rate": 1.4144156375323486e-06, "loss": 0.07265186309814453, "step": 5542 }, { "epoch": 0.7723820804013098, "grad_norm": 0.6991958022117615, "learning_rate": 1.4127734686075589e-06, "loss": 0.08770132064819336, "step": 5543 }, { "epoch": 0.7725214240925242, "grad_norm": 0.7123138308525085, "learning_rate": 1.411132096682606e-06, "loss": 0.07292747497558594, "step": 5544 }, { "epoch": 0.7726607677837386, "grad_norm": 0.872014045715332, "learning_rate": 1.4094915221221677e-06, "loss": 0.07774925231933594, "step": 5545 }, { "epoch": 0.772800111474953, "grad_norm": 1.1010324954986572, "learning_rate": 1.4078517452907403e-06, "loss": 0.09727954864501953, "step": 5546 }, { "epoch": 0.7729394551661674, "grad_norm": 0.8175250887870789, "learning_rate": 1.4062127665526438e-06, "loss": 0.07100296020507812, "step": 5547 }, { "epoch": 0.7730787988573817, "grad_norm": 1.1670212745666504, "learning_rate": 1.4045745862720227e-06, "loss": 0.0781850814819336, "step": 5548 }, { "epoch": 0.7732181425485961, "grad_norm": 0.6816756725311279, "learning_rate": 1.4029372048128454e-06, "loss": 0.09077930450439453, "step": 5549 }, { "epoch": 0.7733574862398105, "grad_norm": 0.45673567056655884, "learning_rate": 1.401300622538897e-06, "loss": 0.0621185302734375, "step": 5550 }, { "epoch": 0.7734968299310249, "grad_norm": 0.6897209286689758, "learning_rate": 1.3996648398137924e-06, "loss": 0.07553958892822266, "step": 5551 }, { "epoch": 0.7736361736222392, "grad_norm": 0.6800487637519836, "learning_rate": 1.398029857000962e-06, "loss": 0.07120895385742188, "step": 5552 }, { "epoch": 0.7737755173134536, "grad_norm": 0.7604575753211975, "learning_rate": 1.3963956744636642e-06, "loss": 0.07946968078613281, "step": 5553 }, { "epoch": 0.773914861004668, "grad_norm": 0.5184527039527893, "learning_rate": 1.394762292564974e-06, "loss": 0.07084274291992188, "step": 5554 }, { "epoch": 0.7740542046958824, "grad_norm": 0.5746959447860718, "learning_rate": 1.393129711667794e-06, "loss": 0.06700658798217773, "step": 5555 }, { "epoch": 0.7741935483870968, "grad_norm": 0.7975823283195496, "learning_rate": 1.3914979321348488e-06, "loss": 0.07915210723876953, "step": 5556 }, { "epoch": 0.7743328920783111, "grad_norm": 0.5239653587341309, "learning_rate": 1.3898669543286763e-06, "loss": 0.07010364532470703, "step": 5557 }, { "epoch": 0.7744722357695255, "grad_norm": 0.6469536423683167, "learning_rate": 1.3882367786116458e-06, "loss": 0.07297706604003906, "step": 5558 }, { "epoch": 0.7746115794607399, "grad_norm": 0.6632233262062073, "learning_rate": 1.3866074053459465e-06, "loss": 0.06782817840576172, "step": 5559 }, { "epoch": 0.7747509231519543, "grad_norm": 1.205389142036438, "learning_rate": 1.3849788348935856e-06, "loss": 0.0793447494506836, "step": 5560 }, { "epoch": 0.7748902668431686, "grad_norm": 0.5508533716201782, "learning_rate": 1.3833510676163963e-06, "loss": 0.06612372398376465, "step": 5561 }, { "epoch": 0.775029610534383, "grad_norm": 0.6492975950241089, "learning_rate": 1.3817241038760287e-06, "loss": 0.06438827514648438, "step": 5562 }, { "epoch": 0.7751689542255974, "grad_norm": 0.859824001789093, "learning_rate": 1.3800979440339602e-06, "loss": 0.08188629150390625, "step": 5563 }, { "epoch": 0.7753082979168118, "grad_norm": 0.7169352173805237, "learning_rate": 1.3784725884514833e-06, "loss": 0.07165145874023438, "step": 5564 }, { "epoch": 0.7754476416080262, "grad_norm": 0.6385821104049683, "learning_rate": 1.3768480374897163e-06, "loss": 0.09271812438964844, "step": 5565 }, { "epoch": 0.7755869852992405, "grad_norm": 1.0528297424316406, "learning_rate": 1.3752242915095993e-06, "loss": 0.10087728500366211, "step": 5566 }, { "epoch": 0.7757263289904549, "grad_norm": 0.5877470970153809, "learning_rate": 1.3736013508718892e-06, "loss": 0.06907463073730469, "step": 5567 }, { "epoch": 0.7758656726816693, "grad_norm": 0.9251430630683899, "learning_rate": 1.371979215937166e-06, "loss": 0.08184981346130371, "step": 5568 }, { "epoch": 0.7760050163728838, "grad_norm": 0.7060887217521667, "learning_rate": 1.3703578870658312e-06, "loss": 0.08679866790771484, "step": 5569 }, { "epoch": 0.7761443600640981, "grad_norm": 0.6920716762542725, "learning_rate": 1.3687373646181095e-06, "loss": 0.0823984146118164, "step": 5570 }, { "epoch": 0.7762837037553125, "grad_norm": 0.6885605454444885, "learning_rate": 1.3671176489540406e-06, "loss": 0.07686614990234375, "step": 5571 }, { "epoch": 0.7764230474465269, "grad_norm": 0.8322058916091919, "learning_rate": 1.3654987404334917e-06, "loss": 0.08558940887451172, "step": 5572 }, { "epoch": 0.7765623911377413, "grad_norm": 0.6545951962471008, "learning_rate": 1.363880639416144e-06, "loss": 0.06895828247070312, "step": 5573 }, { "epoch": 0.7767017348289557, "grad_norm": 0.9004982113838196, "learning_rate": 1.3622633462615058e-06, "loss": 0.08008098602294922, "step": 5574 }, { "epoch": 0.77684107852017, "grad_norm": 0.7598037719726562, "learning_rate": 1.3606468613288997e-06, "loss": 0.07612323760986328, "step": 5575 }, { "epoch": 0.7769804222113844, "grad_norm": 0.6607161164283752, "learning_rate": 1.359031184977473e-06, "loss": 0.06717586517333984, "step": 5576 }, { "epoch": 0.7771197659025988, "grad_norm": 0.7567028403282166, "learning_rate": 1.3574163175661936e-06, "loss": 0.0849313735961914, "step": 5577 }, { "epoch": 0.7772591095938132, "grad_norm": 0.7602774500846863, "learning_rate": 1.3558022594538473e-06, "loss": 0.07953929901123047, "step": 5578 }, { "epoch": 0.7773984532850275, "grad_norm": 0.8597493767738342, "learning_rate": 1.3541890109990386e-06, "loss": 0.10345077514648438, "step": 5579 }, { "epoch": 0.7775377969762419, "grad_norm": 0.5725386142730713, "learning_rate": 1.3525765725601964e-06, "loss": 0.07424163818359375, "step": 5580 }, { "epoch": 0.7776771406674563, "grad_norm": 0.7563608884811401, "learning_rate": 1.3509649444955697e-06, "loss": 0.10214424133300781, "step": 5581 }, { "epoch": 0.7778164843586707, "grad_norm": 0.9374300241470337, "learning_rate": 1.3493541271632227e-06, "loss": 0.09690666198730469, "step": 5582 }, { "epoch": 0.7779558280498851, "grad_norm": 0.7445089817047119, "learning_rate": 1.3477441209210418e-06, "loss": 0.0735011100769043, "step": 5583 }, { "epoch": 0.7780951717410994, "grad_norm": 0.6243624091148376, "learning_rate": 1.3461349261267347e-06, "loss": 0.0768757238984108, "step": 5584 }, { "epoch": 0.7782345154323138, "grad_norm": 0.5213335156440735, "learning_rate": 1.3445265431378297e-06, "loss": 0.05959153175354004, "step": 5585 }, { "epoch": 0.7783738591235282, "grad_norm": 0.6324864029884338, "learning_rate": 1.3429189723116693e-06, "loss": 0.07226228713989258, "step": 5586 }, { "epoch": 0.7785132028147426, "grad_norm": 0.43839865922927856, "learning_rate": 1.3413122140054219e-06, "loss": 0.062835693359375, "step": 5587 }, { "epoch": 0.7786525465059569, "grad_norm": 0.6940987706184387, "learning_rate": 1.3397062685760715e-06, "loss": 0.06900787353515625, "step": 5588 }, { "epoch": 0.7787918901971713, "grad_norm": 0.6991786360740662, "learning_rate": 1.3381011363804208e-06, "loss": 0.08220863342285156, "step": 5589 }, { "epoch": 0.7789312338883857, "grad_norm": 0.7723941206932068, "learning_rate": 1.3364968177750953e-06, "loss": 0.09129977226257324, "step": 5590 }, { "epoch": 0.7790705775796001, "grad_norm": 0.604074239730835, "learning_rate": 1.3348933131165387e-06, "loss": 0.07057273387908936, "step": 5591 }, { "epoch": 0.7792099212708145, "grad_norm": 0.5963191390037537, "learning_rate": 1.333290622761011e-06, "loss": 0.0666341781616211, "step": 5592 }, { "epoch": 0.7793492649620288, "grad_norm": 0.8938719630241394, "learning_rate": 1.3316887470645956e-06, "loss": 0.06898736953735352, "step": 5593 }, { "epoch": 0.7794886086532432, "grad_norm": 0.5675240159034729, "learning_rate": 1.3300876863831903e-06, "loss": 0.05815410614013672, "step": 5594 }, { "epoch": 0.7796279523444576, "grad_norm": 0.7026058435440063, "learning_rate": 1.3284874410725174e-06, "loss": 0.08531785011291504, "step": 5595 }, { "epoch": 0.779767296035672, "grad_norm": 1.0714246034622192, "learning_rate": 1.3268880114881112e-06, "loss": 0.08025741577148438, "step": 5596 }, { "epoch": 0.7799066397268863, "grad_norm": 0.5254615545272827, "learning_rate": 1.3252893979853304e-06, "loss": 0.07370758056640625, "step": 5597 }, { "epoch": 0.7800459834181007, "grad_norm": 0.8921324014663696, "learning_rate": 1.3236916009193517e-06, "loss": 0.07470512390136719, "step": 5598 }, { "epoch": 0.7801853271093151, "grad_norm": 0.8000070452690125, "learning_rate": 1.3220946206451678e-06, "loss": 0.08353614807128906, "step": 5599 }, { "epoch": 0.7803246708005295, "grad_norm": 0.8252284526824951, "learning_rate": 1.3204984575175893e-06, "loss": 0.06933784484863281, "step": 5600 }, { "epoch": 0.7804640144917439, "grad_norm": 1.030918002128601, "learning_rate": 1.31890311189125e-06, "loss": 0.0827484130859375, "step": 5601 }, { "epoch": 0.7806033581829582, "grad_norm": 0.6888065338134766, "learning_rate": 1.317308584120599e-06, "loss": 0.07599830627441406, "step": 5602 }, { "epoch": 0.7807427018741726, "grad_norm": 0.8345720767974854, "learning_rate": 1.3157148745599035e-06, "loss": 0.07614898681640625, "step": 5603 }, { "epoch": 0.780882045565387, "grad_norm": 1.3790546655654907, "learning_rate": 1.314121983563248e-06, "loss": 0.07501068711280823, "step": 5604 }, { "epoch": 0.7810213892566014, "grad_norm": 1.0128380060195923, "learning_rate": 1.3125299114845375e-06, "loss": 0.09411239624023438, "step": 5605 }, { "epoch": 0.7811607329478157, "grad_norm": 1.048728108406067, "learning_rate": 1.3109386586774958e-06, "loss": 0.07778072357177734, "step": 5606 }, { "epoch": 0.7813000766390301, "grad_norm": 0.9785100221633911, "learning_rate": 1.3093482254956602e-06, "loss": 0.08457279205322266, "step": 5607 }, { "epoch": 0.7814394203302445, "grad_norm": 0.5021911263465881, "learning_rate": 1.3077586122923896e-06, "loss": 0.06883621215820312, "step": 5608 }, { "epoch": 0.781578764021459, "grad_norm": 1.2195401191711426, "learning_rate": 1.3061698194208616e-06, "loss": 0.08622169494628906, "step": 5609 }, { "epoch": 0.7817181077126734, "grad_norm": 0.7752107977867126, "learning_rate": 1.3045818472340683e-06, "loss": 0.06913614273071289, "step": 5610 }, { "epoch": 0.7818574514038877, "grad_norm": 0.6768199801445007, "learning_rate": 1.3029946960848188e-06, "loss": 0.059725284576416016, "step": 5611 }, { "epoch": 0.7819967950951021, "grad_norm": 0.9353742599487305, "learning_rate": 1.3014083663257443e-06, "loss": 0.09797096252441406, "step": 5612 }, { "epoch": 0.7821361387863165, "grad_norm": 1.1650303602218628, "learning_rate": 1.299822858309292e-06, "loss": 0.0800018310546875, "step": 5613 }, { "epoch": 0.7822754824775309, "grad_norm": 0.503937840461731, "learning_rate": 1.2982381723877235e-06, "loss": 0.06399345397949219, "step": 5614 }, { "epoch": 0.7824148261687452, "grad_norm": 0.5699520707130432, "learning_rate": 1.2966543089131196e-06, "loss": 0.060448646545410156, "step": 5615 }, { "epoch": 0.7825541698599596, "grad_norm": 1.1082525253295898, "learning_rate": 1.295071268237379e-06, "loss": 0.07193756103515625, "step": 5616 }, { "epoch": 0.782693513551174, "grad_norm": 0.5120639204978943, "learning_rate": 1.2934890507122195e-06, "loss": 0.05395317077636719, "step": 5617 }, { "epoch": 0.7828328572423884, "grad_norm": 0.7570551633834839, "learning_rate": 1.2919076566891703e-06, "loss": 0.08810615539550781, "step": 5618 }, { "epoch": 0.7829722009336028, "grad_norm": 0.9172307252883911, "learning_rate": 1.2903270865195837e-06, "loss": 0.09351921081542969, "step": 5619 }, { "epoch": 0.7831115446248171, "grad_norm": 0.6521072387695312, "learning_rate": 1.2887473405546254e-06, "loss": 0.07165718078613281, "step": 5620 }, { "epoch": 0.7832508883160315, "grad_norm": 0.9415944814682007, "learning_rate": 1.2871684191452772e-06, "loss": 0.08312797546386719, "step": 5621 }, { "epoch": 0.7833902320072459, "grad_norm": 0.8859607577323914, "learning_rate": 1.2855903226423412e-06, "loss": 0.07974910736083984, "step": 5622 }, { "epoch": 0.7835295756984603, "grad_norm": 0.7510119676589966, "learning_rate": 1.2840130513964338e-06, "loss": 0.07325172424316406, "step": 5623 }, { "epoch": 0.7836689193896746, "grad_norm": 1.104014277458191, "learning_rate": 1.2824366057579917e-06, "loss": 0.06602191925048828, "step": 5624 }, { "epoch": 0.783808263080889, "grad_norm": 0.8483291268348694, "learning_rate": 1.2808609860772598e-06, "loss": 0.0735921859741211, "step": 5625 }, { "epoch": 0.7839476067721034, "grad_norm": 1.1893967390060425, "learning_rate": 1.2792861927043071e-06, "loss": 0.10873031616210938, "step": 5626 }, { "epoch": 0.7840869504633178, "grad_norm": 0.6334059238433838, "learning_rate": 1.277712225989019e-06, "loss": 0.07048988342285156, "step": 5627 }, { "epoch": 0.7842262941545322, "grad_norm": 0.8972265720367432, "learning_rate": 1.2761390862810907e-06, "loss": 0.0866703987121582, "step": 5628 }, { "epoch": 0.7843656378457465, "grad_norm": 0.6725735664367676, "learning_rate": 1.274566773930041e-06, "loss": 0.0665278434753418, "step": 5629 }, { "epoch": 0.7845049815369609, "grad_norm": 0.7837823629379272, "learning_rate": 1.272995289285202e-06, "loss": 0.0869903564453125, "step": 5630 }, { "epoch": 0.7846443252281753, "grad_norm": 0.35130375623703003, "learning_rate": 1.2714246326957213e-06, "loss": 0.055919647216796875, "step": 5631 }, { "epoch": 0.7847836689193897, "grad_norm": 0.8214385509490967, "learning_rate": 1.2698548045105608e-06, "loss": 0.07953548431396484, "step": 5632 }, { "epoch": 0.784923012610604, "grad_norm": 0.9023416638374329, "learning_rate": 1.2682858050785018e-06, "loss": 0.08021926879882812, "step": 5633 }, { "epoch": 0.7850623563018184, "grad_norm": 0.5227757096290588, "learning_rate": 1.266717634748142e-06, "loss": 0.05702543258666992, "step": 5634 }, { "epoch": 0.7852016999930328, "grad_norm": 1.2199453115463257, "learning_rate": 1.2651502938678917e-06, "loss": 0.07266378402709961, "step": 5635 }, { "epoch": 0.7853410436842472, "grad_norm": 0.660539984703064, "learning_rate": 1.2635837827859766e-06, "loss": 0.09238624572753906, "step": 5636 }, { "epoch": 0.7854803873754616, "grad_norm": 0.6907209157943726, "learning_rate": 1.2620181018504406e-06, "loss": 0.07595348358154297, "step": 5637 }, { "epoch": 0.7856197310666759, "grad_norm": 1.0180082321166992, "learning_rate": 1.2604532514091444e-06, "loss": 0.10150337219238281, "step": 5638 }, { "epoch": 0.7857590747578903, "grad_norm": 0.5710105299949646, "learning_rate": 1.258889231809759e-06, "loss": 0.07517242431640625, "step": 5639 }, { "epoch": 0.7858984184491047, "grad_norm": 0.6921501755714417, "learning_rate": 1.2573260433997768e-06, "loss": 0.07092142105102539, "step": 5640 }, { "epoch": 0.7860377621403191, "grad_norm": 1.0989181995391846, "learning_rate": 1.2557636865265e-06, "loss": 0.09829139709472656, "step": 5641 }, { "epoch": 0.7861771058315334, "grad_norm": 0.616476833820343, "learning_rate": 1.254202161537051e-06, "loss": 0.08056259155273438, "step": 5642 }, { "epoch": 0.7863164495227478, "grad_norm": 0.4434530436992645, "learning_rate": 1.2526414687783616e-06, "loss": 0.06014728546142578, "step": 5643 }, { "epoch": 0.7864557932139622, "grad_norm": 0.787553071975708, "learning_rate": 1.2510816085971849e-06, "loss": 0.09225177764892578, "step": 5644 }, { "epoch": 0.7865951369051766, "grad_norm": 0.634536862373352, "learning_rate": 1.2495225813400864e-06, "loss": 0.07228612899780273, "step": 5645 }, { "epoch": 0.786734480596391, "grad_norm": 0.47330549359321594, "learning_rate": 1.247964387353446e-06, "loss": 0.06326675415039062, "step": 5646 }, { "epoch": 0.7868738242876053, "grad_norm": 0.6280648112297058, "learning_rate": 1.2464070269834566e-06, "loss": 0.07564258575439453, "step": 5647 }, { "epoch": 0.7870131679788197, "grad_norm": 0.6957798600196838, "learning_rate": 1.2448505005761297e-06, "loss": 0.08156299591064453, "step": 5648 }, { "epoch": 0.7871525116700342, "grad_norm": 0.7490731477737427, "learning_rate": 1.2432948084772917e-06, "loss": 0.06814956665039062, "step": 5649 }, { "epoch": 0.7872918553612486, "grad_norm": 0.914962887763977, "learning_rate": 1.2417399510325785e-06, "loss": 0.07686328887939453, "step": 5650 }, { "epoch": 0.787431199052463, "grad_norm": 1.2959959506988525, "learning_rate": 1.2401859285874474e-06, "loss": 0.08647632598876953, "step": 5651 }, { "epoch": 0.7875705427436773, "grad_norm": 0.5771473050117493, "learning_rate": 1.2386327414871635e-06, "loss": 0.07569122314453125, "step": 5652 }, { "epoch": 0.7877098864348917, "grad_norm": 0.7960613965988159, "learning_rate": 1.237080390076812e-06, "loss": 0.07817268371582031, "step": 5653 }, { "epoch": 0.7878492301261061, "grad_norm": 0.7012271285057068, "learning_rate": 1.2355288747012878e-06, "loss": 0.07130473852157593, "step": 5654 }, { "epoch": 0.7879885738173205, "grad_norm": 0.40508466958999634, "learning_rate": 1.2339781957053031e-06, "loss": 0.056743621826171875, "step": 5655 }, { "epoch": 0.7881279175085348, "grad_norm": 0.5176573991775513, "learning_rate": 1.232428353433387e-06, "loss": 0.062774658203125, "step": 5656 }, { "epoch": 0.7882672611997492, "grad_norm": 0.4961894154548645, "learning_rate": 1.2308793482298724e-06, "loss": 0.07096385955810547, "step": 5657 }, { "epoch": 0.7884066048909636, "grad_norm": 1.0963605642318726, "learning_rate": 1.2293311804389162e-06, "loss": 0.08448410034179688, "step": 5658 }, { "epoch": 0.788545948582178, "grad_norm": 0.9202345013618469, "learning_rate": 1.227783850404487e-06, "loss": 0.0704507827758789, "step": 5659 }, { "epoch": 0.7886852922733923, "grad_norm": 0.6829420924186707, "learning_rate": 1.2262373584703642e-06, "loss": 0.07684993743896484, "step": 5660 }, { "epoch": 0.7888246359646067, "grad_norm": 0.9859340190887451, "learning_rate": 1.2246917049801449e-06, "loss": 0.102783203125, "step": 5661 }, { "epoch": 0.7889639796558211, "grad_norm": 0.4612536132335663, "learning_rate": 1.2231468902772354e-06, "loss": 0.05433368682861328, "step": 5662 }, { "epoch": 0.7891033233470355, "grad_norm": 0.6716784834861755, "learning_rate": 1.221602914704862e-06, "loss": 0.07482385635375977, "step": 5663 }, { "epoch": 0.7892426670382499, "grad_norm": 0.7840809226036072, "learning_rate": 1.2200597786060565e-06, "loss": 0.08660602569580078, "step": 5664 }, { "epoch": 0.7893820107294642, "grad_norm": 0.7222159504890442, "learning_rate": 1.2185174823236711e-06, "loss": 0.0861673355102539, "step": 5665 }, { "epoch": 0.7895213544206786, "grad_norm": 0.616497278213501, "learning_rate": 1.2169760262003693e-06, "loss": 0.07773017883300781, "step": 5666 }, { "epoch": 0.789660698111893, "grad_norm": 0.6839821934700012, "learning_rate": 1.2154354105786276e-06, "loss": 0.06346273422241211, "step": 5667 }, { "epoch": 0.7898000418031074, "grad_norm": 0.6633926630020142, "learning_rate": 1.2138956358007325e-06, "loss": 0.07744598388671875, "step": 5668 }, { "epoch": 0.7899393854943217, "grad_norm": 0.4587746262550354, "learning_rate": 1.212356702208789e-06, "loss": 0.05448341369628906, "step": 5669 }, { "epoch": 0.7900787291855361, "grad_norm": 0.5065679550170898, "learning_rate": 1.210818610144714e-06, "loss": 0.07303524017333984, "step": 5670 }, { "epoch": 0.7902180728767505, "grad_norm": 1.3273413181304932, "learning_rate": 1.209281359950234e-06, "loss": 0.07344245910644531, "step": 5671 }, { "epoch": 0.7903574165679649, "grad_norm": 0.7360719442367554, "learning_rate": 1.2077449519668943e-06, "loss": 0.09380722045898438, "step": 5672 }, { "epoch": 0.7904967602591793, "grad_norm": 0.8116104602813721, "learning_rate": 1.2062093865360458e-06, "loss": 0.07744598388671875, "step": 5673 }, { "epoch": 0.7906361039503936, "grad_norm": 0.5729685425758362, "learning_rate": 1.2046746639988593e-06, "loss": 0.07368755340576172, "step": 5674 }, { "epoch": 0.790775447641608, "grad_norm": 0.7749955058097839, "learning_rate": 1.2031407846963122e-06, "loss": 0.08969688415527344, "step": 5675 }, { "epoch": 0.7909147913328224, "grad_norm": 0.8311667442321777, "learning_rate": 1.201607748969199e-06, "loss": 0.07836151123046875, "step": 5676 }, { "epoch": 0.7910541350240368, "grad_norm": 0.598268449306488, "learning_rate": 1.2000755571581263e-06, "loss": 0.07411003112792969, "step": 5677 }, { "epoch": 0.7911934787152511, "grad_norm": 0.6500051617622375, "learning_rate": 1.1985442096035116e-06, "loss": 0.06468772888183594, "step": 5678 }, { "epoch": 0.7913328224064655, "grad_norm": 0.6247481107711792, "learning_rate": 1.1970137066455834e-06, "loss": 0.062035560607910156, "step": 5679 }, { "epoch": 0.7914721660976799, "grad_norm": 0.8737808465957642, "learning_rate": 1.1954840486243857e-06, "loss": 0.08247232437133789, "step": 5680 }, { "epoch": 0.7916115097888943, "grad_norm": 0.9489723443984985, "learning_rate": 1.193955235879775e-06, "loss": 0.08294963836669922, "step": 5681 }, { "epoch": 0.7917508534801087, "grad_norm": 0.4769987463951111, "learning_rate": 1.1924272687514182e-06, "loss": 0.056969642639160156, "step": 5682 }, { "epoch": 0.791890197171323, "grad_norm": 1.2616753578186035, "learning_rate": 1.1909001475787917e-06, "loss": 0.06429576873779297, "step": 5683 }, { "epoch": 0.7920295408625374, "grad_norm": 0.6864468455314636, "learning_rate": 1.1893738727011894e-06, "loss": 0.07878875732421875, "step": 5684 }, { "epoch": 0.7921688845537518, "grad_norm": 1.2863131761550903, "learning_rate": 1.187848444457716e-06, "loss": 0.0987401008605957, "step": 5685 }, { "epoch": 0.7923082282449662, "grad_norm": 0.7748473286628723, "learning_rate": 1.1863238631872843e-06, "loss": 0.07822322845458984, "step": 5686 }, { "epoch": 0.7924475719361805, "grad_norm": 0.5075966119766235, "learning_rate": 1.184800129228622e-06, "loss": 0.0639352798461914, "step": 5687 }, { "epoch": 0.7925869156273949, "grad_norm": 0.6282135844230652, "learning_rate": 1.1832772429202716e-06, "loss": 0.07343864440917969, "step": 5688 }, { "epoch": 0.7927262593186094, "grad_norm": 0.9456251859664917, "learning_rate": 1.1817552046005777e-06, "loss": 0.08191680908203125, "step": 5689 }, { "epoch": 0.7928656030098238, "grad_norm": 0.49789270758628845, "learning_rate": 1.1802340146077045e-06, "loss": 0.06604719161987305, "step": 5690 }, { "epoch": 0.7930049467010382, "grad_norm": 0.8532386422157288, "learning_rate": 1.1787136732796289e-06, "loss": 0.07911014556884766, "step": 5691 }, { "epoch": 0.7931442903922525, "grad_norm": 1.0224989652633667, "learning_rate": 1.177194180954132e-06, "loss": 0.07095146179199219, "step": 5692 }, { "epoch": 0.7932836340834669, "grad_norm": 0.9348641633987427, "learning_rate": 1.1756755379688133e-06, "loss": 0.07364654541015625, "step": 5693 }, { "epoch": 0.7934229777746813, "grad_norm": 0.6542927026748657, "learning_rate": 1.174157744661078e-06, "loss": 0.0695199966430664, "step": 5694 }, { "epoch": 0.7935623214658957, "grad_norm": 1.2794954776763916, "learning_rate": 1.1726408013681473e-06, "loss": 0.08817100524902344, "step": 5695 }, { "epoch": 0.79370166515711, "grad_norm": 0.8070878982543945, "learning_rate": 1.1711247084270494e-06, "loss": 0.09493255615234375, "step": 5696 }, { "epoch": 0.7938410088483244, "grad_norm": 0.7147707939147949, "learning_rate": 1.1696094661746267e-06, "loss": 0.06383609771728516, "step": 5697 }, { "epoch": 0.7939803525395388, "grad_norm": 0.6518913507461548, "learning_rate": 1.1680950749475328e-06, "loss": 0.06015968322753906, "step": 5698 }, { "epoch": 0.7941196962307532, "grad_norm": 0.6827947497367859, "learning_rate": 1.1665815350822291e-06, "loss": 0.06334638595581055, "step": 5699 }, { "epoch": 0.7942590399219676, "grad_norm": 0.8735592365264893, "learning_rate": 1.1650688469149884e-06, "loss": 0.06533527374267578, "step": 5700 }, { "epoch": 0.7943983836131819, "grad_norm": 0.5800192952156067, "learning_rate": 1.1635570107818973e-06, "loss": 0.0689854621887207, "step": 5701 }, { "epoch": 0.7945377273043963, "grad_norm": 0.5724316239356995, "learning_rate": 1.1620460270188516e-06, "loss": 0.07805442810058594, "step": 5702 }, { "epoch": 0.7946770709956107, "grad_norm": 0.7887135744094849, "learning_rate": 1.1605358959615559e-06, "loss": 0.0943441390991211, "step": 5703 }, { "epoch": 0.7948164146868251, "grad_norm": 0.7473483681678772, "learning_rate": 1.159026617945529e-06, "loss": 0.09556102752685547, "step": 5704 }, { "epoch": 0.7949557583780394, "grad_norm": 0.5912235379219055, "learning_rate": 1.1575181933060952e-06, "loss": 0.056392669677734375, "step": 5705 }, { "epoch": 0.7950951020692538, "grad_norm": 0.8062491416931152, "learning_rate": 1.156010622378395e-06, "loss": 0.0748739242553711, "step": 5706 }, { "epoch": 0.7952344457604682, "grad_norm": 0.6321066617965698, "learning_rate": 1.1545039054973733e-06, "loss": 0.06788825988769531, "step": 5707 }, { "epoch": 0.7953737894516826, "grad_norm": 0.5718489289283752, "learning_rate": 1.1529980429977899e-06, "loss": 0.074127197265625, "step": 5708 }, { "epoch": 0.795513133142897, "grad_norm": 0.5482718348503113, "learning_rate": 1.151493035214214e-06, "loss": 0.06953573226928711, "step": 5709 }, { "epoch": 0.7956524768341113, "grad_norm": 0.8868310451507568, "learning_rate": 1.1499888824810223e-06, "loss": 0.0742330551147461, "step": 5710 }, { "epoch": 0.7957918205253257, "grad_norm": 0.863632321357727, "learning_rate": 1.148485585132403e-06, "loss": 0.0719461441040039, "step": 5711 }, { "epoch": 0.7959311642165401, "grad_norm": 1.2645831108093262, "learning_rate": 1.1469831435023542e-06, "loss": 0.10070943832397461, "step": 5712 }, { "epoch": 0.7960705079077545, "grad_norm": 0.7414016127586365, "learning_rate": 1.1454815579246874e-06, "loss": 0.09296131134033203, "step": 5713 }, { "epoch": 0.7962098515989688, "grad_norm": 1.0599627494812012, "learning_rate": 1.143980828733018e-06, "loss": 0.10108137130737305, "step": 5714 }, { "epoch": 0.7963491952901832, "grad_norm": 0.5656447410583496, "learning_rate": 1.1424809562607725e-06, "loss": 0.06674861907958984, "step": 5715 }, { "epoch": 0.7964885389813976, "grad_norm": 0.5652531981468201, "learning_rate": 1.1409819408411898e-06, "loss": 0.0741424560546875, "step": 5716 }, { "epoch": 0.796627882672612, "grad_norm": 1.0789673328399658, "learning_rate": 1.1394837828073184e-06, "loss": 0.09825706481933594, "step": 5717 }, { "epoch": 0.7967672263638264, "grad_norm": 0.6636660695075989, "learning_rate": 1.1379864824920116e-06, "loss": 0.0691385269165039, "step": 5718 }, { "epoch": 0.7969065700550407, "grad_norm": 0.612408459186554, "learning_rate": 1.1364900402279394e-06, "loss": 0.07776212692260742, "step": 5719 }, { "epoch": 0.7970459137462551, "grad_norm": 0.5241491198539734, "learning_rate": 1.134994456347574e-06, "loss": 0.06756305694580078, "step": 5720 }, { "epoch": 0.7971852574374695, "grad_norm": 0.8321913480758667, "learning_rate": 1.1334997311832003e-06, "loss": 0.07395648956298828, "step": 5721 }, { "epoch": 0.7973246011286839, "grad_norm": 0.8431848287582397, "learning_rate": 1.132005865066912e-06, "loss": 0.07009410858154297, "step": 5722 }, { "epoch": 0.7974639448198982, "grad_norm": 0.8837038278579712, "learning_rate": 1.1305128583306125e-06, "loss": 0.10223388671875, "step": 5723 }, { "epoch": 0.7976032885111126, "grad_norm": 0.7437003254890442, "learning_rate": 1.1290207113060158e-06, "loss": 0.09070491790771484, "step": 5724 }, { "epoch": 0.797742632202327, "grad_norm": 0.8584714531898499, "learning_rate": 1.127529424324641e-06, "loss": 0.07998466491699219, "step": 5725 }, { "epoch": 0.7978819758935414, "grad_norm": 1.0786871910095215, "learning_rate": 1.1260389977178166e-06, "loss": 0.07006311416625977, "step": 5726 }, { "epoch": 0.7980213195847558, "grad_norm": 0.607450008392334, "learning_rate": 1.1245494318166844e-06, "loss": 0.061061859130859375, "step": 5727 }, { "epoch": 0.7981606632759701, "grad_norm": 0.6281327605247498, "learning_rate": 1.1230607269521886e-06, "loss": 0.07858467102050781, "step": 5728 }, { "epoch": 0.7983000069671845, "grad_norm": 0.4540441334247589, "learning_rate": 1.1215728834550877e-06, "loss": 0.06103038787841797, "step": 5729 }, { "epoch": 0.798439350658399, "grad_norm": 0.5567063093185425, "learning_rate": 1.1200859016559473e-06, "loss": 0.06816720962524414, "step": 5730 }, { "epoch": 0.7985786943496134, "grad_norm": 0.6244889497756958, "learning_rate": 1.1185997818851402e-06, "loss": 0.06808805465698242, "step": 5731 }, { "epoch": 0.7987180380408277, "grad_norm": 0.4967074394226074, "learning_rate": 1.1171145244728454e-06, "loss": 0.06504344940185547, "step": 5732 }, { "epoch": 0.7988573817320421, "grad_norm": 0.7813212275505066, "learning_rate": 1.1156301297490563e-06, "loss": 0.08960533142089844, "step": 5733 }, { "epoch": 0.7989967254232565, "grad_norm": 0.925385057926178, "learning_rate": 1.1141465980435713e-06, "loss": 0.0721893310546875, "step": 5734 }, { "epoch": 0.7991360691144709, "grad_norm": 0.6700447797775269, "learning_rate": 1.112663929685997e-06, "loss": 0.07873153686523438, "step": 5735 }, { "epoch": 0.7992754128056853, "grad_norm": 0.5898081064224243, "learning_rate": 1.111182125005747e-06, "loss": 0.06667375564575195, "step": 5736 }, { "epoch": 0.7994147564968996, "grad_norm": 1.1816012859344482, "learning_rate": 1.1097011843320454e-06, "loss": 0.08234691619873047, "step": 5737 }, { "epoch": 0.799554100188114, "grad_norm": 0.7226212620735168, "learning_rate": 1.1082211079939248e-06, "loss": 0.06772708892822266, "step": 5738 }, { "epoch": 0.7996934438793284, "grad_norm": 0.8382013440132141, "learning_rate": 1.106741896320222e-06, "loss": 0.06949234008789062, "step": 5739 }, { "epoch": 0.7998327875705428, "grad_norm": 0.7422002553939819, "learning_rate": 1.1052635496395864e-06, "loss": 0.06384563446044922, "step": 5740 }, { "epoch": 0.7999721312617571, "grad_norm": 0.8904088735580444, "learning_rate": 1.1037860682804708e-06, "loss": 0.07745170593261719, "step": 5741 }, { "epoch": 0.8001114749529715, "grad_norm": 0.980732798576355, "learning_rate": 1.1023094525711397e-06, "loss": 0.0968637466430664, "step": 5742 }, { "epoch": 0.8002508186441859, "grad_norm": 1.0856729745864868, "learning_rate": 1.1008337028396616e-06, "loss": 0.08038520812988281, "step": 5743 }, { "epoch": 0.8003901623354003, "grad_norm": 0.35545316338539124, "learning_rate": 1.099358819413915e-06, "loss": 0.05230236053466797, "step": 5744 }, { "epoch": 0.8005295060266147, "grad_norm": 0.8302041292190552, "learning_rate": 1.0978848026215865e-06, "loss": 0.08105087280273438, "step": 5745 }, { "epoch": 0.800668849717829, "grad_norm": 1.31346595287323, "learning_rate": 1.0964116527901686e-06, "loss": 0.10049819946289062, "step": 5746 }, { "epoch": 0.8008081934090434, "grad_norm": 0.8570665717124939, "learning_rate": 1.094939370246959e-06, "loss": 0.06899785995483398, "step": 5747 }, { "epoch": 0.8009475371002578, "grad_norm": 0.8357130289077759, "learning_rate": 1.093467955319068e-06, "loss": 0.07830667495727539, "step": 5748 }, { "epoch": 0.8010868807914722, "grad_norm": 0.6459909081459045, "learning_rate": 1.0919974083334106e-06, "loss": 0.0680999755859375, "step": 5749 }, { "epoch": 0.8012262244826865, "grad_norm": 0.8639592528343201, "learning_rate": 1.0905277296167066e-06, "loss": 0.09305572509765625, "step": 5750 }, { "epoch": 0.8013655681739009, "grad_norm": 0.8843587636947632, "learning_rate": 1.089058919495488e-06, "loss": 0.07374286651611328, "step": 5751 }, { "epoch": 0.8015049118651153, "grad_norm": 1.0174516439437866, "learning_rate": 1.0875909782960887e-06, "loss": 0.0831003189086914, "step": 5752 }, { "epoch": 0.8016442555563297, "grad_norm": 1.4868383407592773, "learning_rate": 1.0861239063446511e-06, "loss": 0.09108924865722656, "step": 5753 }, { "epoch": 0.801783599247544, "grad_norm": 0.8210733532905579, "learning_rate": 1.0846577039671263e-06, "loss": 0.0683603286743164, "step": 5754 }, { "epoch": 0.8019229429387584, "grad_norm": 0.7042092084884644, "learning_rate": 1.0831923714892706e-06, "loss": 0.07225418090820312, "step": 5755 }, { "epoch": 0.8020622866299728, "grad_norm": 0.8995766639709473, "learning_rate": 1.0817279092366507e-06, "loss": 0.0849614143371582, "step": 5756 }, { "epoch": 0.8022016303211872, "grad_norm": 0.6586704850196838, "learning_rate": 1.0802643175346312e-06, "loss": 0.0772867202758789, "step": 5757 }, { "epoch": 0.8023409740124016, "grad_norm": 0.6314228177070618, "learning_rate": 1.0788015967083904e-06, "loss": 0.06831645965576172, "step": 5758 }, { "epoch": 0.8024803177036159, "grad_norm": 1.04548978805542, "learning_rate": 1.0773397470829145e-06, "loss": 0.0875844955444336, "step": 5759 }, { "epoch": 0.8026196613948303, "grad_norm": 0.592960000038147, "learning_rate": 1.0758787689829891e-06, "loss": 0.07794952392578125, "step": 5760 }, { "epoch": 0.8027590050860447, "grad_norm": 0.8316231966018677, "learning_rate": 1.074418662733212e-06, "loss": 0.08597373962402344, "step": 5761 }, { "epoch": 0.8028983487772591, "grad_norm": 0.484088271856308, "learning_rate": 1.0729594286579876e-06, "loss": 0.06820297241210938, "step": 5762 }, { "epoch": 0.8030376924684735, "grad_norm": 0.5088657736778259, "learning_rate": 1.0715010670815212e-06, "loss": 0.05934429168701172, "step": 5763 }, { "epoch": 0.8031770361596878, "grad_norm": 0.8095436096191406, "learning_rate": 1.0700435783278278e-06, "loss": 0.08784770965576172, "step": 5764 }, { "epoch": 0.8033163798509022, "grad_norm": 0.526528537273407, "learning_rate": 1.068586962720729e-06, "loss": 0.06478309631347656, "step": 5765 }, { "epoch": 0.8034557235421166, "grad_norm": 0.6358992457389832, "learning_rate": 1.0671312205838525e-06, "loss": 0.07024383544921875, "step": 5766 }, { "epoch": 0.803595067233331, "grad_norm": 0.7058764696121216, "learning_rate": 1.06567635224063e-06, "loss": 0.07958698272705078, "step": 5767 }, { "epoch": 0.8037344109245453, "grad_norm": 0.694105327129364, "learning_rate": 1.0642223580142985e-06, "loss": 0.06753253936767578, "step": 5768 }, { "epoch": 0.8038737546157597, "grad_norm": 0.6402261853218079, "learning_rate": 1.0627692382279038e-06, "loss": 0.07793235778808594, "step": 5769 }, { "epoch": 0.8040130983069742, "grad_norm": 0.6290509104728699, "learning_rate": 1.0613169932042972e-06, "loss": 0.07836246490478516, "step": 5770 }, { "epoch": 0.8041524419981886, "grad_norm": 0.7494250535964966, "learning_rate": 1.0598656232661313e-06, "loss": 0.07706308364868164, "step": 5771 }, { "epoch": 0.804291785689403, "grad_norm": 1.1165159940719604, "learning_rate": 1.0584151287358708e-06, "loss": 0.07107257843017578, "step": 5772 }, { "epoch": 0.8044311293806173, "grad_norm": 0.9460484385490417, "learning_rate": 1.0569655099357795e-06, "loss": 0.08717918395996094, "step": 5773 }, { "epoch": 0.8045704730718317, "grad_norm": 1.183197259902954, "learning_rate": 1.0555167671879319e-06, "loss": 0.07618188858032227, "step": 5774 }, { "epoch": 0.8047098167630461, "grad_norm": 0.43467122316360474, "learning_rate": 1.0540689008142035e-06, "loss": 0.06349802017211914, "step": 5775 }, { "epoch": 0.8048491604542605, "grad_norm": 0.7235391139984131, "learning_rate": 1.052621911136278e-06, "loss": 0.0809793472290039, "step": 5776 }, { "epoch": 0.8049885041454748, "grad_norm": 0.6641405820846558, "learning_rate": 1.0511757984756455e-06, "loss": 0.07903861999511719, "step": 5777 }, { "epoch": 0.8051278478366892, "grad_norm": 0.9359968304634094, "learning_rate": 1.049730563153597e-06, "loss": 0.0889129638671875, "step": 5778 }, { "epoch": 0.8052671915279036, "grad_norm": 0.5987400412559509, "learning_rate": 1.0482862054912296e-06, "loss": 0.0827474594116211, "step": 5779 }, { "epoch": 0.805406535219118, "grad_norm": 0.7382127642631531, "learning_rate": 1.0468427258094481e-06, "loss": 0.06740379333496094, "step": 5780 }, { "epoch": 0.8055458789103324, "grad_norm": 1.0870888233184814, "learning_rate": 1.045400124428963e-06, "loss": 0.07452201843261719, "step": 5781 }, { "epoch": 0.8056852226015467, "grad_norm": 0.733407199382782, "learning_rate": 1.043958401670283e-06, "loss": 0.08126163482666016, "step": 5782 }, { "epoch": 0.8058245662927611, "grad_norm": 0.6142202615737915, "learning_rate": 1.04251755785373e-06, "loss": 0.08411216735839844, "step": 5783 }, { "epoch": 0.8059639099839755, "grad_norm": 0.5644004344940186, "learning_rate": 1.0410775932994232e-06, "loss": 0.06555747985839844, "step": 5784 }, { "epoch": 0.8061032536751899, "grad_norm": 0.8478513360023499, "learning_rate": 1.039638508327293e-06, "loss": 0.08652782440185547, "step": 5785 }, { "epoch": 0.8062425973664042, "grad_norm": 0.9470335245132446, "learning_rate": 1.0382003032570682e-06, "loss": 0.07345008850097656, "step": 5786 }, { "epoch": 0.8063819410576186, "grad_norm": 0.9499306082725525, "learning_rate": 1.0367629784082867e-06, "loss": 0.07506847381591797, "step": 5787 }, { "epoch": 0.806521284748833, "grad_norm": 0.9214176535606384, "learning_rate": 1.0353265341002916e-06, "loss": 0.08654403686523438, "step": 5788 }, { "epoch": 0.8066606284400474, "grad_norm": 0.8278779983520508, "learning_rate": 1.0338909706522232e-06, "loss": 0.07431983947753906, "step": 5789 }, { "epoch": 0.8067999721312618, "grad_norm": 0.6504801511764526, "learning_rate": 1.032456288383033e-06, "loss": 0.08333587646484375, "step": 5790 }, { "epoch": 0.8069393158224761, "grad_norm": 0.5685761570930481, "learning_rate": 1.0310224876114766e-06, "loss": 0.0703582763671875, "step": 5791 }, { "epoch": 0.8070786595136905, "grad_norm": 1.2421711683273315, "learning_rate": 1.0295895686561087e-06, "loss": 0.0770406723022461, "step": 5792 }, { "epoch": 0.8072180032049049, "grad_norm": 0.8349636793136597, "learning_rate": 1.0281575318352937e-06, "loss": 0.06879329681396484, "step": 5793 }, { "epoch": 0.8073573468961193, "grad_norm": 0.525767982006073, "learning_rate": 1.0267263774671953e-06, "loss": 0.07515668869018555, "step": 5794 }, { "epoch": 0.8074966905873336, "grad_norm": 0.7632953524589539, "learning_rate": 1.0252961058697858e-06, "loss": 0.08813953399658203, "step": 5795 }, { "epoch": 0.807636034278548, "grad_norm": 0.7333095669746399, "learning_rate": 1.0238667173608364e-06, "loss": 0.08032560348510742, "step": 5796 }, { "epoch": 0.8077753779697624, "grad_norm": 0.5129945278167725, "learning_rate": 1.0224382122579256e-06, "loss": 0.06612038612365723, "step": 5797 }, { "epoch": 0.8079147216609768, "grad_norm": 0.6263672709465027, "learning_rate": 1.0210105908784362e-06, "loss": 0.07516002655029297, "step": 5798 }, { "epoch": 0.8080540653521912, "grad_norm": 0.8226854205131531, "learning_rate": 1.0195838535395514e-06, "loss": 0.08858871459960938, "step": 5799 }, { "epoch": 0.8081934090434055, "grad_norm": 0.5753610134124756, "learning_rate": 1.0181580005582586e-06, "loss": 0.07246589660644531, "step": 5800 }, { "epoch": 0.8083327527346199, "grad_norm": 0.9000688791275024, "learning_rate": 1.0167330322513508e-06, "loss": 0.07801961898803711, "step": 5801 }, { "epoch": 0.8084720964258343, "grad_norm": 0.6535940170288086, "learning_rate": 1.0153089489354256e-06, "loss": 0.06521177291870117, "step": 5802 }, { "epoch": 0.8086114401170487, "grad_norm": 0.950708270072937, "learning_rate": 1.0138857509268784e-06, "loss": 0.09362983703613281, "step": 5803 }, { "epoch": 0.808750783808263, "grad_norm": 0.4773043096065521, "learning_rate": 1.012463438541914e-06, "loss": 0.05853080749511719, "step": 5804 }, { "epoch": 0.8088901274994774, "grad_norm": 0.6273754239082336, "learning_rate": 1.0110420120965354e-06, "loss": 0.07600736618041992, "step": 5805 }, { "epoch": 0.8090294711906918, "grad_norm": 0.4808265268802643, "learning_rate": 1.0096214719065534e-06, "loss": 0.0642387866973877, "step": 5806 }, { "epoch": 0.8091688148819062, "grad_norm": 1.105406641960144, "learning_rate": 1.008201818287577e-06, "loss": 0.1094818115234375, "step": 5807 }, { "epoch": 0.8093081585731206, "grad_norm": 0.628771185874939, "learning_rate": 1.0067830515550224e-06, "loss": 0.06955337524414062, "step": 5808 }, { "epoch": 0.8094475022643349, "grad_norm": 1.0054911375045776, "learning_rate": 1.0053651720241087e-06, "loss": 0.12217903137207031, "step": 5809 }, { "epoch": 0.8095868459555494, "grad_norm": 0.9578653573989868, "learning_rate": 1.0039481800098545e-06, "loss": 0.09075546264648438, "step": 5810 }, { "epoch": 0.8097261896467638, "grad_norm": 0.7636083364486694, "learning_rate": 1.0025320758270819e-06, "loss": 0.09618091583251953, "step": 5811 }, { "epoch": 0.8098655333379782, "grad_norm": 0.7211315035820007, "learning_rate": 1.001116859790418e-06, "loss": 0.07714414596557617, "step": 5812 }, { "epoch": 0.8100048770291925, "grad_norm": 0.5849892497062683, "learning_rate": 9.997025322142934e-07, "loss": 0.06917095184326172, "step": 5813 }, { "epoch": 0.8101442207204069, "grad_norm": 0.6300196051597595, "learning_rate": 9.98289093412938e-07, "loss": 0.06781482696533203, "step": 5814 }, { "epoch": 0.8102835644116213, "grad_norm": 0.7782094478607178, "learning_rate": 9.96876543700384e-07, "loss": 0.06855964660644531, "step": 5815 }, { "epoch": 0.8104229081028357, "grad_norm": 0.7013974189758301, "learning_rate": 9.95464883390469e-07, "loss": 0.08196067810058594, "step": 5816 }, { "epoch": 0.8105622517940501, "grad_norm": 0.5063486099243164, "learning_rate": 9.940541127968335e-07, "loss": 0.05888223648071289, "step": 5817 }, { "epoch": 0.8107015954852644, "grad_norm": 0.5694558024406433, "learning_rate": 9.92644232232915e-07, "loss": 0.07725143432617188, "step": 5818 }, { "epoch": 0.8108409391764788, "grad_norm": 0.543456494808197, "learning_rate": 9.912352420119587e-07, "loss": 0.0660848617553711, "step": 5819 }, { "epoch": 0.8109802828676932, "grad_norm": 0.6674659252166748, "learning_rate": 9.89827142447013e-07, "loss": 0.0771017074584961, "step": 5820 }, { "epoch": 0.8111196265589076, "grad_norm": 0.37803804874420166, "learning_rate": 9.884199338509193e-07, "loss": 0.05986499786376953, "step": 5821 }, { "epoch": 0.811258970250122, "grad_norm": 0.5193999409675598, "learning_rate": 9.87013616536331e-07, "loss": 0.07404756546020508, "step": 5822 }, { "epoch": 0.8113983139413363, "grad_norm": 0.9493328332901001, "learning_rate": 9.856081908156984e-07, "loss": 0.09098434448242188, "step": 5823 }, { "epoch": 0.8115376576325507, "grad_norm": 0.6158826351165771, "learning_rate": 9.842036570012776e-07, "loss": 0.0673990249633789, "step": 5824 }, { "epoch": 0.8116770013237651, "grad_norm": 1.3179837465286255, "learning_rate": 9.828000154051216e-07, "loss": 0.09388542175292969, "step": 5825 }, { "epoch": 0.8118163450149795, "grad_norm": 0.5789176821708679, "learning_rate": 9.813972663390864e-07, "loss": 0.08090448379516602, "step": 5826 }, { "epoch": 0.8119556887061938, "grad_norm": 0.6271077990531921, "learning_rate": 9.79995410114834e-07, "loss": 0.07261323928833008, "step": 5827 }, { "epoch": 0.8120950323974082, "grad_norm": 0.455306738615036, "learning_rate": 9.785944470438218e-07, "loss": 0.06658267974853516, "step": 5828 }, { "epoch": 0.8122343760886226, "grad_norm": 1.0837568044662476, "learning_rate": 9.771943774373138e-07, "loss": 0.08516502380371094, "step": 5829 }, { "epoch": 0.812373719779837, "grad_norm": 0.7600160241127014, "learning_rate": 9.757952016063738e-07, "loss": 0.0801248550415039, "step": 5830 }, { "epoch": 0.8125130634710513, "grad_norm": 0.6911755800247192, "learning_rate": 9.743969198618659e-07, "loss": 0.08527660369873047, "step": 5831 }, { "epoch": 0.8126524071622657, "grad_norm": 0.7316302061080933, "learning_rate": 9.729995325144548e-07, "loss": 0.06923484802246094, "step": 5832 }, { "epoch": 0.8127917508534801, "grad_norm": 0.9578540921211243, "learning_rate": 9.716030398746096e-07, "loss": 0.0875406265258789, "step": 5833 }, { "epoch": 0.8129310945446945, "grad_norm": 0.6737623810768127, "learning_rate": 9.702074422526004e-07, "loss": 0.06293296813964844, "step": 5834 }, { "epoch": 0.8130704382359089, "grad_norm": 0.9744225740432739, "learning_rate": 9.688127399584956e-07, "loss": 0.0949859619140625, "step": 5835 }, { "epoch": 0.8132097819271232, "grad_norm": 0.3488667607307434, "learning_rate": 9.674189333021655e-07, "loss": 0.05974769592285156, "step": 5836 }, { "epoch": 0.8133491256183376, "grad_norm": 0.6217020750045776, "learning_rate": 9.660260225932834e-07, "loss": 0.06505298614501953, "step": 5837 }, { "epoch": 0.813488469309552, "grad_norm": 0.6110140681266785, "learning_rate": 9.646340081413225e-07, "loss": 0.07755279541015625, "step": 5838 }, { "epoch": 0.8136278130007664, "grad_norm": 0.6303251385688782, "learning_rate": 9.632428902555546e-07, "loss": 0.06266593933105469, "step": 5839 }, { "epoch": 0.8137671566919807, "grad_norm": 0.676683247089386, "learning_rate": 9.618526692450564e-07, "loss": 0.08153915405273438, "step": 5840 }, { "epoch": 0.8139065003831951, "grad_norm": 0.6682897806167603, "learning_rate": 9.604633454187035e-07, "loss": 0.0693826675415039, "step": 5841 }, { "epoch": 0.8140458440744095, "grad_norm": 0.7074491381645203, "learning_rate": 9.59074919085171e-07, "loss": 0.06311607360839844, "step": 5842 }, { "epoch": 0.8141851877656239, "grad_norm": 0.749769926071167, "learning_rate": 9.57687390552935e-07, "loss": 0.0655374526977539, "step": 5843 }, { "epoch": 0.8143245314568383, "grad_norm": 1.0386730432510376, "learning_rate": 9.563007601302727e-07, "loss": 0.08151817321777344, "step": 5844 }, { "epoch": 0.8144638751480526, "grad_norm": 0.997795581817627, "learning_rate": 9.549150281252633e-07, "loss": 0.08831977844238281, "step": 5845 }, { "epoch": 0.814603218839267, "grad_norm": 0.720951497554779, "learning_rate": 9.535301948457842e-07, "loss": 0.08574962615966797, "step": 5846 }, { "epoch": 0.8147425625304814, "grad_norm": 0.6648963689804077, "learning_rate": 9.521462605995119e-07, "loss": 0.07390022277832031, "step": 5847 }, { "epoch": 0.8148819062216958, "grad_norm": 0.8313623666763306, "learning_rate": 9.507632256939264e-07, "loss": 0.0870208740234375, "step": 5848 }, { "epoch": 0.8150212499129101, "grad_norm": 1.0169134140014648, "learning_rate": 9.493810904363077e-07, "loss": 0.10340118408203125, "step": 5849 }, { "epoch": 0.8151605936041246, "grad_norm": 0.9522435665130615, "learning_rate": 9.479998551337322e-07, "loss": 0.09380769729614258, "step": 5850 }, { "epoch": 0.815299937295339, "grad_norm": 1.4579545259475708, "learning_rate": 9.466195200930817e-07, "loss": 0.10199165344238281, "step": 5851 }, { "epoch": 0.8154392809865534, "grad_norm": 0.6560986042022705, "learning_rate": 9.452400856210337e-07, "loss": 0.07818794250488281, "step": 5852 }, { "epoch": 0.8155786246777678, "grad_norm": 1.0055590867996216, "learning_rate": 9.438615520240651e-07, "loss": 0.0746150016784668, "step": 5853 }, { "epoch": 0.8157179683689821, "grad_norm": 0.4718244671821594, "learning_rate": 9.424839196084568e-07, "loss": 0.05596029758453369, "step": 5854 }, { "epoch": 0.8158573120601965, "grad_norm": 0.6781201362609863, "learning_rate": 9.411071886802869e-07, "loss": 0.085418701171875, "step": 5855 }, { "epoch": 0.8159966557514109, "grad_norm": 0.6631107330322266, "learning_rate": 9.397313595454349e-07, "loss": 0.08044719696044922, "step": 5856 }, { "epoch": 0.8161359994426253, "grad_norm": 0.7631700038909912, "learning_rate": 9.383564325095767e-07, "loss": 0.07706069946289062, "step": 5857 }, { "epoch": 0.8162753431338396, "grad_norm": 0.6054204106330872, "learning_rate": 9.369824078781897e-07, "loss": 0.07121658325195312, "step": 5858 }, { "epoch": 0.816414686825054, "grad_norm": 0.8858045935630798, "learning_rate": 9.356092859565524e-07, "loss": 0.07955360412597656, "step": 5859 }, { "epoch": 0.8165540305162684, "grad_norm": 0.7430837154388428, "learning_rate": 9.342370670497391e-07, "loss": 0.07010030746459961, "step": 5860 }, { "epoch": 0.8166933742074828, "grad_norm": 0.7067877650260925, "learning_rate": 9.328657514626266e-07, "loss": 0.06860828399658203, "step": 5861 }, { "epoch": 0.8168327178986972, "grad_norm": 0.7267456650733948, "learning_rate": 9.314953394998905e-07, "loss": 0.07818889617919922, "step": 5862 }, { "epoch": 0.8169720615899115, "grad_norm": 0.5903359055519104, "learning_rate": 9.30125831466005e-07, "loss": 0.0801992416381836, "step": 5863 }, { "epoch": 0.8171114052811259, "grad_norm": 1.0585179328918457, "learning_rate": 9.287572276652417e-07, "loss": 0.08114814758300781, "step": 5864 }, { "epoch": 0.8172507489723403, "grad_norm": 0.563122034072876, "learning_rate": 9.273895284016743e-07, "loss": 0.07153987884521484, "step": 5865 }, { "epoch": 0.8173900926635547, "grad_norm": 0.991112470626831, "learning_rate": 9.260227339791755e-07, "loss": 0.08785867691040039, "step": 5866 }, { "epoch": 0.817529436354769, "grad_norm": 0.8631230592727661, "learning_rate": 9.246568447014148e-07, "loss": 0.08188056945800781, "step": 5867 }, { "epoch": 0.8176687800459834, "grad_norm": 0.5717471241950989, "learning_rate": 9.232918608718599e-07, "loss": 0.06905031204223633, "step": 5868 }, { "epoch": 0.8178081237371978, "grad_norm": 0.714444100856781, "learning_rate": 9.219277827937811e-07, "loss": 0.07683324813842773, "step": 5869 }, { "epoch": 0.8179474674284122, "grad_norm": 0.7621891498565674, "learning_rate": 9.205646107702465e-07, "loss": 0.07564353942871094, "step": 5870 }, { "epoch": 0.8180868111196266, "grad_norm": 0.8220952153205872, "learning_rate": 9.192023451041187e-07, "loss": 0.07805633544921875, "step": 5871 }, { "epoch": 0.8182261548108409, "grad_norm": 1.1459147930145264, "learning_rate": 9.178409860980648e-07, "loss": 0.08919715881347656, "step": 5872 }, { "epoch": 0.8183654985020553, "grad_norm": 0.5146237015724182, "learning_rate": 9.164805340545457e-07, "loss": 0.06325531005859375, "step": 5873 }, { "epoch": 0.8185048421932697, "grad_norm": 0.8085857629776001, "learning_rate": 9.151209892758245e-07, "loss": 0.08669853210449219, "step": 5874 }, { "epoch": 0.8186441858844841, "grad_norm": 1.003178596496582, "learning_rate": 9.137623520639588e-07, "loss": 0.08905792236328125, "step": 5875 }, { "epoch": 0.8187835295756984, "grad_norm": 0.49188756942749023, "learning_rate": 9.124046227208083e-07, "loss": 0.07423210144042969, "step": 5876 }, { "epoch": 0.8189228732669128, "grad_norm": 0.9859955310821533, "learning_rate": 9.110478015480301e-07, "loss": 0.09490513801574707, "step": 5877 }, { "epoch": 0.8190622169581272, "grad_norm": 0.5327615141868591, "learning_rate": 9.096918888470785e-07, "loss": 0.05517768859863281, "step": 5878 }, { "epoch": 0.8192015606493416, "grad_norm": 0.964049220085144, "learning_rate": 9.083368849192042e-07, "loss": 0.10400962829589844, "step": 5879 }, { "epoch": 0.819340904340556, "grad_norm": 1.239599585533142, "learning_rate": 9.069827900654604e-07, "loss": 0.08496969938278198, "step": 5880 }, { "epoch": 0.8194802480317703, "grad_norm": 0.55426025390625, "learning_rate": 9.056296045866964e-07, "loss": 0.06464862823486328, "step": 5881 }, { "epoch": 0.8196195917229847, "grad_norm": 0.6519297361373901, "learning_rate": 9.042773287835566e-07, "loss": 0.07990837097167969, "step": 5882 }, { "epoch": 0.8197589354141991, "grad_norm": 0.5131306052207947, "learning_rate": 9.02925962956489e-07, "loss": 0.0671243667602539, "step": 5883 }, { "epoch": 0.8198982791054135, "grad_norm": 1.02873694896698, "learning_rate": 9.015755074057336e-07, "loss": 0.08740520477294922, "step": 5884 }, { "epoch": 0.8200376227966278, "grad_norm": 0.8256435394287109, "learning_rate": 9.002259624313325e-07, "loss": 0.08295440673828125, "step": 5885 }, { "epoch": 0.8201769664878422, "grad_norm": 0.585461437702179, "learning_rate": 8.98877328333122e-07, "loss": 0.06325149536132812, "step": 5886 }, { "epoch": 0.8203163101790566, "grad_norm": 1.109846591949463, "learning_rate": 8.975296054107396e-07, "loss": 0.08866214752197266, "step": 5887 }, { "epoch": 0.820455653870271, "grad_norm": 0.42718520760536194, "learning_rate": 8.961827939636198e-07, "loss": 0.05709362030029297, "step": 5888 }, { "epoch": 0.8205949975614854, "grad_norm": 0.5269535779953003, "learning_rate": 8.948368942909891e-07, "loss": 0.06897258758544922, "step": 5889 }, { "epoch": 0.8207343412526998, "grad_norm": 0.7168231010437012, "learning_rate": 8.934919066918779e-07, "loss": 0.07673871517181396, "step": 5890 }, { "epoch": 0.8208736849439142, "grad_norm": 0.7817896008491516, "learning_rate": 8.921478314651133e-07, "loss": 0.08251810073852539, "step": 5891 }, { "epoch": 0.8210130286351286, "grad_norm": 0.7474218010902405, "learning_rate": 8.908046689093153e-07, "loss": 0.08541488647460938, "step": 5892 }, { "epoch": 0.821152372326343, "grad_norm": 1.0376380681991577, "learning_rate": 8.894624193229051e-07, "loss": 0.10846281051635742, "step": 5893 }, { "epoch": 0.8212917160175573, "grad_norm": 0.5353116989135742, "learning_rate": 8.88121083004102e-07, "loss": 0.06281471252441406, "step": 5894 }, { "epoch": 0.8214310597087717, "grad_norm": 1.006868839263916, "learning_rate": 8.867806602509177e-07, "loss": 0.09258747100830078, "step": 5895 }, { "epoch": 0.8215704033999861, "grad_norm": 0.7760254740715027, "learning_rate": 8.854411513611638e-07, "loss": 0.07638359069824219, "step": 5896 }, { "epoch": 0.8217097470912005, "grad_norm": 0.7832481265068054, "learning_rate": 8.841025566324485e-07, "loss": 0.078369140625, "step": 5897 }, { "epoch": 0.8218490907824149, "grad_norm": 0.40907642245292664, "learning_rate": 8.827648763621793e-07, "loss": 0.056313514709472656, "step": 5898 }, { "epoch": 0.8219884344736292, "grad_norm": 0.628930926322937, "learning_rate": 8.814281108475565e-07, "loss": 0.07935571670532227, "step": 5899 }, { "epoch": 0.8221277781648436, "grad_norm": 0.6571598649024963, "learning_rate": 8.800922603855772e-07, "loss": 0.07158088684082031, "step": 5900 }, { "epoch": 0.822267121856058, "grad_norm": 0.6517907977104187, "learning_rate": 8.787573252730386e-07, "loss": 0.07287311553955078, "step": 5901 }, { "epoch": 0.8224064655472724, "grad_norm": 0.7145759463310242, "learning_rate": 8.774233058065346e-07, "loss": 0.07786893844604492, "step": 5902 }, { "epoch": 0.8225458092384867, "grad_norm": 1.0531643629074097, "learning_rate": 8.760902022824502e-07, "loss": 0.07501220703125, "step": 5903 }, { "epoch": 0.8226851529297011, "grad_norm": 1.3020941019058228, "learning_rate": 8.747580149969737e-07, "loss": 0.09186363220214844, "step": 5904 }, { "epoch": 0.8228244966209155, "grad_norm": 0.5087134838104248, "learning_rate": 8.734267442460842e-07, "loss": 0.06160736083984375, "step": 5905 }, { "epoch": 0.8229638403121299, "grad_norm": 0.8502222895622253, "learning_rate": 8.720963903255619e-07, "loss": 0.09337425231933594, "step": 5906 }, { "epoch": 0.8231031840033443, "grad_norm": 0.9223057627677917, "learning_rate": 8.707669535309793e-07, "loss": 0.07528018951416016, "step": 5907 }, { "epoch": 0.8232425276945586, "grad_norm": 0.846678614616394, "learning_rate": 8.694384341577072e-07, "loss": 0.07761573791503906, "step": 5908 }, { "epoch": 0.823381871385773, "grad_norm": 0.5645059943199158, "learning_rate": 8.681108325009141e-07, "loss": 0.07397747039794922, "step": 5909 }, { "epoch": 0.8235212150769874, "grad_norm": 0.6381694674491882, "learning_rate": 8.667841488555617e-07, "loss": 0.06461715698242188, "step": 5910 }, { "epoch": 0.8236605587682018, "grad_norm": 0.8697777390480042, "learning_rate": 8.654583835164066e-07, "loss": 0.08469676971435547, "step": 5911 }, { "epoch": 0.8237999024594161, "grad_norm": 0.4283660352230072, "learning_rate": 8.641335367780057e-07, "loss": 0.05017971992492676, "step": 5912 }, { "epoch": 0.8239392461506305, "grad_norm": 0.7970200777053833, "learning_rate": 8.62809608934711e-07, "loss": 0.07529497146606445, "step": 5913 }, { "epoch": 0.8240785898418449, "grad_norm": 0.9152814745903015, "learning_rate": 8.614866002806665e-07, "loss": 0.0724644660949707, "step": 5914 }, { "epoch": 0.8242179335330593, "grad_norm": 1.0347028970718384, "learning_rate": 8.601645111098162e-07, "loss": 0.09406852722167969, "step": 5915 }, { "epoch": 0.8243572772242737, "grad_norm": 1.2453827857971191, "learning_rate": 8.588433417158965e-07, "loss": 0.07877063751220703, "step": 5916 }, { "epoch": 0.824496620915488, "grad_norm": 1.049953579902649, "learning_rate": 8.575230923924432e-07, "loss": 0.09630545973777771, "step": 5917 }, { "epoch": 0.8246359646067024, "grad_norm": 0.9355384111404419, "learning_rate": 8.562037634327836e-07, "loss": 0.0993804931640625, "step": 5918 }, { "epoch": 0.8247753082979168, "grad_norm": 1.747251033782959, "learning_rate": 8.548853551300429e-07, "loss": 0.10890007019042969, "step": 5919 }, { "epoch": 0.8249146519891312, "grad_norm": 0.7663515210151672, "learning_rate": 8.535678677771441e-07, "loss": 0.07527828216552734, "step": 5920 }, { "epoch": 0.8250539956803455, "grad_norm": 0.4957675635814667, "learning_rate": 8.522513016667982e-07, "loss": 0.06296157836914062, "step": 5921 }, { "epoch": 0.8251933393715599, "grad_norm": 0.810998797416687, "learning_rate": 8.509356570915184e-07, "loss": 0.0732736587524414, "step": 5922 }, { "epoch": 0.8253326830627743, "grad_norm": 0.7500104308128357, "learning_rate": 8.496209343436101e-07, "loss": 0.07586097717285156, "step": 5923 }, { "epoch": 0.8254720267539887, "grad_norm": 0.9523969292640686, "learning_rate": 8.483071337151777e-07, "loss": 0.07400894165039062, "step": 5924 }, { "epoch": 0.825611370445203, "grad_norm": 0.5057324171066284, "learning_rate": 8.469942554981148e-07, "loss": 0.06615686416625977, "step": 5925 }, { "epoch": 0.8257507141364174, "grad_norm": 0.8061147928237915, "learning_rate": 8.456822999841125e-07, "loss": 0.07341384887695312, "step": 5926 }, { "epoch": 0.8258900578276318, "grad_norm": 1.0537763833999634, "learning_rate": 8.443712674646598e-07, "loss": 0.09215021133422852, "step": 5927 }, { "epoch": 0.8260294015188462, "grad_norm": 0.7423933744430542, "learning_rate": 8.430611582310355e-07, "loss": 0.07198143005371094, "step": 5928 }, { "epoch": 0.8261687452100606, "grad_norm": 1.1477447748184204, "learning_rate": 8.417519725743173e-07, "loss": 0.09571599960327148, "step": 5929 }, { "epoch": 0.8263080889012749, "grad_norm": 1.025697946548462, "learning_rate": 8.40443710785378e-07, "loss": 0.08021068572998047, "step": 5930 }, { "epoch": 0.8264474325924894, "grad_norm": 0.6110186576843262, "learning_rate": 8.391363731548813e-07, "loss": 0.06960582733154297, "step": 5931 }, { "epoch": 0.8265867762837038, "grad_norm": 0.6311312317848206, "learning_rate": 8.378299599732875e-07, "loss": 0.07517623901367188, "step": 5932 }, { "epoch": 0.8267261199749182, "grad_norm": 0.82919842004776, "learning_rate": 8.365244715308524e-07, "loss": 0.08312749862670898, "step": 5933 }, { "epoch": 0.8268654636661326, "grad_norm": 0.5048547387123108, "learning_rate": 8.352199081176271e-07, "loss": 0.05996274948120117, "step": 5934 }, { "epoch": 0.8270048073573469, "grad_norm": 0.9302079081535339, "learning_rate": 8.339162700234537e-07, "loss": 0.09632682800292969, "step": 5935 }, { "epoch": 0.8271441510485613, "grad_norm": 0.7043586373329163, "learning_rate": 8.326135575379729e-07, "loss": 0.07045650482177734, "step": 5936 }, { "epoch": 0.8272834947397757, "grad_norm": 1.0218571424484253, "learning_rate": 8.313117709506158e-07, "loss": 0.07095003128051758, "step": 5937 }, { "epoch": 0.8274228384309901, "grad_norm": 0.8175719976425171, "learning_rate": 8.30010910550611e-07, "loss": 0.0685110092163086, "step": 5938 }, { "epoch": 0.8275621821222044, "grad_norm": 0.8238781690597534, "learning_rate": 8.287109766269786e-07, "loss": 0.0758962631225586, "step": 5939 }, { "epoch": 0.8277015258134188, "grad_norm": 0.7019985914230347, "learning_rate": 8.274119694685345e-07, "loss": 0.07362174987792969, "step": 5940 }, { "epoch": 0.8278408695046332, "grad_norm": 0.7380099892616272, "learning_rate": 8.26113889363891e-07, "loss": 0.08098506927490234, "step": 5941 }, { "epoch": 0.8279802131958476, "grad_norm": 0.8468420505523682, "learning_rate": 8.248167366014493e-07, "loss": 0.06980514526367188, "step": 5942 }, { "epoch": 0.828119556887062, "grad_norm": 0.5002473592758179, "learning_rate": 8.235205114694067e-07, "loss": 0.0731649398803711, "step": 5943 }, { "epoch": 0.8282589005782763, "grad_norm": 0.9337432980537415, "learning_rate": 8.222252142557557e-07, "loss": 0.08344554901123047, "step": 5944 }, { "epoch": 0.8283982442694907, "grad_norm": 0.6410346031188965, "learning_rate": 8.209308452482829e-07, "loss": 0.06241798400878906, "step": 5945 }, { "epoch": 0.8285375879607051, "grad_norm": 0.7754610180854797, "learning_rate": 8.196374047345668e-07, "loss": 0.0745697021484375, "step": 5946 }, { "epoch": 0.8286769316519195, "grad_norm": 0.6629528999328613, "learning_rate": 8.183448930019783e-07, "loss": 0.07869148254394531, "step": 5947 }, { "epoch": 0.8288162753431338, "grad_norm": 0.9340608716011047, "learning_rate": 8.170533103376865e-07, "loss": 0.08852577209472656, "step": 5948 }, { "epoch": 0.8289556190343482, "grad_norm": 0.8847813606262207, "learning_rate": 8.157626570286515e-07, "loss": 0.0740358829498291, "step": 5949 }, { "epoch": 0.8290949627255626, "grad_norm": 0.9740632772445679, "learning_rate": 8.144729333616259e-07, "loss": 0.07786941528320312, "step": 5950 }, { "epoch": 0.829234306416777, "grad_norm": 0.7550776600837708, "learning_rate": 8.131841396231566e-07, "loss": 0.08087730407714844, "step": 5951 }, { "epoch": 0.8293736501079914, "grad_norm": 0.9158352613449097, "learning_rate": 8.118962760995874e-07, "loss": 0.07987213134765625, "step": 5952 }, { "epoch": 0.8295129937992057, "grad_norm": 1.0212182998657227, "learning_rate": 8.106093430770473e-07, "loss": 0.10718917846679688, "step": 5953 }, { "epoch": 0.8296523374904201, "grad_norm": 0.7178643345832825, "learning_rate": 8.093233408414658e-07, "loss": 0.07265663146972656, "step": 5954 }, { "epoch": 0.8297916811816345, "grad_norm": 0.9025821685791016, "learning_rate": 8.080382696785627e-07, "loss": 0.07587051391601562, "step": 5955 }, { "epoch": 0.8299310248728489, "grad_norm": 0.6235224604606628, "learning_rate": 8.067541298738535e-07, "loss": 0.06567859649658203, "step": 5956 }, { "epoch": 0.8300703685640632, "grad_norm": 0.8261546492576599, "learning_rate": 8.054709217126433e-07, "loss": 0.08953475952148438, "step": 5957 }, { "epoch": 0.8302097122552776, "grad_norm": 0.9135664105415344, "learning_rate": 8.041886454800307e-07, "loss": 0.06246471405029297, "step": 5958 }, { "epoch": 0.830349055946492, "grad_norm": 1.4648298025131226, "learning_rate": 8.029073014609096e-07, "loss": 0.0930185317993164, "step": 5959 }, { "epoch": 0.8304883996377064, "grad_norm": 1.0384358167648315, "learning_rate": 8.016268899399643e-07, "loss": 0.08949136734008789, "step": 5960 }, { "epoch": 0.8306277433289208, "grad_norm": 0.6199554800987244, "learning_rate": 8.00347411201673e-07, "loss": 0.06925392150878906, "step": 5961 }, { "epoch": 0.8307670870201351, "grad_norm": 0.7271571159362793, "learning_rate": 7.990688655303086e-07, "loss": 0.06444597244262695, "step": 5962 }, { "epoch": 0.8309064307113495, "grad_norm": 0.705484926700592, "learning_rate": 7.977912532099336e-07, "loss": 0.07558441162109375, "step": 5963 }, { "epoch": 0.8310457744025639, "grad_norm": 0.9622899889945984, "learning_rate": 7.965145745244029e-07, "loss": 0.0870218276977539, "step": 5964 }, { "epoch": 0.8311851180937783, "grad_norm": 1.071081280708313, "learning_rate": 7.95238829757366e-07, "loss": 0.09688854217529297, "step": 5965 }, { "epoch": 0.8313244617849926, "grad_norm": 0.5115848779678345, "learning_rate": 7.939640191922665e-07, "loss": 0.0693349838256836, "step": 5966 }, { "epoch": 0.831463805476207, "grad_norm": 0.5942793488502502, "learning_rate": 7.926901431123362e-07, "loss": 0.08691215515136719, "step": 5967 }, { "epoch": 0.8316031491674214, "grad_norm": 0.5643383264541626, "learning_rate": 7.914172018006006e-07, "loss": 0.06792831420898438, "step": 5968 }, { "epoch": 0.8317424928586358, "grad_norm": 1.1431922912597656, "learning_rate": 7.901451955398792e-07, "loss": 0.0876607894897461, "step": 5969 }, { "epoch": 0.8318818365498502, "grad_norm": 0.8391035795211792, "learning_rate": 7.88874124612784e-07, "loss": 0.07101678848266602, "step": 5970 }, { "epoch": 0.8320211802410646, "grad_norm": 1.000410795211792, "learning_rate": 7.876039893017151e-07, "loss": 0.09255409240722656, "step": 5971 }, { "epoch": 0.832160523932279, "grad_norm": 1.206739902496338, "learning_rate": 7.863347898888696e-07, "loss": 0.1089324951171875, "step": 5972 }, { "epoch": 0.8322998676234934, "grad_norm": 1.1830742359161377, "learning_rate": 7.850665266562352e-07, "loss": 0.07557344436645508, "step": 5973 }, { "epoch": 0.8324392113147078, "grad_norm": 0.5832558870315552, "learning_rate": 7.837991998855899e-07, "loss": 0.06869125366210938, "step": 5974 }, { "epoch": 0.8325785550059221, "grad_norm": 0.8570107817649841, "learning_rate": 7.825328098585039e-07, "loss": 0.1062016487121582, "step": 5975 }, { "epoch": 0.8327178986971365, "grad_norm": 0.6208454966545105, "learning_rate": 7.812673568563406e-07, "loss": 0.08105087280273438, "step": 5976 }, { "epoch": 0.8328572423883509, "grad_norm": 0.8641589879989624, "learning_rate": 7.800028411602572e-07, "loss": 0.0894918441772461, "step": 5977 }, { "epoch": 0.8329965860795653, "grad_norm": 0.45139259099960327, "learning_rate": 7.78739263051198e-07, "loss": 0.06807899475097656, "step": 5978 }, { "epoch": 0.8331359297707797, "grad_norm": 0.7625397443771362, "learning_rate": 7.774766228099001e-07, "loss": 0.0852956771850586, "step": 5979 }, { "epoch": 0.833275273461994, "grad_norm": 0.7417899370193481, "learning_rate": 7.762149207168951e-07, "loss": 0.07268619537353516, "step": 5980 }, { "epoch": 0.8334146171532084, "grad_norm": 0.5222619771957397, "learning_rate": 7.749541570525054e-07, "loss": 0.07616472244262695, "step": 5981 }, { "epoch": 0.8335539608444228, "grad_norm": 0.9741654396057129, "learning_rate": 7.736943320968409e-07, "loss": 0.0694417953491211, "step": 5982 }, { "epoch": 0.8336933045356372, "grad_norm": 0.49881812930107117, "learning_rate": 7.724354461298089e-07, "loss": 0.05967998504638672, "step": 5983 }, { "epoch": 0.8338326482268515, "grad_norm": 0.38374435901641846, "learning_rate": 7.711774994311027e-07, "loss": 0.05388069152832031, "step": 5984 }, { "epoch": 0.8339719919180659, "grad_norm": 0.8650236129760742, "learning_rate": 7.699204922802123e-07, "loss": 0.07983970642089844, "step": 5985 }, { "epoch": 0.8341113356092803, "grad_norm": 0.6455187797546387, "learning_rate": 7.686644249564124e-07, "loss": 0.08307218551635742, "step": 5986 }, { "epoch": 0.8342506793004947, "grad_norm": 0.6097689270973206, "learning_rate": 7.674092977387737e-07, "loss": 0.07421875, "step": 5987 }, { "epoch": 0.8343900229917091, "grad_norm": 1.0578943490982056, "learning_rate": 7.661551109061593e-07, "loss": 0.08430910110473633, "step": 5988 }, { "epoch": 0.8345293666829234, "grad_norm": 1.275575041770935, "learning_rate": 7.649018647372186e-07, "loss": 0.0928642749786377, "step": 5989 }, { "epoch": 0.8346687103741378, "grad_norm": 0.7607520818710327, "learning_rate": 7.636495595103938e-07, "loss": 0.07120800018310547, "step": 5990 }, { "epoch": 0.8348080540653522, "grad_norm": 0.5899385809898376, "learning_rate": 7.6239819550392e-07, "loss": 0.07393765449523926, "step": 5991 }, { "epoch": 0.8349473977565666, "grad_norm": 0.5403484106063843, "learning_rate": 7.611477729958205e-07, "loss": 0.05878591537475586, "step": 5992 }, { "epoch": 0.835086741447781, "grad_norm": 0.4852493405342102, "learning_rate": 7.598982922639109e-07, "loss": 0.061158180236816406, "step": 5993 }, { "epoch": 0.8352260851389953, "grad_norm": 0.5774472951889038, "learning_rate": 7.586497535857984e-07, "loss": 0.06598818302154541, "step": 5994 }, { "epoch": 0.8353654288302097, "grad_norm": 0.651974081993103, "learning_rate": 7.574021572388795e-07, "loss": 0.08575153350830078, "step": 5995 }, { "epoch": 0.8355047725214241, "grad_norm": 0.43054211139678955, "learning_rate": 7.561555035003398e-07, "loss": 0.05676555633544922, "step": 5996 }, { "epoch": 0.8356441162126385, "grad_norm": 0.6744864583015442, "learning_rate": 7.549097926471583e-07, "loss": 0.07218360900878906, "step": 5997 }, { "epoch": 0.8357834599038528, "grad_norm": 0.603145956993103, "learning_rate": 7.536650249561056e-07, "loss": 0.062138497829437256, "step": 5998 }, { "epoch": 0.8359228035950672, "grad_norm": 0.9069014191627502, "learning_rate": 7.524212007037385e-07, "loss": 0.09634685516357422, "step": 5999 }, { "epoch": 0.8360621472862816, "grad_norm": 0.7189099788665771, "learning_rate": 7.511783201664053e-07, "loss": 0.09260845184326172, "step": 6000 }, { "epoch": 0.836201490977496, "grad_norm": 0.5312643051147461, "learning_rate": 7.499363836202472e-07, "loss": 0.07524585723876953, "step": 6001 }, { "epoch": 0.8363408346687103, "grad_norm": 0.96371990442276, "learning_rate": 7.486953913411954e-07, "loss": 0.07600688934326172, "step": 6002 }, { "epoch": 0.8364801783599247, "grad_norm": 0.765397846698761, "learning_rate": 7.474553436049675e-07, "loss": 0.08631038665771484, "step": 6003 }, { "epoch": 0.8366195220511391, "grad_norm": 0.9700932502746582, "learning_rate": 7.462162406870766e-07, "loss": 0.06994819641113281, "step": 6004 }, { "epoch": 0.8367588657423535, "grad_norm": 0.8851130604743958, "learning_rate": 7.4497808286282e-07, "loss": 0.09807205200195312, "step": 6005 }, { "epoch": 0.8368982094335679, "grad_norm": 0.8796024322509766, "learning_rate": 7.437408704072907e-07, "loss": 0.10004043579101562, "step": 6006 }, { "epoch": 0.8370375531247822, "grad_norm": 0.8083353638648987, "learning_rate": 7.425046035953665e-07, "loss": 0.06336688995361328, "step": 6007 }, { "epoch": 0.8371768968159966, "grad_norm": 0.7165014743804932, "learning_rate": 7.412692827017193e-07, "loss": 0.08687496185302734, "step": 6008 }, { "epoch": 0.837316240507211, "grad_norm": 0.8117594718933105, "learning_rate": 7.400349080008107e-07, "loss": 0.09264755249023438, "step": 6009 }, { "epoch": 0.8374555841984254, "grad_norm": 0.7475942969322205, "learning_rate": 7.38801479766888e-07, "loss": 0.06170368194580078, "step": 6010 }, { "epoch": 0.8375949278896399, "grad_norm": 0.5411815047264099, "learning_rate": 7.375689982739915e-07, "loss": 0.0701446533203125, "step": 6011 }, { "epoch": 0.8377342715808542, "grad_norm": 0.5088405609130859, "learning_rate": 7.363374637959498e-07, "loss": 0.06393098831176758, "step": 6012 }, { "epoch": 0.8378736152720686, "grad_norm": 0.6354932188987732, "learning_rate": 7.35106876606384e-07, "loss": 0.08250617980957031, "step": 6013 }, { "epoch": 0.838012958963283, "grad_norm": 0.7200706601142883, "learning_rate": 7.338772369787001e-07, "loss": 0.06974220275878906, "step": 6014 }, { "epoch": 0.8381523026544974, "grad_norm": 0.7875337600708008, "learning_rate": 7.326485451860976e-07, "loss": 0.07069110870361328, "step": 6015 }, { "epoch": 0.8382916463457117, "grad_norm": 0.9664623737335205, "learning_rate": 7.314208015015623e-07, "loss": 0.06754589080810547, "step": 6016 }, { "epoch": 0.8384309900369261, "grad_norm": 0.9567381143569946, "learning_rate": 7.301940061978724e-07, "loss": 0.09541988372802734, "step": 6017 }, { "epoch": 0.8385703337281405, "grad_norm": 0.5745678544044495, "learning_rate": 7.289681595475922e-07, "loss": 0.06572818756103516, "step": 6018 }, { "epoch": 0.8387096774193549, "grad_norm": 0.9991448521614075, "learning_rate": 7.277432618230773e-07, "loss": 0.09278488159179688, "step": 6019 }, { "epoch": 0.8388490211105692, "grad_norm": 0.8142937421798706, "learning_rate": 7.265193132964749e-07, "loss": 0.06889533996582031, "step": 6020 }, { "epoch": 0.8389883648017836, "grad_norm": 0.5755748152732849, "learning_rate": 7.252963142397134e-07, "loss": 0.07286262512207031, "step": 6021 }, { "epoch": 0.839127708492998, "grad_norm": 0.8825970888137817, "learning_rate": 7.24074264924518e-07, "loss": 0.0659480094909668, "step": 6022 }, { "epoch": 0.8392670521842124, "grad_norm": 0.835638165473938, "learning_rate": 7.228531656223997e-07, "loss": 0.10934638977050781, "step": 6023 }, { "epoch": 0.8394063958754268, "grad_norm": 0.9242705702781677, "learning_rate": 7.216330166046603e-07, "loss": 0.08923816680908203, "step": 6024 }, { "epoch": 0.8395457395666411, "grad_norm": 0.5815404057502747, "learning_rate": 7.204138181423881e-07, "loss": 0.06702899932861328, "step": 6025 }, { "epoch": 0.8396850832578555, "grad_norm": 0.7910100221633911, "learning_rate": 7.191955705064591e-07, "loss": 0.08341026306152344, "step": 6026 }, { "epoch": 0.8398244269490699, "grad_norm": 0.6532379984855652, "learning_rate": 7.179782739675434e-07, "loss": 0.0686655044555664, "step": 6027 }, { "epoch": 0.8399637706402843, "grad_norm": 0.8043354749679565, "learning_rate": 7.167619287960942e-07, "loss": 0.07970619201660156, "step": 6028 }, { "epoch": 0.8401031143314986, "grad_norm": 0.644498884677887, "learning_rate": 7.155465352623559e-07, "loss": 0.07755088806152344, "step": 6029 }, { "epoch": 0.840242458022713, "grad_norm": 0.7132776379585266, "learning_rate": 7.143320936363629e-07, "loss": 0.07568836212158203, "step": 6030 }, { "epoch": 0.8403818017139274, "grad_norm": 0.8114092350006104, "learning_rate": 7.131186041879357e-07, "loss": 0.06965351104736328, "step": 6031 }, { "epoch": 0.8405211454051418, "grad_norm": 0.9704258441925049, "learning_rate": 7.119060671866817e-07, "loss": 0.1076040267944336, "step": 6032 }, { "epoch": 0.8406604890963562, "grad_norm": 0.694248378276825, "learning_rate": 7.106944829020013e-07, "loss": 0.07252883911132812, "step": 6033 }, { "epoch": 0.8407998327875705, "grad_norm": 0.6324838399887085, "learning_rate": 7.094838516030811e-07, "loss": 0.07627010345458984, "step": 6034 }, { "epoch": 0.8409391764787849, "grad_norm": 0.7354175448417664, "learning_rate": 7.082741735588938e-07, "loss": 0.09487247467041016, "step": 6035 }, { "epoch": 0.8410785201699993, "grad_norm": 0.749303936958313, "learning_rate": 7.070654490382045e-07, "loss": 0.07335567474365234, "step": 6036 }, { "epoch": 0.8412178638612137, "grad_norm": 1.0789425373077393, "learning_rate": 7.058576783095622e-07, "loss": 0.09363079071044922, "step": 6037 }, { "epoch": 0.841357207552428, "grad_norm": 0.5984277129173279, "learning_rate": 7.046508616413078e-07, "loss": 0.059325218200683594, "step": 6038 }, { "epoch": 0.8414965512436424, "grad_norm": 1.1703635454177856, "learning_rate": 7.034449993015663e-07, "loss": 0.08412551879882812, "step": 6039 }, { "epoch": 0.8416358949348568, "grad_norm": 0.7902432084083557, "learning_rate": 7.022400915582539e-07, "loss": 0.07076072692871094, "step": 6040 }, { "epoch": 0.8417752386260712, "grad_norm": 1.2268978357315063, "learning_rate": 7.010361386790748e-07, "loss": 0.09925651550292969, "step": 6041 }, { "epoch": 0.8419145823172856, "grad_norm": 0.5442723631858826, "learning_rate": 6.998331409315184e-07, "loss": 0.06736087799072266, "step": 6042 }, { "epoch": 0.8420539260084999, "grad_norm": 0.9088218808174133, "learning_rate": 6.986310985828626e-07, "loss": 0.07287406921386719, "step": 6043 }, { "epoch": 0.8421932696997143, "grad_norm": 0.6738926768302917, "learning_rate": 6.974300119001754e-07, "loss": 0.07842159271240234, "step": 6044 }, { "epoch": 0.8423326133909287, "grad_norm": 0.5735779404640198, "learning_rate": 6.962298811503104e-07, "loss": 0.06614494323730469, "step": 6045 }, { "epoch": 0.8424719570821431, "grad_norm": 0.48498663306236267, "learning_rate": 6.950307065999085e-07, "loss": 0.060622215270996094, "step": 6046 }, { "epoch": 0.8426113007733574, "grad_norm": 0.797493577003479, "learning_rate": 6.938324885154007e-07, "loss": 0.08326053619384766, "step": 6047 }, { "epoch": 0.8427506444645718, "grad_norm": 0.7119065523147583, "learning_rate": 6.92635227163001e-07, "loss": 0.07379531860351562, "step": 6048 }, { "epoch": 0.8428899881557862, "grad_norm": 0.7938238978385925, "learning_rate": 6.914389228087165e-07, "loss": 0.09296226501464844, "step": 6049 }, { "epoch": 0.8430293318470006, "grad_norm": 0.4885343015193939, "learning_rate": 6.902435757183357e-07, "loss": 0.0674128532409668, "step": 6050 }, { "epoch": 0.8431686755382151, "grad_norm": 0.636899471282959, "learning_rate": 6.890491861574389e-07, "loss": 0.07722854614257812, "step": 6051 }, { "epoch": 0.8433080192294294, "grad_norm": 1.0855876207351685, "learning_rate": 6.87855754391395e-07, "loss": 0.09381580352783203, "step": 6052 }, { "epoch": 0.8434473629206438, "grad_norm": 0.863703191280365, "learning_rate": 6.866632806853518e-07, "loss": 0.0835561752319336, "step": 6053 }, { "epoch": 0.8435867066118582, "grad_norm": 0.45369914174079895, "learning_rate": 6.854717653042531e-07, "loss": 0.06616020202636719, "step": 6054 }, { "epoch": 0.8437260503030726, "grad_norm": 0.9577726125717163, "learning_rate": 6.842812085128253e-07, "loss": 0.09223556518554688, "step": 6055 }, { "epoch": 0.843865393994287, "grad_norm": 0.4906492531299591, "learning_rate": 6.830916105755847e-07, "loss": 0.06931114196777344, "step": 6056 }, { "epoch": 0.8440047376855013, "grad_norm": 0.5363842844963074, "learning_rate": 6.819029717568315e-07, "loss": 0.06406402587890625, "step": 6057 }, { "epoch": 0.8441440813767157, "grad_norm": 0.5569750070571899, "learning_rate": 6.807152923206528e-07, "loss": 0.07159805297851562, "step": 6058 }, { "epoch": 0.8442834250679301, "grad_norm": 1.3803166151046753, "learning_rate": 6.795285725309269e-07, "loss": 0.10684680938720703, "step": 6059 }, { "epoch": 0.8444227687591445, "grad_norm": 1.0952987670898438, "learning_rate": 6.783428126513125e-07, "loss": 0.11000537872314453, "step": 6060 }, { "epoch": 0.8445621124503588, "grad_norm": 0.7712045311927795, "learning_rate": 6.771580129452604e-07, "loss": 0.08780097961425781, "step": 6061 }, { "epoch": 0.8447014561415732, "grad_norm": 1.0695985555648804, "learning_rate": 6.759741736760062e-07, "loss": 0.1091461181640625, "step": 6062 }, { "epoch": 0.8448407998327876, "grad_norm": 0.6554484367370605, "learning_rate": 6.747912951065722e-07, "loss": 0.07210159301757812, "step": 6063 }, { "epoch": 0.844980143524002, "grad_norm": 0.5428755879402161, "learning_rate": 6.736093774997643e-07, "loss": 0.06619548797607422, "step": 6064 }, { "epoch": 0.8451194872152163, "grad_norm": 1.0891642570495605, "learning_rate": 6.724284211181803e-07, "loss": 0.09290504455566406, "step": 6065 }, { "epoch": 0.8452588309064307, "grad_norm": 0.7476683855056763, "learning_rate": 6.712484262242014e-07, "loss": 0.06519222259521484, "step": 6066 }, { "epoch": 0.8453981745976451, "grad_norm": 0.8947039246559143, "learning_rate": 6.700693930799945e-07, "loss": 0.06836509704589844, "step": 6067 }, { "epoch": 0.8455375182888595, "grad_norm": 0.7904706597328186, "learning_rate": 6.688913219475158e-07, "loss": 0.07330703735351562, "step": 6068 }, { "epoch": 0.8456768619800739, "grad_norm": 0.6223629117012024, "learning_rate": 6.677142130885028e-07, "loss": 0.06906318664550781, "step": 6069 }, { "epoch": 0.8458162056712882, "grad_norm": 0.760930061340332, "learning_rate": 6.665380667644849e-07, "loss": 0.06468391418457031, "step": 6070 }, { "epoch": 0.8459555493625026, "grad_norm": 0.6127912998199463, "learning_rate": 6.653628832367731e-07, "loss": 0.06253552436828613, "step": 6071 }, { "epoch": 0.846094893053717, "grad_norm": 0.5993935465812683, "learning_rate": 6.641886627664673e-07, "loss": 0.06451272964477539, "step": 6072 }, { "epoch": 0.8462342367449314, "grad_norm": 0.5678902864456177, "learning_rate": 6.630154056144533e-07, "loss": 0.06719112396240234, "step": 6073 }, { "epoch": 0.8463735804361457, "grad_norm": 1.0907381772994995, "learning_rate": 6.618431120414015e-07, "loss": 0.0960550308227539, "step": 6074 }, { "epoch": 0.8465129241273601, "grad_norm": 0.5801175236701965, "learning_rate": 6.606717823077669e-07, "loss": 0.07003355026245117, "step": 6075 }, { "epoch": 0.8466522678185745, "grad_norm": 0.8965914845466614, "learning_rate": 6.59501416673794e-07, "loss": 0.07987451553344727, "step": 6076 }, { "epoch": 0.8467916115097889, "grad_norm": 0.8089639544487, "learning_rate": 6.583320153995121e-07, "loss": 0.07290363311767578, "step": 6077 }, { "epoch": 0.8469309552010033, "grad_norm": 1.0145844221115112, "learning_rate": 6.571635787447339e-07, "loss": 0.0835275650024414, "step": 6078 }, { "epoch": 0.8470702988922176, "grad_norm": 1.0430784225463867, "learning_rate": 6.559961069690596e-07, "loss": 0.10030269622802734, "step": 6079 }, { "epoch": 0.847209642583432, "grad_norm": 0.9367116093635559, "learning_rate": 6.548296003318744e-07, "loss": 0.09982872009277344, "step": 6080 }, { "epoch": 0.8473489862746464, "grad_norm": 0.6085363626480103, "learning_rate": 6.536640590923515e-07, "loss": 0.07301473617553711, "step": 6081 }, { "epoch": 0.8474883299658608, "grad_norm": 0.8034475445747375, "learning_rate": 6.52499483509445e-07, "loss": 0.09396696090698242, "step": 6082 }, { "epoch": 0.8476276736570751, "grad_norm": 0.6884410381317139, "learning_rate": 6.51335873841899e-07, "loss": 0.0830087661743164, "step": 6083 }, { "epoch": 0.8477670173482895, "grad_norm": 0.8064174056053162, "learning_rate": 6.501732303482394e-07, "loss": 0.09910392761230469, "step": 6084 }, { "epoch": 0.8479063610395039, "grad_norm": 0.6293338537216187, "learning_rate": 6.490115532867808e-07, "loss": 0.07036018371582031, "step": 6085 }, { "epoch": 0.8480457047307183, "grad_norm": 0.6815739274024963, "learning_rate": 6.478508429156189e-07, "loss": 0.08212661743164062, "step": 6086 }, { "epoch": 0.8481850484219327, "grad_norm": 0.669854462146759, "learning_rate": 6.466910994926384e-07, "loss": 0.06046581268310547, "step": 6087 }, { "epoch": 0.848324392113147, "grad_norm": 0.5596132874488831, "learning_rate": 6.455323232755095e-07, "loss": 0.059439659118652344, "step": 6088 }, { "epoch": 0.8484637358043614, "grad_norm": 0.5227121710777283, "learning_rate": 6.44374514521684e-07, "loss": 0.07342243194580078, "step": 6089 }, { "epoch": 0.8486030794955758, "grad_norm": 0.871825098991394, "learning_rate": 6.432176734883994e-07, "loss": 0.07134532928466797, "step": 6090 }, { "epoch": 0.8487424231867903, "grad_norm": 0.6536204218864441, "learning_rate": 6.420618004326818e-07, "loss": 0.07385444641113281, "step": 6091 }, { "epoch": 0.8488817668780047, "grad_norm": 0.6918785572052002, "learning_rate": 6.409068956113379e-07, "loss": 0.08247184753417969, "step": 6092 }, { "epoch": 0.849021110569219, "grad_norm": 1.0532257556915283, "learning_rate": 6.397529592809615e-07, "loss": 0.08711433410644531, "step": 6093 }, { "epoch": 0.8491604542604334, "grad_norm": 1.3056657314300537, "learning_rate": 6.38599991697933e-07, "loss": 0.102447509765625, "step": 6094 }, { "epoch": 0.8492997979516478, "grad_norm": 0.8545871376991272, "learning_rate": 6.374479931184141e-07, "loss": 0.0731649398803711, "step": 6095 }, { "epoch": 0.8494391416428622, "grad_norm": 0.9123099446296692, "learning_rate": 6.362969637983507e-07, "loss": 0.09046268463134766, "step": 6096 }, { "epoch": 0.8495784853340765, "grad_norm": 0.8451285362243652, "learning_rate": 6.351469039934771e-07, "loss": 0.06908798217773438, "step": 6097 }, { "epoch": 0.8497178290252909, "grad_norm": 0.7431769967079163, "learning_rate": 6.339978139593117e-07, "loss": 0.0969552993774414, "step": 6098 }, { "epoch": 0.8498571727165053, "grad_norm": 0.756733775138855, "learning_rate": 6.328496939511541e-07, "loss": 0.08500957489013672, "step": 6099 }, { "epoch": 0.8499965164077197, "grad_norm": 0.6129223108291626, "learning_rate": 6.317025442240893e-07, "loss": 0.07539081573486328, "step": 6100 }, { "epoch": 0.850135860098934, "grad_norm": 0.7732660174369812, "learning_rate": 6.305563650329899e-07, "loss": 0.08287906646728516, "step": 6101 }, { "epoch": 0.8502752037901484, "grad_norm": 0.9144656658172607, "learning_rate": 6.294111566325106e-07, "loss": 0.06906795501708984, "step": 6102 }, { "epoch": 0.8504145474813628, "grad_norm": 0.7673020958900452, "learning_rate": 6.282669192770896e-07, "loss": 0.0802469253540039, "step": 6103 }, { "epoch": 0.8505538911725772, "grad_norm": 0.6462888121604919, "learning_rate": 6.271236532209502e-07, "loss": 0.0775146484375, "step": 6104 }, { "epoch": 0.8506932348637916, "grad_norm": 0.8896679878234863, "learning_rate": 6.259813587181024e-07, "loss": 0.0940389633178711, "step": 6105 }, { "epoch": 0.8508325785550059, "grad_norm": 0.8624019622802734, "learning_rate": 6.248400360223355e-07, "loss": 0.06921958923339844, "step": 6106 }, { "epoch": 0.8509719222462203, "grad_norm": 0.5683137774467468, "learning_rate": 6.236996853872251e-07, "loss": 0.07517576217651367, "step": 6107 }, { "epoch": 0.8511112659374347, "grad_norm": 0.9733118414878845, "learning_rate": 6.225603070661318e-07, "loss": 0.09433937072753906, "step": 6108 }, { "epoch": 0.8512506096286491, "grad_norm": 0.6229498386383057, "learning_rate": 6.214219013122008e-07, "loss": 0.07543182373046875, "step": 6109 }, { "epoch": 0.8513899533198634, "grad_norm": 0.4572784900665283, "learning_rate": 6.202844683783587e-07, "loss": 0.06757831573486328, "step": 6110 }, { "epoch": 0.8515292970110778, "grad_norm": 0.7379888892173767, "learning_rate": 6.191480085173163e-07, "loss": 0.07861614227294922, "step": 6111 }, { "epoch": 0.8516686407022922, "grad_norm": 0.5041009187698364, "learning_rate": 6.180125219815697e-07, "loss": 0.06126976013183594, "step": 6112 }, { "epoch": 0.8518079843935066, "grad_norm": 0.6499923467636108, "learning_rate": 6.168780090233994e-07, "loss": 0.060967445373535156, "step": 6113 }, { "epoch": 0.851947328084721, "grad_norm": 1.0508099794387817, "learning_rate": 6.157444698948656e-07, "loss": 0.09080123901367188, "step": 6114 }, { "epoch": 0.8520866717759353, "grad_norm": 0.6937904357910156, "learning_rate": 6.146119048478177e-07, "loss": 0.09023857116699219, "step": 6115 }, { "epoch": 0.8522260154671497, "grad_norm": 1.0939884185791016, "learning_rate": 6.134803141338835e-07, "loss": 0.10637569427490234, "step": 6116 }, { "epoch": 0.8523653591583641, "grad_norm": 0.9126864671707153, "learning_rate": 6.123496980044785e-07, "loss": 0.09026622772216797, "step": 6117 }, { "epoch": 0.8525047028495785, "grad_norm": 0.7245039939880371, "learning_rate": 6.112200567107978e-07, "loss": 0.08710765838623047, "step": 6118 }, { "epoch": 0.8526440465407928, "grad_norm": 0.40539467334747314, "learning_rate": 6.10091390503823e-07, "loss": 0.05531024932861328, "step": 6119 }, { "epoch": 0.8527833902320072, "grad_norm": 0.5334213376045227, "learning_rate": 6.089636996343202e-07, "loss": 0.06590652465820312, "step": 6120 }, { "epoch": 0.8529227339232216, "grad_norm": 0.7724472284317017, "learning_rate": 6.07836984352832e-07, "loss": 0.08474959433078766, "step": 6121 }, { "epoch": 0.853062077614436, "grad_norm": 0.7850760817527771, "learning_rate": 6.067112449096907e-07, "loss": 0.06658935546875, "step": 6122 }, { "epoch": 0.8532014213056504, "grad_norm": 0.7084347009658813, "learning_rate": 6.055864815550106e-07, "loss": 0.07871437072753906, "step": 6123 }, { "epoch": 0.8533407649968647, "grad_norm": 0.5471332669258118, "learning_rate": 6.044626945386894e-07, "loss": 0.06803035736083984, "step": 6124 }, { "epoch": 0.8534801086880791, "grad_norm": 0.401618093252182, "learning_rate": 6.033398841104043e-07, "loss": 0.04773426055908203, "step": 6125 }, { "epoch": 0.8536194523792935, "grad_norm": 0.5146939754486084, "learning_rate": 6.022180505196207e-07, "loss": 0.05945777893066406, "step": 6126 }, { "epoch": 0.8537587960705079, "grad_norm": 0.9252511262893677, "learning_rate": 6.01097194015583e-07, "loss": 0.08758544921875, "step": 6127 }, { "epoch": 0.8538981397617222, "grad_norm": 1.1656352281570435, "learning_rate": 5.999773148473193e-07, "loss": 0.08644247055053711, "step": 6128 }, { "epoch": 0.8540374834529366, "grad_norm": 0.608542799949646, "learning_rate": 5.988584132636421e-07, "loss": 0.06194877624511719, "step": 6129 }, { "epoch": 0.854176827144151, "grad_norm": 0.979479968547821, "learning_rate": 5.977404895131467e-07, "loss": 0.1150970458984375, "step": 6130 }, { "epoch": 0.8543161708353654, "grad_norm": 0.5475097298622131, "learning_rate": 5.966235438442086e-07, "loss": 0.07213687896728516, "step": 6131 }, { "epoch": 0.8544555145265799, "grad_norm": 0.6327565312385559, "learning_rate": 5.955075765049878e-07, "loss": 0.08686065673828125, "step": 6132 }, { "epoch": 0.8545948582177942, "grad_norm": 0.6341246962547302, "learning_rate": 5.943925877434276e-07, "loss": 0.07854270935058594, "step": 6133 }, { "epoch": 0.8547342019090086, "grad_norm": 0.7429203391075134, "learning_rate": 5.932785778072531e-07, "loss": 0.06785869598388672, "step": 6134 }, { "epoch": 0.854873545600223, "grad_norm": 0.8801400661468506, "learning_rate": 5.921655469439708e-07, "loss": 0.0996713638305664, "step": 6135 }, { "epoch": 0.8550128892914374, "grad_norm": 0.6732907891273499, "learning_rate": 5.910534954008718e-07, "loss": 0.08026790618896484, "step": 6136 }, { "epoch": 0.8551522329826517, "grad_norm": 1.0939180850982666, "learning_rate": 5.899424234250278e-07, "loss": 0.09172630310058594, "step": 6137 }, { "epoch": 0.8552915766738661, "grad_norm": 0.5101974010467529, "learning_rate": 5.888323312632948e-07, "loss": 0.05655360221862793, "step": 6138 }, { "epoch": 0.8554309203650805, "grad_norm": 0.8016234040260315, "learning_rate": 5.877232191623078e-07, "loss": 0.08158302307128906, "step": 6139 }, { "epoch": 0.8555702640562949, "grad_norm": 0.8661490678787231, "learning_rate": 5.866150873684878e-07, "loss": 0.09567070007324219, "step": 6140 }, { "epoch": 0.8557096077475093, "grad_norm": 0.8024041652679443, "learning_rate": 5.855079361280374e-07, "loss": 0.09804916381835938, "step": 6141 }, { "epoch": 0.8558489514387236, "grad_norm": 0.5881719589233398, "learning_rate": 5.844017656869389e-07, "loss": 0.07973480224609375, "step": 6142 }, { "epoch": 0.855988295129938, "grad_norm": 0.3993369936943054, "learning_rate": 5.83296576290957e-07, "loss": 0.054505109786987305, "step": 6143 }, { "epoch": 0.8561276388211524, "grad_norm": 0.599688708782196, "learning_rate": 5.821923681856406e-07, "loss": 0.07499217987060547, "step": 6144 }, { "epoch": 0.8562669825123668, "grad_norm": 0.39725440740585327, "learning_rate": 5.810891416163211e-07, "loss": 0.056934356689453125, "step": 6145 }, { "epoch": 0.8564063262035811, "grad_norm": 1.1701455116271973, "learning_rate": 5.799868968281075e-07, "loss": 0.11884021759033203, "step": 6146 }, { "epoch": 0.8565456698947955, "grad_norm": 0.6214386820793152, "learning_rate": 5.788856340658966e-07, "loss": 0.06792926788330078, "step": 6147 }, { "epoch": 0.8566850135860099, "grad_norm": 0.6386129856109619, "learning_rate": 5.777853535743605e-07, "loss": 0.061507225036621094, "step": 6148 }, { "epoch": 0.8568243572772243, "grad_norm": 0.8919962644577026, "learning_rate": 5.766860555979586e-07, "loss": 0.06417465209960938, "step": 6149 }, { "epoch": 0.8569637009684387, "grad_norm": 1.417857050895691, "learning_rate": 5.755877403809284e-07, "loss": 0.10371589660644531, "step": 6150 }, { "epoch": 0.857103044659653, "grad_norm": 0.4817652404308319, "learning_rate": 5.744904081672914e-07, "loss": 0.05400514602661133, "step": 6151 }, { "epoch": 0.8572423883508674, "grad_norm": 0.45172736048698425, "learning_rate": 5.733940592008519e-07, "loss": 0.05630683898925781, "step": 6152 }, { "epoch": 0.8573817320420818, "grad_norm": 0.8632796406745911, "learning_rate": 5.72298693725189e-07, "loss": 0.09212589263916016, "step": 6153 }, { "epoch": 0.8575210757332962, "grad_norm": 0.5710351467132568, "learning_rate": 5.712043119836702e-07, "loss": 0.06213045120239258, "step": 6154 }, { "epoch": 0.8576604194245105, "grad_norm": 0.7999300360679626, "learning_rate": 5.701109142194422e-07, "loss": 0.0923004150390625, "step": 6155 }, { "epoch": 0.8577997631157249, "grad_norm": 0.5170500874519348, "learning_rate": 5.69018500675434e-07, "loss": 0.061011314392089844, "step": 6156 }, { "epoch": 0.8579391068069393, "grad_norm": 0.48093608021736145, "learning_rate": 5.679270715943535e-07, "loss": 0.059311866760253906, "step": 6157 }, { "epoch": 0.8580784504981537, "grad_norm": 1.2137759923934937, "learning_rate": 5.668366272186915e-07, "loss": 0.08768081665039062, "step": 6158 }, { "epoch": 0.858217794189368, "grad_norm": 0.48224636912345886, "learning_rate": 5.657471677907205e-07, "loss": 0.06435251235961914, "step": 6159 }, { "epoch": 0.8583571378805824, "grad_norm": 0.7624504566192627, "learning_rate": 5.646586935524922e-07, "loss": 0.07236003875732422, "step": 6160 }, { "epoch": 0.8584964815717968, "grad_norm": 1.0064789056777954, "learning_rate": 5.635712047458419e-07, "loss": 0.08152580261230469, "step": 6161 }, { "epoch": 0.8586358252630112, "grad_norm": 0.48014122247695923, "learning_rate": 5.624847016123847e-07, "loss": 0.06381416320800781, "step": 6162 }, { "epoch": 0.8587751689542256, "grad_norm": 0.7780460119247437, "learning_rate": 5.613991843935179e-07, "loss": 0.08431804180145264, "step": 6163 }, { "epoch": 0.8589145126454399, "grad_norm": 1.0358333587646484, "learning_rate": 5.60314653330416e-07, "loss": 0.07791996002197266, "step": 6164 }, { "epoch": 0.8590538563366543, "grad_norm": 0.6493609547615051, "learning_rate": 5.592311086640379e-07, "loss": 0.0735788345336914, "step": 6165 }, { "epoch": 0.8591932000278687, "grad_norm": 1.2750993967056274, "learning_rate": 5.581485506351242e-07, "loss": 0.07857036590576172, "step": 6166 }, { "epoch": 0.8593325437190831, "grad_norm": 0.9597866535186768, "learning_rate": 5.570669794841921e-07, "loss": 0.09166574478149414, "step": 6167 }, { "epoch": 0.8594718874102975, "grad_norm": 0.7202970385551453, "learning_rate": 5.559863954515448e-07, "loss": 0.08327579498291016, "step": 6168 }, { "epoch": 0.8596112311015118, "grad_norm": 0.8445945978164673, "learning_rate": 5.549067987772605e-07, "loss": 0.07873010635375977, "step": 6169 }, { "epoch": 0.8597505747927262, "grad_norm": 0.8123039603233337, "learning_rate": 5.538281897012032e-07, "loss": 0.06970882415771484, "step": 6170 }, { "epoch": 0.8598899184839406, "grad_norm": 1.4663113355636597, "learning_rate": 5.527505684630136e-07, "loss": 0.07937014102935791, "step": 6171 }, { "epoch": 0.8600292621751551, "grad_norm": 0.5546654462814331, "learning_rate": 5.51673935302115e-07, "loss": 0.06585025787353516, "step": 6172 }, { "epoch": 0.8601686058663695, "grad_norm": 0.8696966767311096, "learning_rate": 5.505982904577123e-07, "loss": 0.0787348747253418, "step": 6173 }, { "epoch": 0.8603079495575838, "grad_norm": 0.60402512550354, "learning_rate": 5.495236341687876e-07, "loss": 0.07355308532714844, "step": 6174 }, { "epoch": 0.8604472932487982, "grad_norm": 0.9471162557601929, "learning_rate": 5.484499666741044e-07, "loss": 0.10889053344726562, "step": 6175 }, { "epoch": 0.8605866369400126, "grad_norm": 0.6832252740859985, "learning_rate": 5.47377288212208e-07, "loss": 0.08038139343261719, "step": 6176 }, { "epoch": 0.860725980631227, "grad_norm": 0.6126812696456909, "learning_rate": 5.463055990214245e-07, "loss": 0.06948089599609375, "step": 6177 }, { "epoch": 0.8608653243224413, "grad_norm": 0.6321056485176086, "learning_rate": 5.452348993398566e-07, "loss": 0.07042741775512695, "step": 6178 }, { "epoch": 0.8610046680136557, "grad_norm": 0.4097440838813782, "learning_rate": 5.441651894053895e-07, "loss": 0.053943634033203125, "step": 6179 }, { "epoch": 0.8611440117048701, "grad_norm": 0.7561411261558533, "learning_rate": 5.430964694556884e-07, "loss": 0.06639385223388672, "step": 6180 }, { "epoch": 0.8612833553960845, "grad_norm": 0.9577592015266418, "learning_rate": 5.420287397282004e-07, "loss": 0.0674276351928711, "step": 6181 }, { "epoch": 0.8614226990872988, "grad_norm": 0.7038933038711548, "learning_rate": 5.409620004601479e-07, "loss": 0.08508014678955078, "step": 6182 }, { "epoch": 0.8615620427785132, "grad_norm": 0.5966281890869141, "learning_rate": 5.398962518885375e-07, "loss": 0.06224632263183594, "step": 6183 }, { "epoch": 0.8617013864697276, "grad_norm": 0.7550960779190063, "learning_rate": 5.388314942501549e-07, "loss": 0.07283210754394531, "step": 6184 }, { "epoch": 0.861840730160942, "grad_norm": 0.9103513360023499, "learning_rate": 5.377677277815646e-07, "loss": 0.09507179260253906, "step": 6185 }, { "epoch": 0.8619800738521564, "grad_norm": 1.0344419479370117, "learning_rate": 5.367049527191093e-07, "loss": 0.09096908569335938, "step": 6186 }, { "epoch": 0.8621194175433707, "grad_norm": 0.7077224254608154, "learning_rate": 5.356431692989144e-07, "loss": 0.06165122985839844, "step": 6187 }, { "epoch": 0.8622587612345851, "grad_norm": 0.9323617815971375, "learning_rate": 5.345823777568859e-07, "loss": 0.08546733856201172, "step": 6188 }, { "epoch": 0.8623981049257995, "grad_norm": 0.8743637800216675, "learning_rate": 5.335225783287051e-07, "loss": 0.11477947235107422, "step": 6189 }, { "epoch": 0.8625374486170139, "grad_norm": 0.5380894541740417, "learning_rate": 5.324637712498359e-07, "loss": 0.06530904769897461, "step": 6190 }, { "epoch": 0.8626767923082282, "grad_norm": 0.5879705548286438, "learning_rate": 5.314059567555213e-07, "loss": 0.08041858673095703, "step": 6191 }, { "epoch": 0.8628161359994426, "grad_norm": 1.30670964717865, "learning_rate": 5.303491350807832e-07, "loss": 0.08584213256835938, "step": 6192 }, { "epoch": 0.862955479690657, "grad_norm": 0.892043948173523, "learning_rate": 5.292933064604228e-07, "loss": 0.08885955810546875, "step": 6193 }, { "epoch": 0.8630948233818714, "grad_norm": 0.4707173705101013, "learning_rate": 5.282384711290228e-07, "loss": 0.05138063430786133, "step": 6194 }, { "epoch": 0.8632341670730858, "grad_norm": 0.6301442980766296, "learning_rate": 5.271846293209426e-07, "loss": 0.054117679595947266, "step": 6195 }, { "epoch": 0.8633735107643001, "grad_norm": 0.8720158934593201, "learning_rate": 5.261317812703204e-07, "loss": 0.0723733901977539, "step": 6196 }, { "epoch": 0.8635128544555145, "grad_norm": 1.059632658958435, "learning_rate": 5.250799272110768e-07, "loss": 0.09883689880371094, "step": 6197 }, { "epoch": 0.8636521981467289, "grad_norm": 0.6415290832519531, "learning_rate": 5.240290673769099e-07, "loss": 0.0880727767944336, "step": 6198 }, { "epoch": 0.8637915418379433, "grad_norm": 0.8281276822090149, "learning_rate": 5.229792020012947e-07, "loss": 0.0841817855834961, "step": 6199 }, { "epoch": 0.8639308855291576, "grad_norm": 0.8149059414863586, "learning_rate": 5.2193033131749e-07, "loss": 0.08226728439331055, "step": 6200 }, { "epoch": 0.864070229220372, "grad_norm": 1.040069818496704, "learning_rate": 5.20882455558529e-07, "loss": 0.07676506042480469, "step": 6201 }, { "epoch": 0.8642095729115864, "grad_norm": 0.7078242897987366, "learning_rate": 5.19835574957227e-07, "loss": 0.06116199493408203, "step": 6202 }, { "epoch": 0.8643489166028008, "grad_norm": 0.6706092953681946, "learning_rate": 5.187896897461752e-07, "loss": 0.059342384338378906, "step": 6203 }, { "epoch": 0.8644882602940152, "grad_norm": 1.3072139024734497, "learning_rate": 5.177448001577468e-07, "loss": 0.08721637725830078, "step": 6204 }, { "epoch": 0.8646276039852295, "grad_norm": 0.674363911151886, "learning_rate": 5.167009064240936e-07, "loss": 0.07906579971313477, "step": 6205 }, { "epoch": 0.8647669476764439, "grad_norm": 0.8475940227508545, "learning_rate": 5.156580087771429e-07, "loss": 0.07268524169921875, "step": 6206 }, { "epoch": 0.8649062913676583, "grad_norm": 0.6932336688041687, "learning_rate": 5.146161074486022e-07, "loss": 0.07461929321289062, "step": 6207 }, { "epoch": 0.8650456350588727, "grad_norm": 0.9813533425331116, "learning_rate": 5.135752026699597e-07, "loss": 0.07405853271484375, "step": 6208 }, { "epoch": 0.865184978750087, "grad_norm": 0.5624791979789734, "learning_rate": 5.125352946724816e-07, "loss": 0.06663703918457031, "step": 6209 }, { "epoch": 0.8653243224413014, "grad_norm": 0.7163691520690918, "learning_rate": 5.114963836872105e-07, "loss": 0.07047224044799805, "step": 6210 }, { "epoch": 0.8654636661325158, "grad_norm": 1.0007925033569336, "learning_rate": 5.104584699449671e-07, "loss": 0.08309602737426758, "step": 6211 }, { "epoch": 0.8656030098237303, "grad_norm": 1.0512781143188477, "learning_rate": 5.094215536763541e-07, "loss": 0.10021400451660156, "step": 6212 }, { "epoch": 0.8657423535149447, "grad_norm": 0.6302140951156616, "learning_rate": 5.083856351117511e-07, "loss": 0.0767812728881836, "step": 6213 }, { "epoch": 0.865881697206159, "grad_norm": 0.8718206882476807, "learning_rate": 5.073507144813139e-07, "loss": 0.0670480728149414, "step": 6214 }, { "epoch": 0.8660210408973734, "grad_norm": 0.6012245416641235, "learning_rate": 5.063167920149797e-07, "loss": 0.06362152099609375, "step": 6215 }, { "epoch": 0.8661603845885878, "grad_norm": 0.47299128770828247, "learning_rate": 5.052838679424609e-07, "loss": 0.05899333953857422, "step": 6216 }, { "epoch": 0.8662997282798022, "grad_norm": 0.6455273628234863, "learning_rate": 5.042519424932512e-07, "loss": 0.08044242858886719, "step": 6217 }, { "epoch": 0.8664390719710166, "grad_norm": 0.6933379769325256, "learning_rate": 5.0322101589662e-07, "loss": 0.06964111328125, "step": 6218 }, { "epoch": 0.8665784156622309, "grad_norm": 1.2029876708984375, "learning_rate": 5.02191088381615e-07, "loss": 0.08540916442871094, "step": 6219 }, { "epoch": 0.8667177593534453, "grad_norm": 0.5677298903465271, "learning_rate": 5.01162160177065e-07, "loss": 0.06219625473022461, "step": 6220 }, { "epoch": 0.8668571030446597, "grad_norm": 0.7416636347770691, "learning_rate": 5.001342315115726e-07, "loss": 0.0827646255493164, "step": 6221 }, { "epoch": 0.8669964467358741, "grad_norm": 0.9843379855155945, "learning_rate": 4.991073026135196e-07, "loss": 0.10355091094970703, "step": 6222 }, { "epoch": 0.8671357904270884, "grad_norm": 1.0793832540512085, "learning_rate": 4.980813737110662e-07, "loss": 0.0928335189819336, "step": 6223 }, { "epoch": 0.8672751341183028, "grad_norm": 0.9694938063621521, "learning_rate": 4.970564450321525e-07, "loss": 0.06834745407104492, "step": 6224 }, { "epoch": 0.8674144778095172, "grad_norm": 0.9494277834892273, "learning_rate": 4.960325168044916e-07, "loss": 0.09517192840576172, "step": 6225 }, { "epoch": 0.8675538215007316, "grad_norm": 0.7046290636062622, "learning_rate": 4.950095892555789e-07, "loss": 0.0888967514038086, "step": 6226 }, { "epoch": 0.867693165191946, "grad_norm": 0.9269615411758423, "learning_rate": 4.93987662612685e-07, "loss": 0.05946636199951172, "step": 6227 }, { "epoch": 0.8678325088831603, "grad_norm": 0.8690692186355591, "learning_rate": 4.929667371028579e-07, "loss": 0.0637049674987793, "step": 6228 }, { "epoch": 0.8679718525743747, "grad_norm": 0.7758440375328064, "learning_rate": 4.919468129529237e-07, "loss": 0.0722665786743164, "step": 6229 }, { "epoch": 0.8681111962655891, "grad_norm": 0.6893408298492432, "learning_rate": 4.909278903894887e-07, "loss": 0.07599067687988281, "step": 6230 }, { "epoch": 0.8682505399568035, "grad_norm": 0.8996530175209045, "learning_rate": 4.89909969638932e-07, "loss": 0.09287834167480469, "step": 6231 }, { "epoch": 0.8683898836480178, "grad_norm": 0.5130107998847961, "learning_rate": 4.888930509274125e-07, "loss": 0.0715036392211914, "step": 6232 }, { "epoch": 0.8685292273392322, "grad_norm": 0.849837601184845, "learning_rate": 4.878771344808664e-07, "loss": 0.06792259216308594, "step": 6233 }, { "epoch": 0.8686685710304466, "grad_norm": 0.7739664316177368, "learning_rate": 4.868622205250089e-07, "loss": 0.07059955596923828, "step": 6234 }, { "epoch": 0.868807914721661, "grad_norm": 0.71004319190979, "learning_rate": 4.858483092853278e-07, "loss": 0.07463264465332031, "step": 6235 }, { "epoch": 0.8689472584128753, "grad_norm": 0.7338085174560547, "learning_rate": 4.848354009870931e-07, "loss": 0.06697845458984375, "step": 6236 }, { "epoch": 0.8690866021040897, "grad_norm": 1.006998896598816, "learning_rate": 4.838234958553501e-07, "loss": 0.09284305572509766, "step": 6237 }, { "epoch": 0.8692259457953041, "grad_norm": 1.323104977607727, "learning_rate": 4.828125941149197e-07, "loss": 0.09007549285888672, "step": 6238 }, { "epoch": 0.8693652894865185, "grad_norm": 0.5733103156089783, "learning_rate": 4.818026959904016e-07, "loss": 0.07998943328857422, "step": 6239 }, { "epoch": 0.8695046331777329, "grad_norm": 0.8065129518508911, "learning_rate": 4.80793801706172e-07, "loss": 0.09095573425292969, "step": 6240 }, { "epoch": 0.8696439768689472, "grad_norm": 0.7336673736572266, "learning_rate": 4.797859114863851e-07, "loss": 0.07247400283813477, "step": 6241 }, { "epoch": 0.8697833205601616, "grad_norm": 0.8295286297798157, "learning_rate": 4.787790255549707e-07, "loss": 0.0883026123046875, "step": 6242 }, { "epoch": 0.869922664251376, "grad_norm": 0.8814952969551086, "learning_rate": 4.777731441356342e-07, "loss": 0.07050895690917969, "step": 6243 }, { "epoch": 0.8700620079425904, "grad_norm": 0.6762577891349792, "learning_rate": 4.7676826745186144e-07, "loss": 0.0852665901184082, "step": 6244 }, { "epoch": 0.8702013516338047, "grad_norm": 0.8124958276748657, "learning_rate": 4.757643957269131e-07, "loss": 0.08133602142333984, "step": 6245 }, { "epoch": 0.8703406953250191, "grad_norm": 0.6171401739120483, "learning_rate": 4.7476152918382535e-07, "loss": 0.061966896057128906, "step": 6246 }, { "epoch": 0.8704800390162335, "grad_norm": 0.9099807739257812, "learning_rate": 4.737596680454137e-07, "loss": 0.07147955894470215, "step": 6247 }, { "epoch": 0.8706193827074479, "grad_norm": 0.6728642582893372, "learning_rate": 4.727588125342669e-07, "loss": 0.06727123260498047, "step": 6248 }, { "epoch": 0.8707587263986623, "grad_norm": 0.6154356598854065, "learning_rate": 4.7175896287275424e-07, "loss": 0.06480026245117188, "step": 6249 }, { "epoch": 0.8708980700898766, "grad_norm": 0.7148872017860413, "learning_rate": 4.7076011928301803e-07, "loss": 0.0713043212890625, "step": 6250 }, { "epoch": 0.871037413781091, "grad_norm": 0.662929356098175, "learning_rate": 4.6976228198697847e-07, "loss": 0.06932449340820312, "step": 6251 }, { "epoch": 0.8711767574723055, "grad_norm": 0.8784526586532593, "learning_rate": 4.687654512063344e-07, "loss": 0.07966232299804688, "step": 6252 }, { "epoch": 0.8713161011635199, "grad_norm": 0.5265050530433655, "learning_rate": 4.6776962716255593e-07, "loss": 0.06020212173461914, "step": 6253 }, { "epoch": 0.8714554448547343, "grad_norm": 0.6777220368385315, "learning_rate": 4.667748100768937e-07, "loss": 0.07748126983642578, "step": 6254 }, { "epoch": 0.8715947885459486, "grad_norm": 0.464958131313324, "learning_rate": 4.657810001703733e-07, "loss": 0.05151557922363281, "step": 6255 }, { "epoch": 0.871734132237163, "grad_norm": 0.7520949244499207, "learning_rate": 4.647881976637975e-07, "loss": 0.10146331787109375, "step": 6256 }, { "epoch": 0.8718734759283774, "grad_norm": 0.6890666484832764, "learning_rate": 4.637964027777425e-07, "loss": 0.0730133056640625, "step": 6257 }, { "epoch": 0.8720128196195918, "grad_norm": 0.7445491552352905, "learning_rate": 4.62805615732565e-07, "loss": 0.08163690567016602, "step": 6258 }, { "epoch": 0.8721521633108061, "grad_norm": 0.7553940415382385, "learning_rate": 4.6181583674839323e-07, "loss": 0.08362674713134766, "step": 6259 }, { "epoch": 0.8722915070020205, "grad_norm": 0.8762204647064209, "learning_rate": 4.6082706604513307e-07, "loss": 0.07827091217041016, "step": 6260 }, { "epoch": 0.8724308506932349, "grad_norm": 1.0582038164138794, "learning_rate": 4.598393038424681e-07, "loss": 0.07919502258300781, "step": 6261 }, { "epoch": 0.8725701943844493, "grad_norm": 0.4371945858001709, "learning_rate": 4.5885255035985675e-07, "loss": 0.0591583251953125, "step": 6262 }, { "epoch": 0.8727095380756636, "grad_norm": 0.9316326379776001, "learning_rate": 4.578668058165325e-07, "loss": 0.09973716735839844, "step": 6263 }, { "epoch": 0.872848881766878, "grad_norm": 0.6998912692070007, "learning_rate": 4.5688207043150467e-07, "loss": 0.08264350891113281, "step": 6264 }, { "epoch": 0.8729882254580924, "grad_norm": 1.0290101766586304, "learning_rate": 4.5589834442355986e-07, "loss": 0.10079813003540039, "step": 6265 }, { "epoch": 0.8731275691493068, "grad_norm": 0.7910116314888, "learning_rate": 4.549156280112599e-07, "loss": 0.08206033706665039, "step": 6266 }, { "epoch": 0.8732669128405212, "grad_norm": 0.6372849941253662, "learning_rate": 4.5393392141294066e-07, "loss": 0.05828857421875, "step": 6267 }, { "epoch": 0.8734062565317355, "grad_norm": 0.7043319344520569, "learning_rate": 4.5295322484671667e-07, "loss": 0.06458187103271484, "step": 6268 }, { "epoch": 0.8735456002229499, "grad_norm": 0.47349417209625244, "learning_rate": 4.519735385304741e-07, "loss": 0.05852556228637695, "step": 6269 }, { "epoch": 0.8736849439141643, "grad_norm": 0.8945528864860535, "learning_rate": 4.509948626818789e-07, "loss": 0.09195613861083984, "step": 6270 }, { "epoch": 0.8738242876053787, "grad_norm": 0.47137847542762756, "learning_rate": 4.500171975183687e-07, "loss": 0.06678962707519531, "step": 6271 }, { "epoch": 0.873963631296593, "grad_norm": 0.6837279200553894, "learning_rate": 4.4904054325715927e-07, "loss": 0.0808262825012207, "step": 6272 }, { "epoch": 0.8741029749878074, "grad_norm": 0.6802668571472168, "learning_rate": 4.4806490011524205e-07, "loss": 0.07382392883300781, "step": 6273 }, { "epoch": 0.8742423186790218, "grad_norm": 1.1051976680755615, "learning_rate": 4.4709026830938194e-07, "loss": 0.0858755111694336, "step": 6274 }, { "epoch": 0.8743816623702362, "grad_norm": 0.9050634503364563, "learning_rate": 4.46116648056118e-07, "loss": 0.08996772766113281, "step": 6275 }, { "epoch": 0.8745210060614506, "grad_norm": 0.7173388600349426, "learning_rate": 4.451440395717682e-07, "loss": 0.06978607177734375, "step": 6276 }, { "epoch": 0.8746603497526649, "grad_norm": 0.8723272085189819, "learning_rate": 4.441724430724248e-07, "loss": 0.0714273452758789, "step": 6277 }, { "epoch": 0.8747996934438793, "grad_norm": 0.617901086807251, "learning_rate": 4.432018587739517e-07, "loss": 0.05942869186401367, "step": 6278 }, { "epoch": 0.8749390371350937, "grad_norm": 0.7553775906562805, "learning_rate": 4.422322868919937e-07, "loss": 0.0834360122680664, "step": 6279 }, { "epoch": 0.8750783808263081, "grad_norm": 0.48236238956451416, "learning_rate": 4.4126372764196457e-07, "loss": 0.06015205383300781, "step": 6280 }, { "epoch": 0.8752177245175224, "grad_norm": 1.1040234565734863, "learning_rate": 4.402961812390588e-07, "loss": 0.08979082107543945, "step": 6281 }, { "epoch": 0.8753570682087368, "grad_norm": 0.7468440532684326, "learning_rate": 4.3932964789824064e-07, "loss": 0.07284164428710938, "step": 6282 }, { "epoch": 0.8754964118999512, "grad_norm": 0.7495017647743225, "learning_rate": 4.3836412783425265e-07, "loss": 0.07378053665161133, "step": 6283 }, { "epoch": 0.8756357555911656, "grad_norm": 0.5844916105270386, "learning_rate": 4.3739962126161273e-07, "loss": 0.06311702728271484, "step": 6284 }, { "epoch": 0.87577509928238, "grad_norm": 0.6229231953620911, "learning_rate": 4.3643612839461057e-07, "loss": 0.07545852661132812, "step": 6285 }, { "epoch": 0.8759144429735943, "grad_norm": 0.8916740417480469, "learning_rate": 4.354736494473122e-07, "loss": 0.08897161483764648, "step": 6286 }, { "epoch": 0.8760537866648087, "grad_norm": 0.5625234842300415, "learning_rate": 4.345121846335593e-07, "loss": 0.06511497497558594, "step": 6287 }, { "epoch": 0.8761931303560231, "grad_norm": 0.8841897249221802, "learning_rate": 4.335517341669676e-07, "loss": 0.09757232666015625, "step": 6288 }, { "epoch": 0.8763324740472375, "grad_norm": 1.1807172298431396, "learning_rate": 4.3259229826092655e-07, "loss": 0.09538555145263672, "step": 6289 }, { "epoch": 0.8764718177384518, "grad_norm": 0.47003626823425293, "learning_rate": 4.316338771286005e-07, "loss": 0.06453895568847656, "step": 6290 }, { "epoch": 0.8766111614296662, "grad_norm": 1.392568588256836, "learning_rate": 4.3067647098293033e-07, "loss": 0.11595916748046875, "step": 6291 }, { "epoch": 0.8767505051208806, "grad_norm": 0.5997268557548523, "learning_rate": 4.29720080036628e-07, "loss": 0.06940674781799316, "step": 6292 }, { "epoch": 0.8768898488120951, "grad_norm": 1.0647342205047607, "learning_rate": 4.2876470450218254e-07, "loss": 0.09581184387207031, "step": 6293 }, { "epoch": 0.8770291925033095, "grad_norm": 0.8521998524665833, "learning_rate": 4.278103445918569e-07, "loss": 0.07645797729492188, "step": 6294 }, { "epoch": 0.8771685361945238, "grad_norm": 0.5995393991470337, "learning_rate": 4.268570005176892e-07, "loss": 0.06576395034790039, "step": 6295 }, { "epoch": 0.8773078798857382, "grad_norm": 0.6588721871376038, "learning_rate": 4.259046724914878e-07, "loss": 0.0688161849975586, "step": 6296 }, { "epoch": 0.8774472235769526, "grad_norm": 0.7151451706886292, "learning_rate": 4.2495336072484015e-07, "loss": 0.07604694366455078, "step": 6297 }, { "epoch": 0.877586567268167, "grad_norm": 0.54990154504776, "learning_rate": 4.240030654291061e-07, "loss": 0.05997657775878906, "step": 6298 }, { "epoch": 0.8777259109593814, "grad_norm": 0.7613615989685059, "learning_rate": 4.2305378681541833e-07, "loss": 0.07145118713378906, "step": 6299 }, { "epoch": 0.8778652546505957, "grad_norm": 0.6382986307144165, "learning_rate": 4.221055250946865e-07, "loss": 0.06508541107177734, "step": 6300 }, { "epoch": 0.8780045983418101, "grad_norm": 1.0817081928253174, "learning_rate": 4.21158280477591e-07, "loss": 0.09665679931640625, "step": 6301 }, { "epoch": 0.8781439420330245, "grad_norm": 0.648161768913269, "learning_rate": 4.202120531745896e-07, "loss": 0.06351661682128906, "step": 6302 }, { "epoch": 0.8782832857242389, "grad_norm": 0.8750203251838684, "learning_rate": 4.192668433959113e-07, "loss": 0.081329345703125, "step": 6303 }, { "epoch": 0.8784226294154532, "grad_norm": 1.011135458946228, "learning_rate": 4.183226513515598e-07, "loss": 0.087188720703125, "step": 6304 }, { "epoch": 0.8785619731066676, "grad_norm": 0.66841059923172, "learning_rate": 4.173794772513151e-07, "loss": 0.06466007232666016, "step": 6305 }, { "epoch": 0.878701316797882, "grad_norm": 1.2439683675765991, "learning_rate": 4.1643732130472737e-07, "loss": 0.07140302658081055, "step": 6306 }, { "epoch": 0.8788406604890964, "grad_norm": 0.7707011699676514, "learning_rate": 4.1549618372112135e-07, "loss": 0.09950065612792969, "step": 6307 }, { "epoch": 0.8789800041803107, "grad_norm": 1.3339980840682983, "learning_rate": 4.1455606470959755e-07, "loss": 0.09596824645996094, "step": 6308 }, { "epoch": 0.8791193478715251, "grad_norm": 0.5121619701385498, "learning_rate": 4.1361696447902944e-07, "loss": 0.07425785064697266, "step": 6309 }, { "epoch": 0.8792586915627395, "grad_norm": 0.6390931606292725, "learning_rate": 4.1267888323806294e-07, "loss": 0.08586549758911133, "step": 6310 }, { "epoch": 0.8793980352539539, "grad_norm": 1.1191941499710083, "learning_rate": 4.117418211951174e-07, "loss": 0.09137916564941406, "step": 6311 }, { "epoch": 0.8795373789451683, "grad_norm": 0.742401659488678, "learning_rate": 4.1080577855838746e-07, "loss": 0.07077884674072266, "step": 6312 }, { "epoch": 0.8796767226363826, "grad_norm": 0.7849133610725403, "learning_rate": 4.098707555358411e-07, "loss": 0.06758308410644531, "step": 6313 }, { "epoch": 0.879816066327597, "grad_norm": 1.1293843984603882, "learning_rate": 4.0893675233521777e-07, "loss": 0.10078811645507812, "step": 6314 }, { "epoch": 0.8799554100188114, "grad_norm": 1.4973918199539185, "learning_rate": 4.080037691640321e-07, "loss": 0.08924293518066406, "step": 6315 }, { "epoch": 0.8800947537100258, "grad_norm": 0.6540220379829407, "learning_rate": 4.070718062295731e-07, "loss": 0.06741905212402344, "step": 6316 }, { "epoch": 0.8802340974012401, "grad_norm": 0.4526790976524353, "learning_rate": 4.0614086373890026e-07, "loss": 0.06436729431152344, "step": 6317 }, { "epoch": 0.8803734410924545, "grad_norm": 0.6872704029083252, "learning_rate": 4.05210941898847e-07, "loss": 0.06161642074584961, "step": 6318 }, { "epoch": 0.8805127847836689, "grad_norm": 0.8102474212646484, "learning_rate": 4.042820409160214e-07, "loss": 0.08274459838867188, "step": 6319 }, { "epoch": 0.8806521284748833, "grad_norm": 0.6365944743156433, "learning_rate": 4.033541609968056e-07, "loss": 0.06947708129882812, "step": 6320 }, { "epoch": 0.8807914721660977, "grad_norm": 0.9239325523376465, "learning_rate": 4.0242730234735184e-07, "loss": 0.09202003479003906, "step": 6321 }, { "epoch": 0.880930815857312, "grad_norm": 0.7836915850639343, "learning_rate": 4.01501465173586e-07, "loss": 0.08136892318725586, "step": 6322 }, { "epoch": 0.8810701595485264, "grad_norm": 0.658012866973877, "learning_rate": 4.005766496812097e-07, "loss": 0.0634603500366211, "step": 6323 }, { "epoch": 0.8812095032397408, "grad_norm": 0.5034682154655457, "learning_rate": 3.9965285607569573e-07, "loss": 0.06470775604248047, "step": 6324 }, { "epoch": 0.8813488469309552, "grad_norm": 0.7473801970481873, "learning_rate": 3.987300845622882e-07, "loss": 0.07646560668945312, "step": 6325 }, { "epoch": 0.8814881906221695, "grad_norm": 0.8630263805389404, "learning_rate": 3.978083353460083e-07, "loss": 0.07339811325073242, "step": 6326 }, { "epoch": 0.8816275343133839, "grad_norm": 0.5490570068359375, "learning_rate": 3.96887608631647e-07, "loss": 0.07029438018798828, "step": 6327 }, { "epoch": 0.8817668780045983, "grad_norm": 0.4857979118824005, "learning_rate": 3.959679046237663e-07, "loss": 0.059510231018066406, "step": 6328 }, { "epoch": 0.8819062216958127, "grad_norm": 0.5940669775009155, "learning_rate": 3.950492235267062e-07, "loss": 0.07247734069824219, "step": 6329 }, { "epoch": 0.882045565387027, "grad_norm": 1.3743025064468384, "learning_rate": 3.9413156554457655e-07, "loss": 0.09004068374633789, "step": 6330 }, { "epoch": 0.8821849090782414, "grad_norm": 0.6858499050140381, "learning_rate": 3.9321493088125774e-07, "loss": 0.07012748718261719, "step": 6331 }, { "epoch": 0.8823242527694558, "grad_norm": 0.522009015083313, "learning_rate": 3.9229931974040844e-07, "loss": 0.0666360855102539, "step": 6332 }, { "epoch": 0.8824635964606703, "grad_norm": 0.7007552981376648, "learning_rate": 3.9138473232545326e-07, "loss": 0.06669998168945312, "step": 6333 }, { "epoch": 0.8826029401518847, "grad_norm": 0.8214578628540039, "learning_rate": 3.9047116883959513e-07, "loss": 0.07606983184814453, "step": 6334 }, { "epoch": 0.882742283843099, "grad_norm": 0.8419836163520813, "learning_rate": 3.895586294858045e-07, "loss": 0.07727622985839844, "step": 6335 }, { "epoch": 0.8828816275343134, "grad_norm": 0.6370621919631958, "learning_rate": 3.886471144668291e-07, "loss": 0.08083629608154297, "step": 6336 }, { "epoch": 0.8830209712255278, "grad_norm": 0.8389537334442139, "learning_rate": 3.8773662398518596e-07, "loss": 0.07858085632324219, "step": 6337 }, { "epoch": 0.8831603149167422, "grad_norm": 0.8675861358642578, "learning_rate": 3.8682715824316594e-07, "loss": 0.08702659606933594, "step": 6338 }, { "epoch": 0.8832996586079566, "grad_norm": 0.906721830368042, "learning_rate": 3.8591871744282973e-07, "loss": 0.07216930389404297, "step": 6339 }, { "epoch": 0.8834390022991709, "grad_norm": 0.7458005547523499, "learning_rate": 3.85011301786013e-07, "loss": 0.0839242935180664, "step": 6340 }, { "epoch": 0.8835783459903853, "grad_norm": 0.5522433519363403, "learning_rate": 3.841049114743239e-07, "loss": 0.06933879852294922, "step": 6341 }, { "epoch": 0.8837176896815997, "grad_norm": 0.6681802272796631, "learning_rate": 3.8319954670914094e-07, "loss": 0.08263969421386719, "step": 6342 }, { "epoch": 0.8838570333728141, "grad_norm": 0.6059678792953491, "learning_rate": 3.8229520769161474e-07, "loss": 0.07244658470153809, "step": 6343 }, { "epoch": 0.8839963770640284, "grad_norm": 0.45456117391586304, "learning_rate": 3.813918946226691e-07, "loss": 0.06327486038208008, "step": 6344 }, { "epoch": 0.8841357207552428, "grad_norm": 1.0111286640167236, "learning_rate": 3.804896077030007e-07, "loss": 0.07574295997619629, "step": 6345 }, { "epoch": 0.8842750644464572, "grad_norm": 0.6187494993209839, "learning_rate": 3.7958834713307524e-07, "loss": 0.06695079803466797, "step": 6346 }, { "epoch": 0.8844144081376716, "grad_norm": 0.6265200972557068, "learning_rate": 3.786881131131348e-07, "loss": 0.06891942024230957, "step": 6347 }, { "epoch": 0.884553751828886, "grad_norm": 0.7745209336280823, "learning_rate": 3.7778890584318773e-07, "loss": 0.0759730339050293, "step": 6348 }, { "epoch": 0.8846930955201003, "grad_norm": 0.8173906803131104, "learning_rate": 3.7689072552301973e-07, "loss": 0.07436180114746094, "step": 6349 }, { "epoch": 0.8848324392113147, "grad_norm": 0.6611725091934204, "learning_rate": 3.759935723521846e-07, "loss": 0.0758657455444336, "step": 6350 }, { "epoch": 0.8849717829025291, "grad_norm": 0.6025267243385315, "learning_rate": 3.7509744653001e-07, "loss": 0.06646156311035156, "step": 6351 }, { "epoch": 0.8851111265937435, "grad_norm": 0.9530943632125854, "learning_rate": 3.742023482555951e-07, "loss": 0.08440876007080078, "step": 6352 }, { "epoch": 0.8852504702849578, "grad_norm": 0.5756753087043762, "learning_rate": 3.7330827772780967e-07, "loss": 0.07035303115844727, "step": 6353 }, { "epoch": 0.8853898139761722, "grad_norm": 0.665399432182312, "learning_rate": 3.7241523514529476e-07, "loss": 0.0699758529663086, "step": 6354 }, { "epoch": 0.8855291576673866, "grad_norm": 0.5440705418586731, "learning_rate": 3.715232207064651e-07, "loss": 0.07771730422973633, "step": 6355 }, { "epoch": 0.885668501358601, "grad_norm": 0.49023547768592834, "learning_rate": 3.7063223460950705e-07, "loss": 0.06633901596069336, "step": 6356 }, { "epoch": 0.8858078450498154, "grad_norm": 0.5283829569816589, "learning_rate": 3.697422770523751e-07, "loss": 0.06803512573242188, "step": 6357 }, { "epoch": 0.8859471887410297, "grad_norm": 0.9651145935058594, "learning_rate": 3.688533482327994e-07, "loss": 0.08394813537597656, "step": 6358 }, { "epoch": 0.8860865324322441, "grad_norm": 0.8455158472061157, "learning_rate": 3.6796544834827865e-07, "loss": 0.07292461395263672, "step": 6359 }, { "epoch": 0.8862258761234585, "grad_norm": 0.7355096936225891, "learning_rate": 3.670785775960839e-07, "loss": 0.06492996215820312, "step": 6360 }, { "epoch": 0.8863652198146729, "grad_norm": 0.6223850846290588, "learning_rate": 3.66192736173257e-07, "loss": 0.0708627700805664, "step": 6361 }, { "epoch": 0.8865045635058872, "grad_norm": 0.627419114112854, "learning_rate": 3.653079242766139e-07, "loss": 0.06982994079589844, "step": 6362 }, { "epoch": 0.8866439071971016, "grad_norm": 1.339910864830017, "learning_rate": 3.6442414210273834e-07, "loss": 0.11167144775390625, "step": 6363 }, { "epoch": 0.886783250888316, "grad_norm": 0.9867777228355408, "learning_rate": 3.6354138984798506e-07, "loss": 0.07604408264160156, "step": 6364 }, { "epoch": 0.8869225945795304, "grad_norm": 1.6202330589294434, "learning_rate": 3.6265966770848314e-07, "loss": 0.10648155212402344, "step": 6365 }, { "epoch": 0.8870619382707448, "grad_norm": 0.8158136606216431, "learning_rate": 3.6177897588013154e-07, "loss": 0.08386754989624023, "step": 6366 }, { "epoch": 0.8872012819619591, "grad_norm": 0.7375971078872681, "learning_rate": 3.608993145585987e-07, "loss": 0.07376861572265625, "step": 6367 }, { "epoch": 0.8873406256531735, "grad_norm": 0.6191962957382202, "learning_rate": 3.600206839393261e-07, "loss": 0.07171249389648438, "step": 6368 }, { "epoch": 0.8874799693443879, "grad_norm": 1.0072609186172485, "learning_rate": 3.591430842175242e-07, "loss": 0.08136749267578125, "step": 6369 }, { "epoch": 0.8876193130356023, "grad_norm": 0.5010581016540527, "learning_rate": 3.5826651558817703e-07, "loss": 0.06400680541992188, "step": 6370 }, { "epoch": 0.8877586567268166, "grad_norm": 0.6273073554039001, "learning_rate": 3.5739097824603665e-07, "loss": 0.0738525390625, "step": 6371 }, { "epoch": 0.887898000418031, "grad_norm": 0.6999067068099976, "learning_rate": 3.5651647238562904e-07, "loss": 0.07610702514648438, "step": 6372 }, { "epoch": 0.8880373441092455, "grad_norm": 0.632998526096344, "learning_rate": 3.5564299820124883e-07, "loss": 0.07190990447998047, "step": 6373 }, { "epoch": 0.8881766878004599, "grad_norm": 1.0007970333099365, "learning_rate": 3.547705558869624e-07, "loss": 0.07606124877929688, "step": 6374 }, { "epoch": 0.8883160314916743, "grad_norm": 0.7149776816368103, "learning_rate": 3.5389914563660475e-07, "loss": 0.06561279296875, "step": 6375 }, { "epoch": 0.8884553751828886, "grad_norm": 0.5685744285583496, "learning_rate": 3.530287676437849e-07, "loss": 0.06067156791687012, "step": 6376 }, { "epoch": 0.888594718874103, "grad_norm": 0.36278867721557617, "learning_rate": 3.5215942210188204e-07, "loss": 0.04955148696899414, "step": 6377 }, { "epoch": 0.8887340625653174, "grad_norm": 0.7505413889884949, "learning_rate": 3.512911092040422e-07, "loss": 0.07010364532470703, "step": 6378 }, { "epoch": 0.8888734062565318, "grad_norm": 1.0256162881851196, "learning_rate": 3.5042382914318716e-07, "loss": 0.07682132720947266, "step": 6379 }, { "epoch": 0.8890127499477462, "grad_norm": 0.604225218296051, "learning_rate": 3.495575821120045e-07, "loss": 0.06560516357421875, "step": 6380 }, { "epoch": 0.8891520936389605, "grad_norm": 0.6373326182365417, "learning_rate": 3.4869236830295695e-07, "loss": 0.07343482971191406, "step": 6381 }, { "epoch": 0.8892914373301749, "grad_norm": 0.7316504716873169, "learning_rate": 3.478281879082729e-07, "loss": 0.06908202916383743, "step": 6382 }, { "epoch": 0.8894307810213893, "grad_norm": 0.8922885060310364, "learning_rate": 3.469650411199543e-07, "loss": 0.09959602355957031, "step": 6383 }, { "epoch": 0.8895701247126037, "grad_norm": 0.7639334797859192, "learning_rate": 3.4610292812977454e-07, "loss": 0.07467269897460938, "step": 6384 }, { "epoch": 0.889709468403818, "grad_norm": 0.4429170489311218, "learning_rate": 3.452418491292731e-07, "loss": 0.06348848342895508, "step": 6385 }, { "epoch": 0.8898488120950324, "grad_norm": 1.3202621936798096, "learning_rate": 3.4438180430976243e-07, "loss": 0.11600875854492188, "step": 6386 }, { "epoch": 0.8899881557862468, "grad_norm": 0.8790245652198792, "learning_rate": 3.4352279386232535e-07, "loss": 0.0872182846069336, "step": 6387 }, { "epoch": 0.8901274994774612, "grad_norm": 0.6504204869270325, "learning_rate": 3.426648179778147e-07, "loss": 0.0723114013671875, "step": 6388 }, { "epoch": 0.8902668431686755, "grad_norm": 0.7415354251861572, "learning_rate": 3.4180787684685246e-07, "loss": 0.07803058624267578, "step": 6389 }, { "epoch": 0.8904061868598899, "grad_norm": 0.6235186457633972, "learning_rate": 3.409519706598324e-07, "loss": 0.07941055297851562, "step": 6390 }, { "epoch": 0.8905455305511043, "grad_norm": 0.6965846419334412, "learning_rate": 3.400970996069164e-07, "loss": 0.06451988220214844, "step": 6391 }, { "epoch": 0.8906848742423187, "grad_norm": 0.5547088384628296, "learning_rate": 3.392432638780363e-07, "loss": 0.05474376678466797, "step": 6392 }, { "epoch": 0.8908242179335331, "grad_norm": 0.4707104563713074, "learning_rate": 3.383904636628965e-07, "loss": 0.06120109558105469, "step": 6393 }, { "epoch": 0.8909635616247474, "grad_norm": 0.8219490647315979, "learning_rate": 3.3753869915096936e-07, "loss": 0.08092880249023438, "step": 6394 }, { "epoch": 0.8911029053159618, "grad_norm": 0.7232023477554321, "learning_rate": 3.3668797053149907e-07, "loss": 0.08660316467285156, "step": 6395 }, { "epoch": 0.8912422490071762, "grad_norm": 0.6345046162605286, "learning_rate": 3.3583827799349486e-07, "loss": 0.07802104949951172, "step": 6396 }, { "epoch": 0.8913815926983906, "grad_norm": 0.8739301562309265, "learning_rate": 3.3498962172574033e-07, "loss": 0.10373497009277344, "step": 6397 }, { "epoch": 0.891520936389605, "grad_norm": 0.558605968952179, "learning_rate": 3.3414200191678903e-07, "loss": 0.06443500518798828, "step": 6398 }, { "epoch": 0.8916602800808193, "grad_norm": 1.555112361907959, "learning_rate": 3.332954187549603e-07, "loss": 0.09093189239501953, "step": 6399 }, { "epoch": 0.8917996237720337, "grad_norm": 0.4971620738506317, "learning_rate": 3.3244987242834816e-07, "loss": 0.06550788879394531, "step": 6400 }, { "epoch": 0.8919389674632481, "grad_norm": 0.4403489828109741, "learning_rate": 3.3160536312481174e-07, "loss": 0.06651592254638672, "step": 6401 }, { "epoch": 0.8920783111544625, "grad_norm": 0.5172805190086365, "learning_rate": 3.3076189103198265e-07, "loss": 0.0691065788269043, "step": 6402 }, { "epoch": 0.8922176548456768, "grad_norm": 0.689906895160675, "learning_rate": 3.299194563372604e-07, "loss": 0.07578849792480469, "step": 6403 }, { "epoch": 0.8923569985368912, "grad_norm": 0.7723705172538757, "learning_rate": 3.290780592278148e-07, "loss": 0.08518600463867188, "step": 6404 }, { "epoch": 0.8924963422281056, "grad_norm": 0.6672233939170837, "learning_rate": 3.2823769989058674e-07, "loss": 0.07429313659667969, "step": 6405 }, { "epoch": 0.89263568591932, "grad_norm": 0.530303418636322, "learning_rate": 3.2739837851228306e-07, "loss": 0.06425619125366211, "step": 6406 }, { "epoch": 0.8927750296105343, "grad_norm": 0.47388574481010437, "learning_rate": 3.265600952793818e-07, "loss": 0.060853004455566406, "step": 6407 }, { "epoch": 0.8929143733017487, "grad_norm": 0.5617201924324036, "learning_rate": 3.2572285037813123e-07, "loss": 0.06688308715820312, "step": 6408 }, { "epoch": 0.8930537169929631, "grad_norm": 0.866465151309967, "learning_rate": 3.248866439945486e-07, "loss": 0.08619880676269531, "step": 6409 }, { "epoch": 0.8931930606841775, "grad_norm": 0.61720210313797, "learning_rate": 3.2405147631441757e-07, "loss": 0.056432515382766724, "step": 6410 }, { "epoch": 0.8933324043753919, "grad_norm": 0.9124616980552673, "learning_rate": 3.232173475232964e-07, "loss": 0.080810546875, "step": 6411 }, { "epoch": 0.8934717480666062, "grad_norm": 0.7081686854362488, "learning_rate": 3.2238425780650617e-07, "loss": 0.06876945495605469, "step": 6412 }, { "epoch": 0.8936110917578207, "grad_norm": 0.7011188268661499, "learning_rate": 3.215522073491434e-07, "loss": 0.06544780731201172, "step": 6413 }, { "epoch": 0.8937504354490351, "grad_norm": 0.6249575018882751, "learning_rate": 3.2072119633606845e-07, "loss": 0.0783843994140625, "step": 6414 }, { "epoch": 0.8938897791402495, "grad_norm": 1.0019181966781616, "learning_rate": 3.198912249519143e-07, "loss": 0.06441593170166016, "step": 6415 }, { "epoch": 0.8940291228314639, "grad_norm": 0.5305657386779785, "learning_rate": 3.190622933810816e-07, "loss": 0.05667901039123535, "step": 6416 }, { "epoch": 0.8941684665226782, "grad_norm": 0.7190068364143372, "learning_rate": 3.182344018077399e-07, "loss": 0.07057547569274902, "step": 6417 }, { "epoch": 0.8943078102138926, "grad_norm": 0.6438402533531189, "learning_rate": 3.1740755041582694e-07, "loss": 0.07133960723876953, "step": 6418 }, { "epoch": 0.894447153905107, "grad_norm": 0.6497985124588013, "learning_rate": 3.1658173938905023e-07, "loss": 0.06988716125488281, "step": 6419 }, { "epoch": 0.8945864975963214, "grad_norm": 0.9331527948379517, "learning_rate": 3.1575696891088804e-07, "loss": 0.08859825134277344, "step": 6420 }, { "epoch": 0.8947258412875357, "grad_norm": 0.7372360825538635, "learning_rate": 3.149332391645843e-07, "loss": 0.0637965202331543, "step": 6421 }, { "epoch": 0.8948651849787501, "grad_norm": 0.7147362232208252, "learning_rate": 3.1411055033315207e-07, "loss": 0.08591175079345703, "step": 6422 }, { "epoch": 0.8950045286699645, "grad_norm": 1.0048208236694336, "learning_rate": 3.132889025993746e-07, "loss": 0.0774688720703125, "step": 6423 }, { "epoch": 0.8951438723611789, "grad_norm": 0.7359863519668579, "learning_rate": 3.1246829614580476e-07, "loss": 0.0787196159362793, "step": 6424 }, { "epoch": 0.8952832160523932, "grad_norm": 0.9747664332389832, "learning_rate": 3.1164873115476056e-07, "loss": 0.0660099983215332, "step": 6425 }, { "epoch": 0.8954225597436076, "grad_norm": 0.5012891292572021, "learning_rate": 3.1083020780833137e-07, "loss": 0.06270122528076172, "step": 6426 }, { "epoch": 0.895561903434822, "grad_norm": 1.008649468421936, "learning_rate": 3.1001272628837565e-07, "loss": 0.08696126937866211, "step": 6427 }, { "epoch": 0.8957012471260364, "grad_norm": 0.5572807788848877, "learning_rate": 3.0919628677651636e-07, "loss": 0.0610198974609375, "step": 6428 }, { "epoch": 0.8958405908172508, "grad_norm": 0.5429809093475342, "learning_rate": 3.083808894541496e-07, "loss": 0.07230806350708008, "step": 6429 }, { "epoch": 0.8959799345084651, "grad_norm": 0.7653356790542603, "learning_rate": 3.075665345024387e-07, "loss": 0.07925224304199219, "step": 6430 }, { "epoch": 0.8961192781996795, "grad_norm": 0.7032045125961304, "learning_rate": 3.0675322210231227e-07, "loss": 0.07218742370605469, "step": 6431 }, { "epoch": 0.8962586218908939, "grad_norm": 0.8659231662750244, "learning_rate": 3.0594095243447254e-07, "loss": 0.08087730407714844, "step": 6432 }, { "epoch": 0.8963979655821083, "grad_norm": 0.7823147773742676, "learning_rate": 3.0512972567938505e-07, "loss": 0.06999588012695312, "step": 6433 }, { "epoch": 0.8965373092733226, "grad_norm": 0.8838520050048828, "learning_rate": 3.043195420172879e-07, "loss": 0.09006309509277344, "step": 6434 }, { "epoch": 0.896676652964537, "grad_norm": 0.8415012359619141, "learning_rate": 3.035104016281831e-07, "loss": 0.08569622039794922, "step": 6435 }, { "epoch": 0.8968159966557514, "grad_norm": 0.7887405157089233, "learning_rate": 3.027023046918448e-07, "loss": 0.0899215042591095, "step": 6436 }, { "epoch": 0.8969553403469658, "grad_norm": 1.1711738109588623, "learning_rate": 3.018952513878137e-07, "loss": 0.09528732299804688, "step": 6437 }, { "epoch": 0.8970946840381802, "grad_norm": 0.6502234935760498, "learning_rate": 3.010892418953981e-07, "loss": 0.07984542846679688, "step": 6438 }, { "epoch": 0.8972340277293945, "grad_norm": 0.7348859310150146, "learning_rate": 3.0028427639367475e-07, "loss": 0.07678413391113281, "step": 6439 }, { "epoch": 0.8973733714206089, "grad_norm": 0.8925193548202515, "learning_rate": 2.994803550614883e-07, "loss": 0.08466053009033203, "step": 6440 }, { "epoch": 0.8975127151118233, "grad_norm": 1.7771658897399902, "learning_rate": 2.9867747807745315e-07, "loss": 0.1098785400390625, "step": 6441 }, { "epoch": 0.8976520588030377, "grad_norm": 0.8814509510993958, "learning_rate": 2.978756456199494e-07, "loss": 0.08015155792236328, "step": 6442 }, { "epoch": 0.897791402494252, "grad_norm": 0.6575514078140259, "learning_rate": 2.970748578671251e-07, "loss": 0.0822896957397461, "step": 6443 }, { "epoch": 0.8979307461854664, "grad_norm": 0.627332866191864, "learning_rate": 2.9627511499689787e-07, "loss": 0.060187339782714844, "step": 6444 }, { "epoch": 0.8980700898766808, "grad_norm": 0.7623525261878967, "learning_rate": 2.9547641718695285e-07, "loss": 0.0747518539428711, "step": 6445 }, { "epoch": 0.8982094335678952, "grad_norm": 0.8777163624763489, "learning_rate": 2.946787646147414e-07, "loss": 0.09177541732788086, "step": 6446 }, { "epoch": 0.8983487772591096, "grad_norm": 0.7936050295829773, "learning_rate": 2.9388215745748347e-07, "loss": 0.068939208984375, "step": 6447 }, { "epoch": 0.8984881209503239, "grad_norm": 0.8314614295959473, "learning_rate": 2.9308659589216913e-07, "loss": 0.07371711730957031, "step": 6448 }, { "epoch": 0.8986274646415383, "grad_norm": 0.6077713966369629, "learning_rate": 2.92292080095552e-07, "loss": 0.0739288330078125, "step": 6449 }, { "epoch": 0.8987668083327527, "grad_norm": 0.8715851902961731, "learning_rate": 2.9149861024415526e-07, "loss": 0.08565330505371094, "step": 6450 }, { "epoch": 0.8989061520239671, "grad_norm": 0.6006270051002502, "learning_rate": 2.9070618651427073e-07, "loss": 0.06874752044677734, "step": 6451 }, { "epoch": 0.8990454957151814, "grad_norm": 1.308287501335144, "learning_rate": 2.89914809081957e-07, "loss": 0.07719659805297852, "step": 6452 }, { "epoch": 0.8991848394063959, "grad_norm": 0.5539557337760925, "learning_rate": 2.8912447812303956e-07, "loss": 0.07212257385253906, "step": 6453 }, { "epoch": 0.8993241830976103, "grad_norm": 0.9611948132514954, "learning_rate": 2.8833519381311127e-07, "loss": 0.10104656219482422, "step": 6454 }, { "epoch": 0.8994635267888247, "grad_norm": 1.1614673137664795, "learning_rate": 2.8754695632753406e-07, "loss": 0.09031295776367188, "step": 6455 }, { "epoch": 0.8996028704800391, "grad_norm": 0.6880174279212952, "learning_rate": 2.867597658414367e-07, "loss": 0.0786123275756836, "step": 6456 }, { "epoch": 0.8997422141712534, "grad_norm": 0.8401290774345398, "learning_rate": 2.859736225297133e-07, "loss": 0.08872413635253906, "step": 6457 }, { "epoch": 0.8998815578624678, "grad_norm": 0.7830359935760498, "learning_rate": 2.8518852656702845e-07, "loss": 0.0722494125366211, "step": 6458 }, { "epoch": 0.9000209015536822, "grad_norm": 1.0119059085845947, "learning_rate": 2.844044781278127e-07, "loss": 0.09801864624023438, "step": 6459 }, { "epoch": 0.9001602452448966, "grad_norm": 0.7812474966049194, "learning_rate": 2.836214773862617e-07, "loss": 0.08437538146972656, "step": 6460 }, { "epoch": 0.900299588936111, "grad_norm": 0.8361213803291321, "learning_rate": 2.828395245163418e-07, "loss": 0.0828094482421875, "step": 6461 }, { "epoch": 0.9004389326273253, "grad_norm": 0.9899889826774597, "learning_rate": 2.820586196917857e-07, "loss": 0.09557056427001953, "step": 6462 }, { "epoch": 0.9005782763185397, "grad_norm": 0.5574080944061279, "learning_rate": 2.812787630860919e-07, "loss": 0.06647396087646484, "step": 6463 }, { "epoch": 0.9007176200097541, "grad_norm": 0.7377720475196838, "learning_rate": 2.8049995487252625e-07, "loss": 0.08669137954711914, "step": 6464 }, { "epoch": 0.9008569637009685, "grad_norm": 0.5931088328361511, "learning_rate": 2.7972219522412194e-07, "loss": 0.058307647705078125, "step": 6465 }, { "epoch": 0.9009963073921828, "grad_norm": 0.7924218773841858, "learning_rate": 2.789454843136813e-07, "loss": 0.07424020767211914, "step": 6466 }, { "epoch": 0.9011356510833972, "grad_norm": 0.7610228061676025, "learning_rate": 2.7816982231376964e-07, "loss": 0.07093620300292969, "step": 6467 }, { "epoch": 0.9012749947746116, "grad_norm": 0.8347461819648743, "learning_rate": 2.773952093967225e-07, "loss": 0.09599876403808594, "step": 6468 }, { "epoch": 0.901414338465826, "grad_norm": 0.4893519878387451, "learning_rate": 2.7662164573464156e-07, "loss": 0.06893539428710938, "step": 6469 }, { "epoch": 0.9015536821570403, "grad_norm": 0.6246331334114075, "learning_rate": 2.758491314993944e-07, "loss": 0.08380413055419922, "step": 6470 }, { "epoch": 0.9016930258482547, "grad_norm": 0.7554564476013184, "learning_rate": 2.750776668626148e-07, "loss": 0.08066654205322266, "step": 6471 }, { "epoch": 0.9018323695394691, "grad_norm": 0.839844822883606, "learning_rate": 2.743072519957063e-07, "loss": 0.07648086547851562, "step": 6472 }, { "epoch": 0.9019717132306835, "grad_norm": 0.6840061545372009, "learning_rate": 2.73537887069838e-07, "loss": 0.07252216339111328, "step": 6473 }, { "epoch": 0.9021110569218979, "grad_norm": 0.387497216463089, "learning_rate": 2.7276957225594367e-07, "loss": 0.05170392990112305, "step": 6474 }, { "epoch": 0.9022504006131122, "grad_norm": 0.899621307849884, "learning_rate": 2.7200230772472526e-07, "loss": 0.08684253692626953, "step": 6475 }, { "epoch": 0.9023897443043266, "grad_norm": 0.95391845703125, "learning_rate": 2.712360936466524e-07, "loss": 0.0887002944946289, "step": 6476 }, { "epoch": 0.902529087995541, "grad_norm": 0.755724310874939, "learning_rate": 2.704709301919606e-07, "loss": 0.08130073547363281, "step": 6477 }, { "epoch": 0.9026684316867554, "grad_norm": 0.98777836561203, "learning_rate": 2.6970681753065e-07, "loss": 0.09546375274658203, "step": 6478 }, { "epoch": 0.9028077753779697, "grad_norm": 0.8846080303192139, "learning_rate": 2.6894375583249144e-07, "loss": 0.07971954345703125, "step": 6479 }, { "epoch": 0.9029471190691841, "grad_norm": 0.5465882420539856, "learning_rate": 2.681817452670171e-07, "loss": 0.06237602233886719, "step": 6480 }, { "epoch": 0.9030864627603985, "grad_norm": 0.6226569414138794, "learning_rate": 2.6742078600353106e-07, "loss": 0.06679701805114746, "step": 6481 }, { "epoch": 0.9032258064516129, "grad_norm": 1.2236316204071045, "learning_rate": 2.6666087821109855e-07, "loss": 0.09796428680419922, "step": 6482 }, { "epoch": 0.9033651501428273, "grad_norm": 0.5421836972236633, "learning_rate": 2.6590202205855506e-07, "loss": 0.0736227035522461, "step": 6483 }, { "epoch": 0.9035044938340416, "grad_norm": 0.8158102035522461, "learning_rate": 2.6514421771450194e-07, "loss": 0.08737850189208984, "step": 6484 }, { "epoch": 0.903643837525256, "grad_norm": 0.8543972373008728, "learning_rate": 2.6438746534730497e-07, "loss": 0.07638359069824219, "step": 6485 }, { "epoch": 0.9037831812164704, "grad_norm": 0.9827067852020264, "learning_rate": 2.6363176512509637e-07, "loss": 0.08689069747924805, "step": 6486 }, { "epoch": 0.9039225249076848, "grad_norm": 0.8741369843482971, "learning_rate": 2.628771172157768e-07, "loss": 0.07576274871826172, "step": 6487 }, { "epoch": 0.9040618685988991, "grad_norm": 0.6640124320983887, "learning_rate": 2.621235217870116e-07, "loss": 0.0679159164428711, "step": 6488 }, { "epoch": 0.9042012122901135, "grad_norm": 0.7825855612754822, "learning_rate": 2.6137097900623185e-07, "loss": 0.07217597961425781, "step": 6489 }, { "epoch": 0.9043405559813279, "grad_norm": 0.7758253812789917, "learning_rate": 2.6061948904063663e-07, "loss": 0.08438777923583984, "step": 6490 }, { "epoch": 0.9044798996725423, "grad_norm": 0.8985232710838318, "learning_rate": 2.598690520571889e-07, "loss": 0.07754802703857422, "step": 6491 }, { "epoch": 0.9046192433637567, "grad_norm": 0.7535973787307739, "learning_rate": 2.591196682226182e-07, "loss": 0.0699167251586914, "step": 6492 }, { "epoch": 0.904758587054971, "grad_norm": 0.6515409350395203, "learning_rate": 2.5837133770342135e-07, "loss": 0.06493854522705078, "step": 6493 }, { "epoch": 0.9048979307461855, "grad_norm": 0.9494110941886902, "learning_rate": 2.5762406066585976e-07, "loss": 0.07810592651367188, "step": 6494 }, { "epoch": 0.9050372744373999, "grad_norm": 0.4206370711326599, "learning_rate": 2.568778372759628e-07, "loss": 0.051178932189941406, "step": 6495 }, { "epoch": 0.9051766181286143, "grad_norm": 0.7938984632492065, "learning_rate": 2.5613266769952183e-07, "loss": 0.07015228271484375, "step": 6496 }, { "epoch": 0.9053159618198287, "grad_norm": 0.5333216190338135, "learning_rate": 2.5538855210209823e-07, "loss": 0.06280803680419922, "step": 6497 }, { "epoch": 0.905455305511043, "grad_norm": 0.9134247899055481, "learning_rate": 2.54645490649017e-07, "loss": 0.08028030395507812, "step": 6498 }, { "epoch": 0.9055946492022574, "grad_norm": 1.0256379842758179, "learning_rate": 2.5390348350536887e-07, "loss": 0.0754861831665039, "step": 6499 }, { "epoch": 0.9057339928934718, "grad_norm": 0.9645101428031921, "learning_rate": 2.531625308360125e-07, "loss": 0.0845332145690918, "step": 6500 }, { "epoch": 0.9058733365846862, "grad_norm": 1.003507137298584, "learning_rate": 2.52422632805569e-07, "loss": 0.09719276428222656, "step": 6501 }, { "epoch": 0.9060126802759005, "grad_norm": 0.523685872554779, "learning_rate": 2.5168378957842797e-07, "loss": 0.06150054931640625, "step": 6502 }, { "epoch": 0.9061520239671149, "grad_norm": 0.5471355319023132, "learning_rate": 2.5094600131874205e-07, "loss": 0.062458038330078125, "step": 6503 }, { "epoch": 0.9062913676583293, "grad_norm": 0.5787297487258911, "learning_rate": 2.5020926819043223e-07, "loss": 0.05922222137451172, "step": 6504 }, { "epoch": 0.9064307113495437, "grad_norm": 0.5716431140899658, "learning_rate": 2.4947359035718434e-07, "loss": 0.07314014434814453, "step": 6505 }, { "epoch": 0.906570055040758, "grad_norm": 0.5792697668075562, "learning_rate": 2.487389679824481e-07, "loss": 0.07796478271484375, "step": 6506 }, { "epoch": 0.9067093987319724, "grad_norm": 0.7522451877593994, "learning_rate": 2.4800540122943915e-07, "loss": 0.07687091827392578, "step": 6507 }, { "epoch": 0.9068487424231868, "grad_norm": 0.8865262866020203, "learning_rate": 2.4727289026114043e-07, "loss": 0.07854318618774414, "step": 6508 }, { "epoch": 0.9069880861144012, "grad_norm": 0.6037786602973938, "learning_rate": 2.4654143524029896e-07, "loss": 0.08007097244262695, "step": 6509 }, { "epoch": 0.9071274298056156, "grad_norm": 0.772296667098999, "learning_rate": 2.4581103632942747e-07, "loss": 0.08645963668823242, "step": 6510 }, { "epoch": 0.9072667734968299, "grad_norm": 0.5819961428642273, "learning_rate": 2.4508169369080404e-07, "loss": 0.0684967041015625, "step": 6511 }, { "epoch": 0.9074061171880443, "grad_norm": 0.5729756951332092, "learning_rate": 2.443534074864706e-07, "loss": 0.06895875930786133, "step": 6512 }, { "epoch": 0.9075454608792587, "grad_norm": 0.9596138000488281, "learning_rate": 2.436261778782378e-07, "loss": 0.06975364685058594, "step": 6513 }, { "epoch": 0.9076848045704731, "grad_norm": 0.6401828527450562, "learning_rate": 2.4290000502767755e-07, "loss": 0.0720529556274414, "step": 6514 }, { "epoch": 0.9078241482616874, "grad_norm": 0.80306077003479, "learning_rate": 2.421748890961301e-07, "loss": 0.07473087310791016, "step": 6515 }, { "epoch": 0.9079634919529018, "grad_norm": 1.1200084686279297, "learning_rate": 2.4145083024469996e-07, "loss": 0.08870458602905273, "step": 6516 }, { "epoch": 0.9081028356441162, "grad_norm": 0.6423620581626892, "learning_rate": 2.407278286342557e-07, "loss": 0.0748753547668457, "step": 6517 }, { "epoch": 0.9082421793353306, "grad_norm": 0.9653897285461426, "learning_rate": 2.40005884425431e-07, "loss": 0.1058359146118164, "step": 6518 }, { "epoch": 0.908381523026545, "grad_norm": 0.4154556095600128, "learning_rate": 2.39284997778626e-07, "loss": 0.059012413024902344, "step": 6519 }, { "epoch": 0.9085208667177593, "grad_norm": 0.5478076338768005, "learning_rate": 2.3856516885400693e-07, "loss": 0.06687545776367188, "step": 6520 }, { "epoch": 0.9086602104089737, "grad_norm": 0.6034834384918213, "learning_rate": 2.3784639781150143e-07, "loss": 0.06898307800292969, "step": 6521 }, { "epoch": 0.9087995541001881, "grad_norm": 0.7160298228263855, "learning_rate": 2.3712868481080397e-07, "loss": 0.08051681518554688, "step": 6522 }, { "epoch": 0.9089388977914025, "grad_norm": 0.7968335747718811, "learning_rate": 2.364120300113748e-07, "loss": 0.07460212707519531, "step": 6523 }, { "epoch": 0.9090782414826168, "grad_norm": 0.9374685883522034, "learning_rate": 2.356964335724382e-07, "loss": 0.07692813873291016, "step": 6524 }, { "epoch": 0.9092175851738312, "grad_norm": 0.6071920394897461, "learning_rate": 2.3498189565298312e-07, "loss": 0.07749748229980469, "step": 6525 }, { "epoch": 0.9093569288650456, "grad_norm": 0.6212709546089172, "learning_rate": 2.3426841641176311e-07, "loss": 0.07180595397949219, "step": 6526 }, { "epoch": 0.90949627255626, "grad_norm": 0.5516761541366577, "learning_rate": 2.3355599600729916e-07, "loss": 0.06830978393554688, "step": 6527 }, { "epoch": 0.9096356162474744, "grad_norm": 0.4810609519481659, "learning_rate": 2.328446345978713e-07, "loss": 0.07004451751708984, "step": 6528 }, { "epoch": 0.9097749599386887, "grad_norm": 0.9029502868652344, "learning_rate": 2.3213433234152982e-07, "loss": 0.06866216659545898, "step": 6529 }, { "epoch": 0.9099143036299031, "grad_norm": 0.7892801761627197, "learning_rate": 2.3142508939608844e-07, "loss": 0.08699417114257812, "step": 6530 }, { "epoch": 0.9100536473211175, "grad_norm": 0.8753257393836975, "learning_rate": 2.3071690591912277e-07, "loss": 0.08552074432373047, "step": 6531 }, { "epoch": 0.9101929910123319, "grad_norm": 0.6865747570991516, "learning_rate": 2.3000978206797697e-07, "loss": 0.06919670104980469, "step": 6532 }, { "epoch": 0.9103323347035462, "grad_norm": 0.8986250162124634, "learning_rate": 2.2930371799975593e-07, "loss": 0.07178878784179688, "step": 6533 }, { "epoch": 0.9104716783947607, "grad_norm": 0.7724021673202515, "learning_rate": 2.2859871387133248e-07, "loss": 0.07364177703857422, "step": 6534 }, { "epoch": 0.9106110220859751, "grad_norm": 1.329505443572998, "learning_rate": 2.2789476983934133e-07, "loss": 0.08843135833740234, "step": 6535 }, { "epoch": 0.9107503657771895, "grad_norm": 0.5913036465644836, "learning_rate": 2.271918860601835e-07, "loss": 0.07513713836669922, "step": 6536 }, { "epoch": 0.9108897094684039, "grad_norm": 1.0993150472640991, "learning_rate": 2.2649006269002406e-07, "loss": 0.09452009201049805, "step": 6537 }, { "epoch": 0.9110290531596182, "grad_norm": 0.6125795245170593, "learning_rate": 2.257892998847916e-07, "loss": 0.0753164291381836, "step": 6538 }, { "epoch": 0.9111683968508326, "grad_norm": 0.867601215839386, "learning_rate": 2.250895978001788e-07, "loss": 0.0875544548034668, "step": 6539 }, { "epoch": 0.911307740542047, "grad_norm": 1.2775412797927856, "learning_rate": 2.2439095659164467e-07, "loss": 0.07979106903076172, "step": 6540 }, { "epoch": 0.9114470842332614, "grad_norm": 0.9566131234169006, "learning_rate": 2.236933764144117e-07, "loss": 0.07972145080566406, "step": 6541 }, { "epoch": 0.9115864279244758, "grad_norm": 0.8067091107368469, "learning_rate": 2.2299685742346423e-07, "loss": 0.07751846313476562, "step": 6542 }, { "epoch": 0.9117257716156901, "grad_norm": 0.8875723481178284, "learning_rate": 2.223013997735557e-07, "loss": 0.07157571613788605, "step": 6543 }, { "epoch": 0.9118651153069045, "grad_norm": 0.7147921919822693, "learning_rate": 2.2160700361919807e-07, "loss": 0.07562780380249023, "step": 6544 }, { "epoch": 0.9120044589981189, "grad_norm": 0.5757437944412231, "learning_rate": 2.2091366911467238e-07, "loss": 0.07391643524169922, "step": 6545 }, { "epoch": 0.9121438026893333, "grad_norm": 0.9665696024894714, "learning_rate": 2.2022139641402095e-07, "loss": 0.09286022186279297, "step": 6546 }, { "epoch": 0.9122831463805476, "grad_norm": 0.83360356092453, "learning_rate": 2.1953018567105078e-07, "loss": 0.07519721984863281, "step": 6547 }, { "epoch": 0.912422490071762, "grad_norm": 0.7586647272109985, "learning_rate": 2.1884003703933343e-07, "loss": 0.0645599365234375, "step": 6548 }, { "epoch": 0.9125618337629764, "grad_norm": 0.8758623003959656, "learning_rate": 2.181509506722046e-07, "loss": 0.08392715454101562, "step": 6549 }, { "epoch": 0.9127011774541908, "grad_norm": 0.5121265053749084, "learning_rate": 2.1746292672276238e-07, "loss": 0.0709085464477539, "step": 6550 }, { "epoch": 0.9128405211454051, "grad_norm": 0.7605056166648865, "learning_rate": 2.1677596534387114e-07, "loss": 0.08495903015136719, "step": 6551 }, { "epoch": 0.9129798648366195, "grad_norm": 0.7033904194831848, "learning_rate": 2.1609006668815768e-07, "loss": 0.08229446411132812, "step": 6552 }, { "epoch": 0.9131192085278339, "grad_norm": 0.7881150841712952, "learning_rate": 2.1540523090801292e-07, "loss": 0.08051204681396484, "step": 6553 }, { "epoch": 0.9132585522190483, "grad_norm": 0.5120587944984436, "learning_rate": 2.1472145815559064e-07, "loss": 0.056194305419921875, "step": 6554 }, { "epoch": 0.9133978959102627, "grad_norm": 0.7309039235115051, "learning_rate": 2.1403874858281104e-07, "loss": 0.08344841003417969, "step": 6555 }, { "epoch": 0.913537239601477, "grad_norm": 0.5764191746711731, "learning_rate": 2.133571023413572e-07, "loss": 0.06647205352783203, "step": 6556 }, { "epoch": 0.9136765832926914, "grad_norm": 0.8100705742835999, "learning_rate": 2.1267651958267298e-07, "loss": 0.0806121826171875, "step": 6557 }, { "epoch": 0.9138159269839058, "grad_norm": 0.4294814467430115, "learning_rate": 2.1199700045797077e-07, "loss": 0.06648904085159302, "step": 6558 }, { "epoch": 0.9139552706751202, "grad_norm": 0.8310063481330872, "learning_rate": 2.113185451182226e-07, "loss": 0.07488155364990234, "step": 6559 }, { "epoch": 0.9140946143663345, "grad_norm": 0.7248222231864929, "learning_rate": 2.106411537141656e-07, "loss": 0.08477210998535156, "step": 6560 }, { "epoch": 0.9142339580575489, "grad_norm": 0.9381599426269531, "learning_rate": 2.0996482639630167e-07, "loss": 0.10063838958740234, "step": 6561 }, { "epoch": 0.9143733017487633, "grad_norm": 0.7356530427932739, "learning_rate": 2.0928956331489558e-07, "loss": 0.061392784118652344, "step": 6562 }, { "epoch": 0.9145126454399777, "grad_norm": 0.8071518540382385, "learning_rate": 2.08615364619974e-07, "loss": 0.073974609375, "step": 6563 }, { "epoch": 0.9146519891311921, "grad_norm": 0.7842785120010376, "learning_rate": 2.079422304613299e-07, "loss": 0.07578277587890625, "step": 6564 }, { "epoch": 0.9147913328224064, "grad_norm": 0.637068510055542, "learning_rate": 2.0727016098851694e-07, "loss": 0.07544994354248047, "step": 6565 }, { "epoch": 0.9149306765136208, "grad_norm": 0.4925079047679901, "learning_rate": 2.0659915635085515e-07, "loss": 0.0561065673828125, "step": 6566 }, { "epoch": 0.9150700202048352, "grad_norm": 0.7797544598579407, "learning_rate": 2.0592921669742528e-07, "loss": 0.08427536487579346, "step": 6567 }, { "epoch": 0.9152093638960496, "grad_norm": 0.5658919215202332, "learning_rate": 2.0526034217707213e-07, "loss": 0.062354207038879395, "step": 6568 }, { "epoch": 0.9153487075872639, "grad_norm": 0.4519265592098236, "learning_rate": 2.0459253293840632e-07, "loss": 0.05000782012939453, "step": 6569 }, { "epoch": 0.9154880512784783, "grad_norm": 0.5553063154220581, "learning_rate": 2.0392578912979853e-07, "loss": 0.06586599349975586, "step": 6570 }, { "epoch": 0.9156273949696927, "grad_norm": 0.8052016496658325, "learning_rate": 2.032601108993837e-07, "loss": 0.09639644622802734, "step": 6571 }, { "epoch": 0.9157667386609071, "grad_norm": 0.5803646445274353, "learning_rate": 2.0259549839506064e-07, "loss": 0.07259368896484375, "step": 6572 }, { "epoch": 0.9159060823521215, "grad_norm": 0.803391695022583, "learning_rate": 2.0193195176449188e-07, "loss": 0.10656261444091797, "step": 6573 }, { "epoch": 0.9160454260433359, "grad_norm": 0.5829970240592957, "learning_rate": 2.0126947115510165e-07, "loss": 0.07941055297851562, "step": 6574 }, { "epoch": 0.9161847697345503, "grad_norm": 1.3635667562484741, "learning_rate": 2.006080567140778e-07, "loss": 0.1280498504638672, "step": 6575 }, { "epoch": 0.9163241134257647, "grad_norm": 0.6788187623023987, "learning_rate": 1.999477085883711e-07, "loss": 0.07317972183227539, "step": 6576 }, { "epoch": 0.9164634571169791, "grad_norm": 1.4854204654693604, "learning_rate": 1.9928842692469752e-07, "loss": 0.10078048706054688, "step": 6577 }, { "epoch": 0.9166028008081935, "grad_norm": 0.6613288521766663, "learning_rate": 1.9863021186953268e-07, "loss": 0.06570911407470703, "step": 6578 }, { "epoch": 0.9167421444994078, "grad_norm": 0.844333827495575, "learning_rate": 1.9797306356911793e-07, "loss": 0.08093643188476562, "step": 6579 }, { "epoch": 0.9168814881906222, "grad_norm": 0.6202071309089661, "learning_rate": 1.973169821694565e-07, "loss": 0.07775497436523438, "step": 6580 }, { "epoch": 0.9170208318818366, "grad_norm": 0.9051645398139954, "learning_rate": 1.9666196781631453e-07, "loss": 0.09837055206298828, "step": 6581 }, { "epoch": 0.917160175573051, "grad_norm": 1.01629638671875, "learning_rate": 1.9600802065522063e-07, "loss": 0.09720230102539062, "step": 6582 }, { "epoch": 0.9172995192642653, "grad_norm": 0.42463481426239014, "learning_rate": 1.95355140831468e-07, "loss": 0.0559844970703125, "step": 6583 }, { "epoch": 0.9174388629554797, "grad_norm": 0.74873948097229, "learning_rate": 1.947033284901112e-07, "loss": 0.07460832595825195, "step": 6584 }, { "epoch": 0.9175782066466941, "grad_norm": 0.5284908413887024, "learning_rate": 1.9405258377596825e-07, "loss": 0.05590701103210449, "step": 6585 }, { "epoch": 0.9177175503379085, "grad_norm": 0.6685947179794312, "learning_rate": 1.9340290683361907e-07, "loss": 0.08158111572265625, "step": 6586 }, { "epoch": 0.9178568940291228, "grad_norm": 0.5979717969894409, "learning_rate": 1.9275429780740763e-07, "loss": 0.06522274017333984, "step": 6587 }, { "epoch": 0.9179962377203372, "grad_norm": 0.9925804734230042, "learning_rate": 1.921067568414403e-07, "loss": 0.08236360549926758, "step": 6588 }, { "epoch": 0.9181355814115516, "grad_norm": 1.0331692695617676, "learning_rate": 1.9146028407958483e-07, "loss": 0.09605026245117188, "step": 6589 }, { "epoch": 0.918274925102766, "grad_norm": 0.7459104061126709, "learning_rate": 1.9081487966547407e-07, "loss": 0.07365131378173828, "step": 6590 }, { "epoch": 0.9184142687939804, "grad_norm": 0.698164701461792, "learning_rate": 1.9017054374250111e-07, "loss": 0.07712459564208984, "step": 6591 }, { "epoch": 0.9185536124851947, "grad_norm": 0.48260045051574707, "learning_rate": 1.8952727645382307e-07, "loss": 0.05988788604736328, "step": 6592 }, { "epoch": 0.9186929561764091, "grad_norm": 0.5599184036254883, "learning_rate": 1.88885077942359e-07, "loss": 0.06389808654785156, "step": 6593 }, { "epoch": 0.9188322998676235, "grad_norm": 0.8860617876052856, "learning_rate": 1.8824394835079086e-07, "loss": 0.0826578140258789, "step": 6594 }, { "epoch": 0.9189716435588379, "grad_norm": 0.785531759262085, "learning_rate": 1.8760388782156468e-07, "loss": 0.09023571014404297, "step": 6595 }, { "epoch": 0.9191109872500522, "grad_norm": 0.6951229572296143, "learning_rate": 1.8696489649688454e-07, "loss": 0.07351016998291016, "step": 6596 }, { "epoch": 0.9192503309412666, "grad_norm": 0.5828389525413513, "learning_rate": 1.8632697451872074e-07, "loss": 0.08014106750488281, "step": 6597 }, { "epoch": 0.919389674632481, "grad_norm": 0.8709299564361572, "learning_rate": 1.8569012202880599e-07, "loss": 0.07144737243652344, "step": 6598 }, { "epoch": 0.9195290183236954, "grad_norm": 0.8809630870819092, "learning_rate": 1.850543391686327e-07, "loss": 0.09311199188232422, "step": 6599 }, { "epoch": 0.9196683620149098, "grad_norm": 0.5245115160942078, "learning_rate": 1.8441962607945786e-07, "loss": 0.05822443962097168, "step": 6600 }, { "epoch": 0.9198077057061241, "grad_norm": 0.5674957633018494, "learning_rate": 1.83785982902302e-07, "loss": 0.06791400909423828, "step": 6601 }, { "epoch": 0.9199470493973385, "grad_norm": 0.7527022361755371, "learning_rate": 1.8315340977794415e-07, "loss": 0.07513999938964844, "step": 6602 }, { "epoch": 0.9200863930885529, "grad_norm": 0.8569799661636353, "learning_rate": 1.825219068469275e-07, "loss": 0.06985282897949219, "step": 6603 }, { "epoch": 0.9202257367797673, "grad_norm": 0.4222572147846222, "learning_rate": 1.818914742495581e-07, "loss": 0.05939292907714844, "step": 6604 }, { "epoch": 0.9203650804709816, "grad_norm": 0.5731126666069031, "learning_rate": 1.8126211212590505e-07, "loss": 0.07146644592285156, "step": 6605 }, { "epoch": 0.920504424162196, "grad_norm": 0.6047731041908264, "learning_rate": 1.8063382061579648e-07, "loss": 0.06442832946777344, "step": 6606 }, { "epoch": 0.9206437678534104, "grad_norm": 0.6017473936080933, "learning_rate": 1.8000659985882463e-07, "loss": 0.07469844818115234, "step": 6607 }, { "epoch": 0.9207831115446248, "grad_norm": 0.44864702224731445, "learning_rate": 1.7938044999434412e-07, "loss": 0.06297540664672852, "step": 6608 }, { "epoch": 0.9209224552358392, "grad_norm": 1.2141042947769165, "learning_rate": 1.7875537116147146e-07, "loss": 0.08220958709716797, "step": 6609 }, { "epoch": 0.9210617989270535, "grad_norm": 0.7946805953979492, "learning_rate": 1.781313634990839e-07, "loss": 0.0870208740234375, "step": 6610 }, { "epoch": 0.9212011426182679, "grad_norm": 0.8551148176193237, "learning_rate": 1.7750842714582272e-07, "loss": 0.0651407241821289, "step": 6611 }, { "epoch": 0.9213404863094823, "grad_norm": 0.81541508436203, "learning_rate": 1.7688656224008893e-07, "loss": 0.08141136169433594, "step": 6612 }, { "epoch": 0.9214798300006967, "grad_norm": 0.6513948440551758, "learning_rate": 1.762657689200481e-07, "loss": 0.06866073608398438, "step": 6613 }, { "epoch": 0.9216191736919112, "grad_norm": 0.7106896042823792, "learning_rate": 1.7564604732362545e-07, "loss": 0.07831287384033203, "step": 6614 }, { "epoch": 0.9217585173831255, "grad_norm": 0.5796127319335938, "learning_rate": 1.7502739758850863e-07, "loss": 0.058443546295166016, "step": 6615 }, { "epoch": 0.9218978610743399, "grad_norm": 1.0134220123291016, "learning_rate": 1.7440981985214933e-07, "loss": 0.0803680419921875, "step": 6616 }, { "epoch": 0.9220372047655543, "grad_norm": 0.9431380033493042, "learning_rate": 1.7379331425175728e-07, "loss": 0.09559249877929688, "step": 6617 }, { "epoch": 0.9221765484567687, "grad_norm": 0.9154015779495239, "learning_rate": 1.7317788092430676e-07, "loss": 0.10299396514892578, "step": 6618 }, { "epoch": 0.922315892147983, "grad_norm": 1.0190646648406982, "learning_rate": 1.725635200065323e-07, "loss": 0.09591007232666016, "step": 6619 }, { "epoch": 0.9224552358391974, "grad_norm": 0.7313058972358704, "learning_rate": 1.7195023163493253e-07, "loss": 0.07690596580505371, "step": 6620 }, { "epoch": 0.9225945795304118, "grad_norm": 1.8306819200515747, "learning_rate": 1.7133801594576393e-07, "loss": 0.12015533447265625, "step": 6621 }, { "epoch": 0.9227339232216262, "grad_norm": 1.149283528327942, "learning_rate": 1.7072687307504887e-07, "loss": 0.10288143157958984, "step": 6622 }, { "epoch": 0.9228732669128406, "grad_norm": 0.6069688200950623, "learning_rate": 1.701168031585676e-07, "loss": 0.07524299621582031, "step": 6623 }, { "epoch": 0.9230126106040549, "grad_norm": 0.5411213636398315, "learning_rate": 1.695078063318656e-07, "loss": 0.06725454330444336, "step": 6624 }, { "epoch": 0.9231519542952693, "grad_norm": 0.7055895328521729, "learning_rate": 1.6889988273024627e-07, "loss": 0.08487606048583984, "step": 6625 }, { "epoch": 0.9232912979864837, "grad_norm": 0.4783441424369812, "learning_rate": 1.682930324887766e-07, "loss": 0.06507158279418945, "step": 6626 }, { "epoch": 0.9234306416776981, "grad_norm": 0.47448232769966125, "learning_rate": 1.6768725574228706e-07, "loss": 0.061217308044433594, "step": 6627 }, { "epoch": 0.9235699853689124, "grad_norm": 0.7097110152244568, "learning_rate": 1.6708255262536443e-07, "loss": 0.07701683044433594, "step": 6628 }, { "epoch": 0.9237093290601268, "grad_norm": 0.8218961358070374, "learning_rate": 1.6647892327236125e-07, "loss": 0.07338881492614746, "step": 6629 }, { "epoch": 0.9238486727513412, "grad_norm": 0.4361800253391266, "learning_rate": 1.658763678173908e-07, "loss": 0.05704450607299805, "step": 6630 }, { "epoch": 0.9239880164425556, "grad_norm": 0.523539125919342, "learning_rate": 1.6527488639432543e-07, "loss": 0.06337213516235352, "step": 6631 }, { "epoch": 0.92412736013377, "grad_norm": 1.0284265279769897, "learning_rate": 1.6467447913680268e-07, "loss": 0.10491371154785156, "step": 6632 }, { "epoch": 0.9242667038249843, "grad_norm": 0.7055091261863708, "learning_rate": 1.6407514617821752e-07, "loss": 0.07327651977539062, "step": 6633 }, { "epoch": 0.9244060475161987, "grad_norm": 0.6289371848106384, "learning_rate": 1.6347688765172953e-07, "loss": 0.07614421844482422, "step": 6634 }, { "epoch": 0.9245453912074131, "grad_norm": 0.9988621473312378, "learning_rate": 1.6287970369025686e-07, "loss": 0.08929252624511719, "step": 6635 }, { "epoch": 0.9246847348986275, "grad_norm": 1.0829473733901978, "learning_rate": 1.6228359442648112e-07, "loss": 0.09676170349121094, "step": 6636 }, { "epoch": 0.9248240785898418, "grad_norm": 0.6779346466064453, "learning_rate": 1.616885599928436e-07, "loss": 0.07979011535644531, "step": 6637 }, { "epoch": 0.9249634222810562, "grad_norm": 0.596625804901123, "learning_rate": 1.6109460052154802e-07, "loss": 0.07354547083377838, "step": 6638 }, { "epoch": 0.9251027659722706, "grad_norm": 0.8431684374809265, "learning_rate": 1.6050171614455712e-07, "loss": 0.07552576065063477, "step": 6639 }, { "epoch": 0.925242109663485, "grad_norm": 0.8217649459838867, "learning_rate": 1.5990990699359777e-07, "loss": 0.07337570190429688, "step": 6640 }, { "epoch": 0.9253814533546993, "grad_norm": 0.5828179121017456, "learning_rate": 1.593191732001559e-07, "loss": 0.06574440002441406, "step": 6641 }, { "epoch": 0.9255207970459137, "grad_norm": 0.5895695090293884, "learning_rate": 1.5872951489547926e-07, "loss": 0.07488536834716797, "step": 6642 }, { "epoch": 0.9256601407371281, "grad_norm": 0.7759321928024292, "learning_rate": 1.5814093221057647e-07, "loss": 0.07882308959960938, "step": 6643 }, { "epoch": 0.9257994844283425, "grad_norm": 0.6537293791770935, "learning_rate": 1.575534252762162e-07, "loss": 0.07712936401367188, "step": 6644 }, { "epoch": 0.9259388281195569, "grad_norm": 0.6092860102653503, "learning_rate": 1.5696699422293072e-07, "loss": 0.0748453140258789, "step": 6645 }, { "epoch": 0.9260781718107712, "grad_norm": 0.6571005582809448, "learning_rate": 1.5638163918101024e-07, "loss": 0.06824827194213867, "step": 6646 }, { "epoch": 0.9262175155019856, "grad_norm": 0.7770805358886719, "learning_rate": 1.5579736028050797e-07, "loss": 0.07821369171142578, "step": 6647 }, { "epoch": 0.9263568591932, "grad_norm": 0.8438823223114014, "learning_rate": 1.5521415765123783e-07, "loss": 0.08665752410888672, "step": 6648 }, { "epoch": 0.9264962028844144, "grad_norm": 0.8969417810440063, "learning_rate": 1.546320314227734e-07, "loss": 0.08617305755615234, "step": 6649 }, { "epoch": 0.9266355465756287, "grad_norm": 0.37844616174697876, "learning_rate": 1.5405098172444954e-07, "loss": 0.05776405334472656, "step": 6650 }, { "epoch": 0.9267748902668431, "grad_norm": 1.1299517154693604, "learning_rate": 1.5347100868536246e-07, "loss": 0.08501434326171875, "step": 6651 }, { "epoch": 0.9269142339580575, "grad_norm": 0.7976924777030945, "learning_rate": 1.5289211243436964e-07, "loss": 0.06573867797851562, "step": 6652 }, { "epoch": 0.9270535776492719, "grad_norm": 0.6065711379051208, "learning_rate": 1.5231429310008817e-07, "loss": 0.07367420196533203, "step": 6653 }, { "epoch": 0.9271929213404864, "grad_norm": 0.9220256805419922, "learning_rate": 1.5173755081089536e-07, "loss": 0.07559394836425781, "step": 6654 }, { "epoch": 0.9273322650317007, "grad_norm": 0.7622584700584412, "learning_rate": 1.511618856949315e-07, "loss": 0.0696415901184082, "step": 6655 }, { "epoch": 0.9274716087229151, "grad_norm": 0.9742836952209473, "learning_rate": 1.5058729788009597e-07, "loss": 0.08990001678466797, "step": 6656 }, { "epoch": 0.9276109524141295, "grad_norm": 0.6327561140060425, "learning_rate": 1.5001378749404883e-07, "loss": 0.0671701431274414, "step": 6657 }, { "epoch": 0.9277502961053439, "grad_norm": 0.861524760723114, "learning_rate": 1.4944135466421095e-07, "loss": 0.0909423828125, "step": 6658 }, { "epoch": 0.9278896397965583, "grad_norm": 0.5845295190811157, "learning_rate": 1.4886999951776448e-07, "loss": 0.0737752914428711, "step": 6659 }, { "epoch": 0.9280289834877726, "grad_norm": 0.6960659623146057, "learning_rate": 1.4829972218165013e-07, "loss": 0.06369876861572266, "step": 6660 }, { "epoch": 0.928168327178987, "grad_norm": 0.6172186136245728, "learning_rate": 1.477305227825715e-07, "loss": 0.07285118103027344, "step": 6661 }, { "epoch": 0.9283076708702014, "grad_norm": 0.7993515729904175, "learning_rate": 1.471624014469919e-07, "loss": 0.0745704174041748, "step": 6662 }, { "epoch": 0.9284470145614158, "grad_norm": 0.636481523513794, "learning_rate": 1.4659535830113368e-07, "loss": 0.07500553131103516, "step": 6663 }, { "epoch": 0.9285863582526301, "grad_norm": 0.8512954115867615, "learning_rate": 1.4602939347098278e-07, "loss": 0.07961177825927734, "step": 6664 }, { "epoch": 0.9287257019438445, "grad_norm": 0.6367312669754028, "learning_rate": 1.454645070822819e-07, "loss": 0.08022117614746094, "step": 6665 }, { "epoch": 0.9288650456350589, "grad_norm": 0.9132249355316162, "learning_rate": 1.449006992605373e-07, "loss": 0.07703781127929688, "step": 6666 }, { "epoch": 0.9290043893262733, "grad_norm": 0.623943567276001, "learning_rate": 1.443379701310127e-07, "loss": 0.0788869857788086, "step": 6667 }, { "epoch": 0.9291437330174876, "grad_norm": 1.0533260107040405, "learning_rate": 1.4377631981873474e-07, "loss": 0.07304573059082031, "step": 6668 }, { "epoch": 0.929283076708702, "grad_norm": 0.6684543490409851, "learning_rate": 1.432157484484892e-07, "loss": 0.06803369522094727, "step": 6669 }, { "epoch": 0.9294224203999164, "grad_norm": 0.43937206268310547, "learning_rate": 1.4265625614482247e-07, "loss": 0.06403732299804688, "step": 6670 }, { "epoch": 0.9295617640911308, "grad_norm": 0.7540348172187805, "learning_rate": 1.4209784303203965e-07, "loss": 0.07990074157714844, "step": 6671 }, { "epoch": 0.9297011077823452, "grad_norm": 0.6959466338157654, "learning_rate": 1.415405092342087e-07, "loss": 0.08371353149414062, "step": 6672 }, { "epoch": 0.9298404514735595, "grad_norm": 0.7121877670288086, "learning_rate": 1.4098425487515665e-07, "loss": 0.06842374801635742, "step": 6673 }, { "epoch": 0.9299797951647739, "grad_norm": 0.8288484215736389, "learning_rate": 1.4042908007846912e-07, "loss": 0.0942239761352539, "step": 6674 }, { "epoch": 0.9301191388559883, "grad_norm": 0.7195996046066284, "learning_rate": 1.3987498496749463e-07, "loss": 0.07022762298583984, "step": 6675 }, { "epoch": 0.9302584825472027, "grad_norm": 0.5032138228416443, "learning_rate": 1.3932196966533972e-07, "loss": 0.056908607482910156, "step": 6676 }, { "epoch": 0.930397826238417, "grad_norm": 0.8555684685707092, "learning_rate": 1.3877003429487224e-07, "loss": 0.09109783172607422, "step": 6677 }, { "epoch": 0.9305371699296314, "grad_norm": 0.8228211998939514, "learning_rate": 1.3821917897871905e-07, "loss": 0.08133125305175781, "step": 6678 }, { "epoch": 0.9306765136208458, "grad_norm": 0.6613706350326538, "learning_rate": 1.3766940383926785e-07, "loss": 0.07818412780761719, "step": 6679 }, { "epoch": 0.9308158573120602, "grad_norm": 0.798363447189331, "learning_rate": 1.3712070899866704e-07, "loss": 0.0869131088256836, "step": 6680 }, { "epoch": 0.9309552010032746, "grad_norm": 0.6017001867294312, "learning_rate": 1.3657309457882294e-07, "loss": 0.05779314041137695, "step": 6681 }, { "epoch": 0.9310945446944889, "grad_norm": 0.6259774565696716, "learning_rate": 1.3602656070140275e-07, "loss": 0.05492877960205078, "step": 6682 }, { "epoch": 0.9312338883857033, "grad_norm": 0.5225476622581482, "learning_rate": 1.3548110748783426e-07, "loss": 0.06303071975708008, "step": 6683 }, { "epoch": 0.9313732320769177, "grad_norm": 0.5968576669692993, "learning_rate": 1.349367350593056e-07, "loss": 0.06029224395751953, "step": 6684 }, { "epoch": 0.9315125757681321, "grad_norm": 0.6636189222335815, "learning_rate": 1.3439344353676276e-07, "loss": 0.07940053939819336, "step": 6685 }, { "epoch": 0.9316519194593464, "grad_norm": 0.4617217779159546, "learning_rate": 1.3385123304091306e-07, "loss": 0.059484004974365234, "step": 6686 }, { "epoch": 0.9317912631505608, "grad_norm": 0.7209046483039856, "learning_rate": 1.3331010369222298e-07, "loss": 0.07510757446289062, "step": 6687 }, { "epoch": 0.9319306068417752, "grad_norm": 0.8787838220596313, "learning_rate": 1.3277005561092016e-07, "loss": 0.0775461196899414, "step": 6688 }, { "epoch": 0.9320699505329896, "grad_norm": 0.7740519046783447, "learning_rate": 1.3223108891698976e-07, "loss": 0.0785684585571289, "step": 6689 }, { "epoch": 0.932209294224204, "grad_norm": 0.8999121189117432, "learning_rate": 1.316932037301788e-07, "loss": 0.09262657165527344, "step": 6690 }, { "epoch": 0.9323486379154183, "grad_norm": 1.1348230838775635, "learning_rate": 1.3115640016999222e-07, "loss": 0.0883631706237793, "step": 6691 }, { "epoch": 0.9324879816066327, "grad_norm": 0.6243541836738586, "learning_rate": 1.3062067835569625e-07, "loss": 0.07537651062011719, "step": 6692 }, { "epoch": 0.9326273252978471, "grad_norm": 0.6086629033088684, "learning_rate": 1.3008603840631516e-07, "loss": 0.07643699645996094, "step": 6693 }, { "epoch": 0.9327666689890615, "grad_norm": 1.0199986696243286, "learning_rate": 1.2955248044063452e-07, "loss": 0.07790231704711914, "step": 6694 }, { "epoch": 0.932906012680276, "grad_norm": 0.7708897590637207, "learning_rate": 1.2902000457719886e-07, "loss": 0.06983006000518799, "step": 6695 }, { "epoch": 0.9330453563714903, "grad_norm": 0.5952630639076233, "learning_rate": 1.2848861093431143e-07, "loss": 0.06165504455566406, "step": 6696 }, { "epoch": 0.9331847000627047, "grad_norm": 0.6153601408004761, "learning_rate": 1.2795829963003604e-07, "loss": 0.06315755844116211, "step": 6697 }, { "epoch": 0.9333240437539191, "grad_norm": 1.054147481918335, "learning_rate": 1.274290707821968e-07, "loss": 0.09731292724609375, "step": 6698 }, { "epoch": 0.9334633874451335, "grad_norm": 0.6610550880432129, "learning_rate": 1.269009245083741e-07, "loss": 0.08046150207519531, "step": 6699 }, { "epoch": 0.9336027311363478, "grad_norm": 0.604707658290863, "learning_rate": 1.2637386092591187e-07, "loss": 0.06984519958496094, "step": 6700 }, { "epoch": 0.9337420748275622, "grad_norm": 0.462332159280777, "learning_rate": 1.258478801519114e-07, "loss": 0.06795549392700195, "step": 6701 }, { "epoch": 0.9338814185187766, "grad_norm": 0.6980783939361572, "learning_rate": 1.2532298230323258e-07, "loss": 0.08638668060302734, "step": 6702 }, { "epoch": 0.934020762209991, "grad_norm": 1.1326817274093628, "learning_rate": 1.2479916749649657e-07, "loss": 0.07775497436523438, "step": 6703 }, { "epoch": 0.9341601059012054, "grad_norm": 0.757793128490448, "learning_rate": 1.2427643584808246e-07, "loss": 0.08014321327209473, "step": 6704 }, { "epoch": 0.9342994495924197, "grad_norm": 0.9137756824493408, "learning_rate": 1.2375478747413017e-07, "loss": 0.07569026947021484, "step": 6705 }, { "epoch": 0.9344387932836341, "grad_norm": 0.9049603343009949, "learning_rate": 1.2323422249053696e-07, "loss": 0.06972837448120117, "step": 6706 }, { "epoch": 0.9345781369748485, "grad_norm": 0.5745132565498352, "learning_rate": 1.2271474101296144e-07, "loss": 0.06574153900146484, "step": 6707 }, { "epoch": 0.9347174806660629, "grad_norm": 1.0587326288223267, "learning_rate": 1.2219634315681962e-07, "loss": 0.10393238067626953, "step": 6708 }, { "epoch": 0.9348568243572772, "grad_norm": 0.5706539154052734, "learning_rate": 1.2167902903728879e-07, "loss": 0.07149553298950195, "step": 6709 }, { "epoch": 0.9349961680484916, "grad_norm": 0.630678117275238, "learning_rate": 1.211627987693037e-07, "loss": 0.06647872924804688, "step": 6710 }, { "epoch": 0.935135511739706, "grad_norm": 0.7105315327644348, "learning_rate": 1.206476524675587e-07, "loss": 0.08259439468383789, "step": 6711 }, { "epoch": 0.9352748554309204, "grad_norm": 0.7979263067245483, "learning_rate": 1.2013359024650785e-07, "loss": 0.08241844177246094, "step": 6712 }, { "epoch": 0.9354141991221347, "grad_norm": 0.5822186470031738, "learning_rate": 1.196206122203647e-07, "loss": 0.06985759735107422, "step": 6713 }, { "epoch": 0.9355535428133491, "grad_norm": 0.7168726325035095, "learning_rate": 1.1910871850309979e-07, "loss": 0.07223176956176758, "step": 6714 }, { "epoch": 0.9356928865045635, "grad_norm": 0.984362006187439, "learning_rate": 1.1859790920844494e-07, "loss": 0.09944915771484375, "step": 6715 }, { "epoch": 0.9358322301957779, "grad_norm": 0.9219760894775391, "learning_rate": 1.1808818444989046e-07, "loss": 0.08903312683105469, "step": 6716 }, { "epoch": 0.9359715738869923, "grad_norm": 0.8818931579589844, "learning_rate": 1.1757954434068574e-07, "loss": 0.07653427124023438, "step": 6717 }, { "epoch": 0.9361109175782066, "grad_norm": 0.8577467799186707, "learning_rate": 1.1707198899383875e-07, "loss": 0.07859611511230469, "step": 6718 }, { "epoch": 0.936250261269421, "grad_norm": 0.8392706513404846, "learning_rate": 1.1656551852211595e-07, "loss": 0.061649322509765625, "step": 6719 }, { "epoch": 0.9363896049606354, "grad_norm": 0.7417725324630737, "learning_rate": 1.1606013303804508e-07, "loss": 0.07131290435791016, "step": 6720 }, { "epoch": 0.9365289486518498, "grad_norm": 1.2039662599563599, "learning_rate": 1.1555583265390968e-07, "loss": 0.08253097534179688, "step": 6721 }, { "epoch": 0.9366682923430641, "grad_norm": 0.6644238233566284, "learning_rate": 1.1505261748175512e-07, "loss": 0.059380531311035156, "step": 6722 }, { "epoch": 0.9368076360342785, "grad_norm": 0.8928946852684021, "learning_rate": 1.1455048763338361e-07, "loss": 0.07221412658691406, "step": 6723 }, { "epoch": 0.9369469797254929, "grad_norm": 0.6041558384895325, "learning_rate": 1.1404944322035705e-07, "loss": 0.06000041961669922, "step": 6724 }, { "epoch": 0.9370863234167073, "grad_norm": 1.03781259059906, "learning_rate": 1.1354948435399582e-07, "loss": 0.08323955535888672, "step": 6725 }, { "epoch": 0.9372256671079217, "grad_norm": 0.43355593085289, "learning_rate": 1.130506111453794e-07, "loss": 0.06478404998779297, "step": 6726 }, { "epoch": 0.937365010799136, "grad_norm": 0.43441519141197205, "learning_rate": 1.1255282370534748e-07, "loss": 0.058073997497558594, "step": 6727 }, { "epoch": 0.9375043544903504, "grad_norm": 0.5839492082595825, "learning_rate": 1.1205612214449434e-07, "loss": 0.07778072357177734, "step": 6728 }, { "epoch": 0.9376436981815648, "grad_norm": 0.5575488209724426, "learning_rate": 1.1156050657317785e-07, "loss": 0.073944091796875, "step": 6729 }, { "epoch": 0.9377830418727792, "grad_norm": 0.8497651815414429, "learning_rate": 1.1106597710151157e-07, "loss": 0.06876230239868164, "step": 6730 }, { "epoch": 0.9379223855639935, "grad_norm": 0.8136534094810486, "learning_rate": 1.1057253383936928e-07, "loss": 0.07967567443847656, "step": 6731 }, { "epoch": 0.9380617292552079, "grad_norm": 0.8406270742416382, "learning_rate": 1.1008017689638162e-07, "loss": 0.09208011627197266, "step": 6732 }, { "epoch": 0.9382010729464223, "grad_norm": 0.8164215683937073, "learning_rate": 1.0958890638194108e-07, "loss": 0.0716400146484375, "step": 6733 }, { "epoch": 0.9383404166376367, "grad_norm": 1.0161607265472412, "learning_rate": 1.0909872240519481e-07, "loss": 0.09802818298339844, "step": 6734 }, { "epoch": 0.9384797603288512, "grad_norm": 0.656505286693573, "learning_rate": 1.0860962507505124e-07, "loss": 0.07216167449951172, "step": 6735 }, { "epoch": 0.9386191040200655, "grad_norm": 0.7032126784324646, "learning_rate": 1.0812161450017678e-07, "loss": 0.07006168365478516, "step": 6736 }, { "epoch": 0.9387584477112799, "grad_norm": 0.8234323859214783, "learning_rate": 1.0763469078899635e-07, "loss": 0.0833597183227539, "step": 6737 }, { "epoch": 0.9388977914024943, "grad_norm": 0.656849205493927, "learning_rate": 1.0714885404969288e-07, "loss": 0.07435131072998047, "step": 6738 }, { "epoch": 0.9390371350937087, "grad_norm": 0.7259564995765686, "learning_rate": 1.0666410439020836e-07, "loss": 0.07731389999389648, "step": 6739 }, { "epoch": 0.939176478784923, "grad_norm": 1.418168544769287, "learning_rate": 1.0618044191824273e-07, "loss": 0.11719989776611328, "step": 6740 }, { "epoch": 0.9393158224761374, "grad_norm": 0.5241507291793823, "learning_rate": 1.056978667412556e-07, "loss": 0.061824798583984375, "step": 6741 }, { "epoch": 0.9394551661673518, "grad_norm": 1.1069388389587402, "learning_rate": 1.0521637896646286e-07, "loss": 0.11240768432617188, "step": 6742 }, { "epoch": 0.9395945098585662, "grad_norm": 0.8272306323051453, "learning_rate": 1.0473597870084174e-07, "loss": 0.08341455459594727, "step": 6743 }, { "epoch": 0.9397338535497806, "grad_norm": 0.3727540969848633, "learning_rate": 1.0425666605112516e-07, "loss": 0.0540623664855957, "step": 6744 }, { "epoch": 0.9398731972409949, "grad_norm": 0.7671849131584167, "learning_rate": 1.0377844112380575e-07, "loss": 0.0810389518737793, "step": 6745 }, { "epoch": 0.9400125409322093, "grad_norm": 0.6699565052986145, "learning_rate": 1.0330130402513406e-07, "loss": 0.06861209869384766, "step": 6746 }, { "epoch": 0.9401518846234237, "grad_norm": 0.9356794357299805, "learning_rate": 1.028252548611186e-07, "loss": 0.09105300903320312, "step": 6747 }, { "epoch": 0.9402912283146381, "grad_norm": 0.7313507199287415, "learning_rate": 1.0235029373752758e-07, "loss": 0.06370258331298828, "step": 6748 }, { "epoch": 0.9404305720058524, "grad_norm": 0.5934543013572693, "learning_rate": 1.0187642075988602e-07, "loss": 0.06985282897949219, "step": 6749 }, { "epoch": 0.9405699156970668, "grad_norm": 0.7120091915130615, "learning_rate": 1.0140363603347747e-07, "loss": 0.07475662231445312, "step": 6750 }, { "epoch": 0.9407092593882812, "grad_norm": 0.7921925187110901, "learning_rate": 1.0093193966334403e-07, "loss": 0.07603931427001953, "step": 6751 }, { "epoch": 0.9408486030794956, "grad_norm": 0.5795302987098694, "learning_rate": 1.0046133175428685e-07, "loss": 0.06351232528686523, "step": 6752 }, { "epoch": 0.94098794677071, "grad_norm": 0.5253768563270569, "learning_rate": 9.999181241086231e-08, "loss": 0.05368995666503906, "step": 6753 }, { "epoch": 0.9411272904619243, "grad_norm": 0.8996132016181946, "learning_rate": 9.952338173738862e-08, "loss": 0.09373092651367188, "step": 6754 }, { "epoch": 0.9412666341531387, "grad_norm": 0.9634394645690918, "learning_rate": 9.905603983793921e-08, "loss": 0.0791778564453125, "step": 6755 }, { "epoch": 0.9414059778443531, "grad_norm": 0.5507150888442993, "learning_rate": 9.858978681634823e-08, "loss": 0.06803607940673828, "step": 6756 }, { "epoch": 0.9415453215355675, "grad_norm": 0.49679088592529297, "learning_rate": 9.81246227762045e-08, "loss": 0.05889272689819336, "step": 6757 }, { "epoch": 0.9416846652267818, "grad_norm": 0.6901203393936157, "learning_rate": 9.76605478208581e-08, "loss": 0.08274173736572266, "step": 6758 }, { "epoch": 0.9418240089179962, "grad_norm": 0.5849433541297913, "learning_rate": 9.719756205341658e-08, "loss": 0.07146310806274414, "step": 6759 }, { "epoch": 0.9419633526092106, "grad_norm": 1.062755823135376, "learning_rate": 9.673566557674263e-08, "loss": 0.08638572692871094, "step": 6760 }, { "epoch": 0.942102696300425, "grad_norm": 0.6846888661384583, "learning_rate": 9.627485849346085e-08, "loss": 0.06480598449707031, "step": 6761 }, { "epoch": 0.9422420399916394, "grad_norm": 0.9903894066810608, "learning_rate": 9.581514090595212e-08, "loss": 0.07900142669677734, "step": 6762 }, { "epoch": 0.9423813836828537, "grad_norm": 0.8836029171943665, "learning_rate": 9.535651291635362e-08, "loss": 0.07790756225585938, "step": 6763 }, { "epoch": 0.9425207273740681, "grad_norm": 0.6389946341514587, "learning_rate": 9.489897462656383e-08, "loss": 0.08035945892333984, "step": 6764 }, { "epoch": 0.9426600710652825, "grad_norm": 0.8768322467803955, "learning_rate": 9.44425261382359e-08, "loss": 0.08234214782714844, "step": 6765 }, { "epoch": 0.9427994147564969, "grad_norm": 0.6481616497039795, "learning_rate": 9.39871675527837e-08, "loss": 0.07001686096191406, "step": 6766 }, { "epoch": 0.9429387584477112, "grad_norm": 0.7713452577590942, "learning_rate": 9.353289897137574e-08, "loss": 0.07030200958251953, "step": 6767 }, { "epoch": 0.9430781021389256, "grad_norm": 0.7392988801002502, "learning_rate": 9.30797204949413e-08, "loss": 0.06767845153808594, "step": 6768 }, { "epoch": 0.94321744583014, "grad_norm": 0.5855527520179749, "learning_rate": 9.262763222416649e-08, "loss": 0.07655620574951172, "step": 6769 }, { "epoch": 0.9433567895213544, "grad_norm": 0.4592386484146118, "learning_rate": 9.217663425949486e-08, "loss": 0.059327125549316406, "step": 6770 }, { "epoch": 0.9434961332125688, "grad_norm": 0.4434945285320282, "learning_rate": 9.172672670112681e-08, "loss": 0.059187889099121094, "step": 6771 }, { "epoch": 0.9436354769037831, "grad_norm": 0.8689161539077759, "learning_rate": 9.127790964902239e-08, "loss": 0.08357906341552734, "step": 6772 }, { "epoch": 0.9437748205949975, "grad_norm": 0.43822580575942993, "learning_rate": 9.083018320289849e-08, "loss": 0.05401611328125, "step": 6773 }, { "epoch": 0.9439141642862119, "grad_norm": 0.7952272295951843, "learning_rate": 9.038354746222999e-08, "loss": 0.08137893676757812, "step": 6774 }, { "epoch": 0.9440535079774264, "grad_norm": 0.5667723417282104, "learning_rate": 8.993800252624863e-08, "loss": 0.06383228302001953, "step": 6775 }, { "epoch": 0.9441928516686408, "grad_norm": 0.5090085864067078, "learning_rate": 8.94935484939441e-08, "loss": 0.07111549377441406, "step": 6776 }, { "epoch": 0.9443321953598551, "grad_norm": 0.8275553584098816, "learning_rate": 8.905018546406519e-08, "loss": 0.06835746765136719, "step": 6777 }, { "epoch": 0.9444715390510695, "grad_norm": 0.6859777569770813, "learning_rate": 8.860791353511532e-08, "loss": 0.06421184539794922, "step": 6778 }, { "epoch": 0.9446108827422839, "grad_norm": 0.6671371459960938, "learning_rate": 8.816673280535815e-08, "loss": 0.08957290649414062, "step": 6779 }, { "epoch": 0.9447502264334983, "grad_norm": 0.6115559339523315, "learning_rate": 8.772664337281412e-08, "loss": 0.06138134002685547, "step": 6780 }, { "epoch": 0.9448895701247126, "grad_norm": 0.9445211291313171, "learning_rate": 8.728764533526112e-08, "loss": 0.0718841552734375, "step": 6781 }, { "epoch": 0.945028913815927, "grad_norm": 0.829624354839325, "learning_rate": 8.684973879023395e-08, "loss": 0.0714864730834961, "step": 6782 }, { "epoch": 0.9451682575071414, "grad_norm": 0.7638136148452759, "learning_rate": 8.641292383502531e-08, "loss": 0.06226158142089844, "step": 6783 }, { "epoch": 0.9453076011983558, "grad_norm": 0.4991914629936218, "learning_rate": 8.597720056668646e-08, "loss": 0.05727863311767578, "step": 6784 }, { "epoch": 0.9454469448895702, "grad_norm": 0.6675022840499878, "learning_rate": 8.55425690820244e-08, "loss": 0.0759420394897461, "step": 6785 }, { "epoch": 0.9455862885807845, "grad_norm": 0.6099369525909424, "learning_rate": 8.510902947760469e-08, "loss": 0.07763862609863281, "step": 6786 }, { "epoch": 0.9457256322719989, "grad_norm": 0.6751165986061096, "learning_rate": 8.467658184974914e-08, "loss": 0.06734371185302734, "step": 6787 }, { "epoch": 0.9458649759632133, "grad_norm": 0.8355581760406494, "learning_rate": 8.424522629453924e-08, "loss": 0.06884765625, "step": 6788 }, { "epoch": 0.9460043196544277, "grad_norm": 0.915313184261322, "learning_rate": 8.381496290781055e-08, "loss": 0.11313438415527344, "step": 6789 }, { "epoch": 0.946143663345642, "grad_norm": 0.7492846250534058, "learning_rate": 8.338579178515882e-08, "loss": 0.07694196701049805, "step": 6790 }, { "epoch": 0.9462830070368564, "grad_norm": 0.7432813048362732, "learning_rate": 8.295771302193723e-08, "loss": 0.0767660140991211, "step": 6791 }, { "epoch": 0.9464223507280708, "grad_norm": 0.6076717376708984, "learning_rate": 8.253072671325246e-08, "loss": 0.07083320617675781, "step": 6792 }, { "epoch": 0.9465616944192852, "grad_norm": 0.5740326046943665, "learning_rate": 8.210483295397309e-08, "loss": 0.06432867050170898, "step": 6793 }, { "epoch": 0.9467010381104995, "grad_norm": 0.837683916091919, "learning_rate": 8.168003183872175e-08, "loss": 0.0796213150024414, "step": 6794 }, { "epoch": 0.9468403818017139, "grad_norm": 0.50616854429245, "learning_rate": 8.125632346188073e-08, "loss": 0.04963111877441406, "step": 6795 }, { "epoch": 0.9469797254929283, "grad_norm": 0.7865211367607117, "learning_rate": 8.083370791758804e-08, "loss": 0.06396675109863281, "step": 6796 }, { "epoch": 0.9471190691841427, "grad_norm": 0.7477550506591797, "learning_rate": 8.04121852997386e-08, "loss": 0.0790548324584961, "step": 6797 }, { "epoch": 0.9472584128753571, "grad_norm": 0.6101304292678833, "learning_rate": 7.999175570198526e-08, "loss": 0.06891727447509766, "step": 6798 }, { "epoch": 0.9473977565665714, "grad_norm": 1.1650843620300293, "learning_rate": 7.957241921773828e-08, "loss": 0.10495471954345703, "step": 6799 }, { "epoch": 0.9475371002577858, "grad_norm": 0.8119065761566162, "learning_rate": 7.915417594016428e-08, "loss": 0.08420372009277344, "step": 6800 }, { "epoch": 0.9476764439490002, "grad_norm": 1.063091516494751, "learning_rate": 7.873702596218836e-08, "loss": 0.10959815979003906, "step": 6801 }, { "epoch": 0.9478157876402146, "grad_norm": 0.6252805590629578, "learning_rate": 7.83209693764908e-08, "loss": 0.0699462890625, "step": 6802 }, { "epoch": 0.947955131331429, "grad_norm": 1.6626070737838745, "learning_rate": 7.790600627550937e-08, "loss": 0.10441875457763672, "step": 6803 }, { "epoch": 0.9480944750226433, "grad_norm": 0.6895521283149719, "learning_rate": 7.749213675143974e-08, "loss": 0.08225059509277344, "step": 6804 }, { "epoch": 0.9482338187138577, "grad_norm": 0.735104501247406, "learning_rate": 7.707936089623558e-08, "loss": 0.08326435089111328, "step": 6805 }, { "epoch": 0.9483731624050721, "grad_norm": 0.6706535816192627, "learning_rate": 7.666767880160464e-08, "loss": 0.08792686462402344, "step": 6806 }, { "epoch": 0.9485125060962865, "grad_norm": 0.6677177548408508, "learning_rate": 7.625709055901375e-08, "loss": 0.07213878631591797, "step": 6807 }, { "epoch": 0.9486518497875008, "grad_norm": 0.6268322467803955, "learning_rate": 7.584759625968663e-08, "loss": 0.06574487686157227, "step": 6808 }, { "epoch": 0.9487911934787152, "grad_norm": 0.7600622773170471, "learning_rate": 7.543919599460325e-08, "loss": 0.07490015029907227, "step": 6809 }, { "epoch": 0.9489305371699296, "grad_norm": 0.5864605903625488, "learning_rate": 7.503188985450105e-08, "loss": 0.07779741287231445, "step": 6810 }, { "epoch": 0.949069880861144, "grad_norm": 0.6425720453262329, "learning_rate": 7.462567792987374e-08, "loss": 0.07034015655517578, "step": 6811 }, { "epoch": 0.9492092245523583, "grad_norm": 0.6445058584213257, "learning_rate": 7.422056031097302e-08, "loss": 0.06863880157470703, "step": 6812 }, { "epoch": 0.9493485682435727, "grad_norm": 0.8293919563293457, "learning_rate": 7.381653708780578e-08, "loss": 0.07029438018798828, "step": 6813 }, { "epoch": 0.9494879119347871, "grad_norm": 0.4500960111618042, "learning_rate": 7.341360835013745e-08, "loss": 0.0591282844543457, "step": 6814 }, { "epoch": 0.9496272556260016, "grad_norm": 0.6979815363883972, "learning_rate": 7.301177418748973e-08, "loss": 0.08184623718261719, "step": 6815 }, { "epoch": 0.949766599317216, "grad_norm": 0.7299022078514099, "learning_rate": 7.261103468914066e-08, "loss": 0.071746826171875, "step": 6816 }, { "epoch": 0.9499059430084303, "grad_norm": 0.615136444568634, "learning_rate": 7.221138994412569e-08, "loss": 0.07223331928253174, "step": 6817 }, { "epoch": 0.9500452866996447, "grad_norm": 0.48486706614494324, "learning_rate": 7.181284004123601e-08, "loss": 0.0718374252319336, "step": 6818 }, { "epoch": 0.9501846303908591, "grad_norm": 0.8404608964920044, "learning_rate": 7.14153850690208e-08, "loss": 0.0848379135131836, "step": 6819 }, { "epoch": 0.9503239740820735, "grad_norm": 0.5176748037338257, "learning_rate": 7.101902511578606e-08, "loss": 0.05909156799316406, "step": 6820 }, { "epoch": 0.9504633177732879, "grad_norm": 0.6080252528190613, "learning_rate": 7.062376026959305e-08, "loss": 0.07041072845458984, "step": 6821 }, { "epoch": 0.9506026614645022, "grad_norm": 0.8915818333625793, "learning_rate": 7.022959061826151e-08, "loss": 0.0816659927368164, "step": 6822 }, { "epoch": 0.9507420051557166, "grad_norm": 0.6256545186042786, "learning_rate": 6.983651624936527e-08, "loss": 0.06836414337158203, "step": 6823 }, { "epoch": 0.950881348846931, "grad_norm": 1.0070698261260986, "learning_rate": 6.944453725023836e-08, "loss": 0.0969533920288086, "step": 6824 }, { "epoch": 0.9510206925381454, "grad_norm": 0.7966784238815308, "learning_rate": 6.905365370796891e-08, "loss": 0.06987524032592773, "step": 6825 }, { "epoch": 0.9511600362293597, "grad_norm": 1.2335069179534912, "learning_rate": 6.866386570940132e-08, "loss": 0.10137796401977539, "step": 6826 }, { "epoch": 0.9512993799205741, "grad_norm": 0.7238966822624207, "learning_rate": 6.827517334113965e-08, "loss": 0.07676553726196289, "step": 6827 }, { "epoch": 0.9514387236117885, "grad_norm": 0.664478600025177, "learning_rate": 6.788757668954038e-08, "loss": 0.06343746185302734, "step": 6828 }, { "epoch": 0.9515780673030029, "grad_norm": 0.697077751159668, "learning_rate": 6.750107584071964e-08, "loss": 0.07788848876953125, "step": 6829 }, { "epoch": 0.9517174109942172, "grad_norm": 1.0497163534164429, "learning_rate": 6.711567088054927e-08, "loss": 0.08273553848266602, "step": 6830 }, { "epoch": 0.9518567546854316, "grad_norm": 0.85129314661026, "learning_rate": 6.67313618946569e-08, "loss": 0.09531784057617188, "step": 6831 }, { "epoch": 0.951996098376646, "grad_norm": 0.6229251623153687, "learning_rate": 6.634814896842757e-08, "loss": 0.06885623931884766, "step": 6832 }, { "epoch": 0.9521354420678604, "grad_norm": 1.1952794790267944, "learning_rate": 6.59660321870026e-08, "loss": 0.08007192611694336, "step": 6833 }, { "epoch": 0.9522747857590748, "grad_norm": 0.746506929397583, "learning_rate": 6.558501163527964e-08, "loss": 0.0751333236694336, "step": 6834 }, { "epoch": 0.9524141294502891, "grad_norm": 1.0804924964904785, "learning_rate": 6.520508739791153e-08, "loss": 0.09304046630859375, "step": 6835 }, { "epoch": 0.9525534731415035, "grad_norm": 0.7095690965652466, "learning_rate": 6.482625955931022e-08, "loss": 0.08583450317382812, "step": 6836 }, { "epoch": 0.9526928168327179, "grad_norm": 1.070186734199524, "learning_rate": 6.444852820364222e-08, "loss": 0.10379695892333984, "step": 6837 }, { "epoch": 0.9528321605239323, "grad_norm": 0.7631090879440308, "learning_rate": 6.407189341483044e-08, "loss": 0.08407020568847656, "step": 6838 }, { "epoch": 0.9529715042151466, "grad_norm": 1.1658830642700195, "learning_rate": 6.369635527655515e-08, "loss": 0.07634592056274414, "step": 6839 }, { "epoch": 0.953110847906361, "grad_norm": 0.3957681357860565, "learning_rate": 6.332191387225128e-08, "loss": 0.04680824279785156, "step": 6840 }, { "epoch": 0.9532501915975754, "grad_norm": 0.525948166847229, "learning_rate": 6.294856928511284e-08, "loss": 0.06595134735107422, "step": 6841 }, { "epoch": 0.9533895352887898, "grad_norm": 0.9395543336868286, "learning_rate": 6.257632159808679e-08, "loss": 0.08290815353393555, "step": 6842 }, { "epoch": 0.9535288789800042, "grad_norm": 0.6041657328605652, "learning_rate": 6.220517089387867e-08, "loss": 0.07391738891601562, "step": 6843 }, { "epoch": 0.9536682226712185, "grad_norm": 0.8072631359100342, "learning_rate": 6.183511725495028e-08, "loss": 0.079925537109375, "step": 6844 }, { "epoch": 0.9538075663624329, "grad_norm": 0.9176819920539856, "learning_rate": 6.146616076351864e-08, "loss": 0.11552238464355469, "step": 6845 }, { "epoch": 0.9539469100536473, "grad_norm": 0.6801439523696899, "learning_rate": 6.109830150155705e-08, "loss": 0.06671249866485596, "step": 6846 }, { "epoch": 0.9540862537448617, "grad_norm": 1.3400622606277466, "learning_rate": 6.07315395507957e-08, "loss": 0.1023721694946289, "step": 6847 }, { "epoch": 0.954225597436076, "grad_norm": 0.6392443180084229, "learning_rate": 6.036587499272161e-08, "loss": 0.07237434387207031, "step": 6848 }, { "epoch": 0.9543649411272904, "grad_norm": 0.7779684662818909, "learning_rate": 6.000130790857595e-08, "loss": 0.07904243469238281, "step": 6849 }, { "epoch": 0.9545042848185048, "grad_norm": 0.744404673576355, "learning_rate": 5.963783837935722e-08, "loss": 0.08028125762939453, "step": 6850 }, { "epoch": 0.9546436285097192, "grad_norm": 0.7842335104942322, "learning_rate": 5.927546648582083e-08, "loss": 0.08055973052978516, "step": 6851 }, { "epoch": 0.9547829722009336, "grad_norm": 0.891547679901123, "learning_rate": 5.8914192308476835e-08, "loss": 0.07780098915100098, "step": 6852 }, { "epoch": 0.9549223158921479, "grad_norm": 0.7650583982467651, "learning_rate": 5.855401592759269e-08, "loss": 0.07374095916748047, "step": 6853 }, { "epoch": 0.9550616595833623, "grad_norm": 0.9477375745773315, "learning_rate": 5.8194937423191043e-08, "loss": 0.08200836181640625, "step": 6854 }, { "epoch": 0.9552010032745768, "grad_norm": 0.6684350967407227, "learning_rate": 5.783695687505087e-08, "loss": 0.08111095428466797, "step": 6855 }, { "epoch": 0.9553403469657912, "grad_norm": 0.6281943917274475, "learning_rate": 5.7480074362707415e-08, "loss": 0.06712627410888672, "step": 6856 }, { "epoch": 0.9554796906570056, "grad_norm": 0.5452657341957092, "learning_rate": 5.712428996545172e-08, "loss": 0.07049274444580078, "step": 6857 }, { "epoch": 0.9556190343482199, "grad_norm": 1.1228675842285156, "learning_rate": 5.6769603762331096e-08, "loss": 0.10423088073730469, "step": 6858 }, { "epoch": 0.9557583780394343, "grad_norm": 0.8596356511116028, "learning_rate": 5.641601583214862e-08, "loss": 0.10359668731689453, "step": 6859 }, { "epoch": 0.9558977217306487, "grad_norm": 0.6145489811897278, "learning_rate": 5.606352625346368e-08, "loss": 0.06811225414276123, "step": 6860 }, { "epoch": 0.9560370654218631, "grad_norm": 1.030078649520874, "learning_rate": 5.571213510459084e-08, "loss": 0.10619735717773438, "step": 6861 }, { "epoch": 0.9561764091130774, "grad_norm": 0.8200021982192993, "learning_rate": 5.53618424636021e-08, "loss": 0.08098602294921875, "step": 6862 }, { "epoch": 0.9563157528042918, "grad_norm": 0.6790552139282227, "learning_rate": 5.501264840832299e-08, "loss": 0.08105850219726562, "step": 6863 }, { "epoch": 0.9564550964955062, "grad_norm": 0.7111321687698364, "learning_rate": 5.466455301633811e-08, "loss": 0.07938957214355469, "step": 6864 }, { "epoch": 0.9565944401867206, "grad_norm": 0.930417001247406, "learning_rate": 5.431755636498559e-08, "loss": 0.08314943313598633, "step": 6865 }, { "epoch": 0.956733783877935, "grad_norm": 0.7368358969688416, "learning_rate": 5.3971658531360436e-08, "loss": 0.07335567474365234, "step": 6866 }, { "epoch": 0.9568731275691493, "grad_norm": 1.299312949180603, "learning_rate": 5.362685959231284e-08, "loss": 0.1022939682006836, "step": 6867 }, { "epoch": 0.9570124712603637, "grad_norm": 0.5761035680770874, "learning_rate": 5.3283159624448745e-08, "loss": 0.059546470642089844, "step": 6868 }, { "epoch": 0.9571518149515781, "grad_norm": 1.14421546459198, "learning_rate": 5.294055870413206e-08, "loss": 0.08118009567260742, "step": 6869 }, { "epoch": 0.9572911586427925, "grad_norm": 0.8607847690582275, "learning_rate": 5.2599056907479685e-08, "loss": 0.07737541198730469, "step": 6870 }, { "epoch": 0.9574305023340068, "grad_norm": 0.8587531447410583, "learning_rate": 5.2258654310365366e-08, "loss": 0.08713150024414062, "step": 6871 }, { "epoch": 0.9575698460252212, "grad_norm": 0.5192548632621765, "learning_rate": 5.1919350988419716e-08, "loss": 0.06056499481201172, "step": 6872 }, { "epoch": 0.9577091897164356, "grad_norm": 0.8619792461395264, "learning_rate": 5.1581147017027434e-08, "loss": 0.09712028503417969, "step": 6873 }, { "epoch": 0.95784853340765, "grad_norm": 0.5055266618728638, "learning_rate": 5.124404247133008e-08, "loss": 0.07151985168457031, "step": 6874 }, { "epoch": 0.9579878770988643, "grad_norm": 0.7390970587730408, "learning_rate": 5.090803742622441e-08, "loss": 0.07386112213134766, "step": 6875 }, { "epoch": 0.9581272207900787, "grad_norm": 0.4927445948123932, "learning_rate": 5.057313195636293e-08, "loss": 0.06415939331054688, "step": 6876 }, { "epoch": 0.9582665644812931, "grad_norm": 1.0530611276626587, "learning_rate": 5.0239326136154454e-08, "loss": 0.08645391464233398, "step": 6877 }, { "epoch": 0.9584059081725075, "grad_norm": 0.9792599081993103, "learning_rate": 4.990662003976243e-08, "loss": 0.09067726135253906, "step": 6878 }, { "epoch": 0.9585452518637219, "grad_norm": 0.7776716351509094, "learning_rate": 4.957501374110718e-08, "loss": 0.0861053466796875, "step": 6879 }, { "epoch": 0.9586845955549362, "grad_norm": 0.6652316451072693, "learning_rate": 4.924450731386365e-08, "loss": 0.07381677627563477, "step": 6880 }, { "epoch": 0.9588239392461506, "grad_norm": 0.8395054340362549, "learning_rate": 4.8915100831463116e-08, "loss": 0.08115005493164062, "step": 6881 }, { "epoch": 0.958963282937365, "grad_norm": 0.6504563093185425, "learning_rate": 4.858679436709201e-08, "loss": 0.07933163642883301, "step": 6882 }, { "epoch": 0.9591026266285794, "grad_norm": 0.755500078201294, "learning_rate": 4.825958799369201e-08, "loss": 0.08556079864501953, "step": 6883 }, { "epoch": 0.9592419703197937, "grad_norm": 1.1815056800842285, "learning_rate": 4.7933481783961624e-08, "loss": 0.09115982055664062, "step": 6884 }, { "epoch": 0.9593813140110081, "grad_norm": 0.6495813727378845, "learning_rate": 4.760847581035399e-08, "loss": 0.07354068756103516, "step": 6885 }, { "epoch": 0.9595206577022225, "grad_norm": 0.7096637487411499, "learning_rate": 4.728457014507859e-08, "loss": 0.07711315155029297, "step": 6886 }, { "epoch": 0.9596600013934369, "grad_norm": 0.7591232061386108, "learning_rate": 4.69617648600984e-08, "loss": 0.07174015045166016, "step": 6887 }, { "epoch": 0.9597993450846513, "grad_norm": 0.5628274083137512, "learning_rate": 4.664006002713495e-08, "loss": 0.05811786651611328, "step": 6888 }, { "epoch": 0.9599386887758656, "grad_norm": 0.9388400912284851, "learning_rate": 4.631945571766272e-08, "loss": 0.07894492149353027, "step": 6889 }, { "epoch": 0.96007803246708, "grad_norm": 0.556503415107727, "learning_rate": 4.5999952002912516e-08, "loss": 0.06635856628417969, "step": 6890 }, { "epoch": 0.9602173761582944, "grad_norm": 0.6648526191711426, "learning_rate": 4.5681548953872555e-08, "loss": 0.06662893295288086, "step": 6891 }, { "epoch": 0.9603567198495088, "grad_norm": 0.564328134059906, "learning_rate": 4.536424664128236e-08, "loss": 0.06304454803466797, "step": 6892 }, { "epoch": 0.9604960635407231, "grad_norm": 0.5754169821739197, "learning_rate": 4.504804513564054e-08, "loss": 0.0595473051071167, "step": 6893 }, { "epoch": 0.9606354072319375, "grad_norm": 0.9003597497940063, "learning_rate": 4.473294450719923e-08, "loss": 0.0781545639038086, "step": 6894 }, { "epoch": 0.9607747509231519, "grad_norm": 0.5483642816543579, "learning_rate": 4.441894482596743e-08, "loss": 0.06178569793701172, "step": 6895 }, { "epoch": 0.9609140946143664, "grad_norm": 0.6161083579063416, "learning_rate": 4.410604616170822e-08, "loss": 0.0709381103515625, "step": 6896 }, { "epoch": 0.9610534383055808, "grad_norm": 0.5747891068458557, "learning_rate": 4.379424858394043e-08, "loss": 0.06525230407714844, "step": 6897 }, { "epoch": 0.9611927819967951, "grad_norm": 0.5845580101013184, "learning_rate": 4.348355216193867e-08, "loss": 0.06830215454101562, "step": 6898 }, { "epoch": 0.9613321256880095, "grad_norm": 0.7153400182723999, "learning_rate": 4.3173956964732145e-08, "loss": 0.08409500122070312, "step": 6899 }, { "epoch": 0.9614714693792239, "grad_norm": 0.8295678496360779, "learning_rate": 4.286546306110639e-08, "loss": 0.07614398002624512, "step": 6900 }, { "epoch": 0.9616108130704383, "grad_norm": 0.9563830494880676, "learning_rate": 4.2558070519601594e-08, "loss": 0.0831594467163086, "step": 6901 }, { "epoch": 0.9617501567616527, "grad_norm": 0.8358207941055298, "learning_rate": 4.2251779408513104e-08, "loss": 0.09022712707519531, "step": 6902 }, { "epoch": 0.961889500452867, "grad_norm": 0.6407365202903748, "learning_rate": 4.19465897958915e-08, "loss": 0.08185005187988281, "step": 6903 }, { "epoch": 0.9620288441440814, "grad_norm": 0.5827127695083618, "learning_rate": 4.164250174954365e-08, "loss": 0.07674407958984375, "step": 6904 }, { "epoch": 0.9621681878352958, "grad_norm": 0.5736609697341919, "learning_rate": 4.133951533703107e-08, "loss": 0.06502103805541992, "step": 6905 }, { "epoch": 0.9623075315265102, "grad_norm": 0.5167704820632935, "learning_rate": 4.1037630625669345e-08, "loss": 0.05892133712768555, "step": 6906 }, { "epoch": 0.9624468752177245, "grad_norm": 0.753471851348877, "learning_rate": 4.07368476825315e-08, "loss": 0.07181262969970703, "step": 6907 }, { "epoch": 0.9625862189089389, "grad_norm": 0.6254007816314697, "learning_rate": 4.043716657444407e-08, "loss": 0.07445526123046875, "step": 6908 }, { "epoch": 0.9627255626001533, "grad_norm": 0.8264257907867432, "learning_rate": 4.0138587367989365e-08, "loss": 0.07985877990722656, "step": 6909 }, { "epoch": 0.9628649062913677, "grad_norm": 0.5141006708145142, "learning_rate": 3.984111012950487e-08, "loss": 0.0685873031616211, "step": 6910 }, { "epoch": 0.963004249982582, "grad_norm": 1.098280906677246, "learning_rate": 3.9544734925083264e-08, "loss": 0.07793807983398438, "step": 6911 }, { "epoch": 0.9631435936737964, "grad_norm": 0.6193368434906006, "learning_rate": 3.924946182057299e-08, "loss": 0.06600761413574219, "step": 6912 }, { "epoch": 0.9632829373650108, "grad_norm": 1.027862548828125, "learning_rate": 3.8955290881576566e-08, "loss": 0.0799551010131836, "step": 6913 }, { "epoch": 0.9634222810562252, "grad_norm": 1.1644489765167236, "learning_rate": 3.866222217345117e-08, "loss": 0.09900951385498047, "step": 6914 }, { "epoch": 0.9635616247474396, "grad_norm": 0.9522944092750549, "learning_rate": 3.837025576131137e-08, "loss": 0.09975624084472656, "step": 6915 }, { "epoch": 0.9637009684386539, "grad_norm": 0.7757484912872314, "learning_rate": 3.807939171002473e-08, "loss": 0.08406925201416016, "step": 6916 }, { "epoch": 0.9638403121298683, "grad_norm": 0.8489139676094055, "learning_rate": 3.778963008421455e-08, "loss": 0.07537460327148438, "step": 6917 }, { "epoch": 0.9639796558210827, "grad_norm": 0.878241240978241, "learning_rate": 3.750097094825933e-08, "loss": 0.07602787017822266, "step": 6918 }, { "epoch": 0.9641189995122971, "grad_norm": 0.647854745388031, "learning_rate": 3.721341436629222e-08, "loss": 0.0667123794555664, "step": 6919 }, { "epoch": 0.9642583432035114, "grad_norm": 0.5103780031204224, "learning_rate": 3.6926960402202674e-08, "loss": 0.06589794158935547, "step": 6920 }, { "epoch": 0.9643976868947258, "grad_norm": 0.9084089398384094, "learning_rate": 3.66416091196331e-08, "loss": 0.08495283126831055, "step": 6921 }, { "epoch": 0.9645370305859402, "grad_norm": 0.9176865220069885, "learning_rate": 3.63573605819828e-08, "loss": 0.10028743743896484, "step": 6922 }, { "epoch": 0.9646763742771546, "grad_norm": 0.86774742603302, "learning_rate": 3.6074214852405695e-08, "loss": 0.104949951171875, "step": 6923 }, { "epoch": 0.964815717968369, "grad_norm": 1.0806337594985962, "learning_rate": 3.5792171993809244e-08, "loss": 0.087005615234375, "step": 6924 }, { "epoch": 0.9649550616595833, "grad_norm": 0.6522619128227234, "learning_rate": 3.55112320688572e-08, "loss": 0.060422658920288086, "step": 6925 }, { "epoch": 0.9650944053507977, "grad_norm": 0.7914857864379883, "learning_rate": 3.523139513996798e-08, "loss": 0.0849313735961914, "step": 6926 }, { "epoch": 0.9652337490420121, "grad_norm": 0.7522597312927246, "learning_rate": 3.495266126931574e-08, "loss": 0.07480144500732422, "step": 6927 }, { "epoch": 0.9653730927332265, "grad_norm": 0.4958910048007965, "learning_rate": 3.467503051882815e-08, "loss": 0.06444454193115234, "step": 6928 }, { "epoch": 0.9655124364244408, "grad_norm": 0.6250443458557129, "learning_rate": 3.4398502950188096e-08, "loss": 0.0707693099975586, "step": 6929 }, { "epoch": 0.9656517801156552, "grad_norm": 1.1750926971435547, "learning_rate": 3.4123078624834214e-08, "loss": 0.10126495361328125, "step": 6930 }, { "epoch": 0.9657911238068696, "grad_norm": 0.5487766861915588, "learning_rate": 3.384875760395978e-08, "loss": 0.07576370239257812, "step": 6931 }, { "epoch": 0.965930467498084, "grad_norm": 0.7350956797599792, "learning_rate": 3.3575539948511595e-08, "loss": 0.08954715728759766, "step": 6932 }, { "epoch": 0.9660698111892984, "grad_norm": 0.763698935508728, "learning_rate": 3.330342571919332e-08, "loss": 0.08588790893554688, "step": 6933 }, { "epoch": 0.9662091548805127, "grad_norm": 1.1029212474822998, "learning_rate": 3.30324149764627e-08, "loss": 0.08576297760009766, "step": 6934 }, { "epoch": 0.9663484985717271, "grad_norm": 0.8322984576225281, "learning_rate": 3.2762507780531026e-08, "loss": 0.0832834243774414, "step": 6935 }, { "epoch": 0.9664878422629416, "grad_norm": 0.8150172829627991, "learning_rate": 3.249370419136644e-08, "loss": 0.09591364860534668, "step": 6936 }, { "epoch": 0.966627185954156, "grad_norm": 0.9421684145927429, "learning_rate": 3.2226004268690605e-08, "loss": 0.0979461669921875, "step": 6937 }, { "epoch": 0.9667665296453704, "grad_norm": 0.7426514029502869, "learning_rate": 3.195940807198039e-08, "loss": 0.06956815719604492, "step": 6938 }, { "epoch": 0.9669058733365847, "grad_norm": 0.9489829540252686, "learning_rate": 3.169391566046731e-08, "loss": 0.09865951538085938, "step": 6939 }, { "epoch": 0.9670452170277991, "grad_norm": 0.5040756464004517, "learning_rate": 3.142952709313807e-08, "loss": 0.058265089988708496, "step": 6940 }, { "epoch": 0.9671845607190135, "grad_norm": 0.7330237030982971, "learning_rate": 3.116624242873345e-08, "loss": 0.06822872161865234, "step": 6941 }, { "epoch": 0.9673239044102279, "grad_norm": 0.582054078578949, "learning_rate": 3.090406172574889e-08, "loss": 0.07623672485351562, "step": 6942 }, { "epoch": 0.9674632481014422, "grad_norm": 1.0321106910705566, "learning_rate": 3.064298504243612e-08, "loss": 0.08299636840820312, "step": 6943 }, { "epoch": 0.9676025917926566, "grad_norm": 0.9764934778213501, "learning_rate": 3.0383012436799306e-08, "loss": 0.06713485717773438, "step": 6944 }, { "epoch": 0.967741935483871, "grad_norm": 0.7733967900276184, "learning_rate": 3.0124143966599464e-08, "loss": 0.08631610870361328, "step": 6945 }, { "epoch": 0.9678812791750854, "grad_norm": 0.563905656337738, "learning_rate": 2.9866379689350024e-08, "loss": 0.07074546813964844, "step": 6946 }, { "epoch": 0.9680206228662998, "grad_norm": 1.0094231367111206, "learning_rate": 2.9609719662320735e-08, "loss": 0.0880594253540039, "step": 6947 }, { "epoch": 0.9681599665575141, "grad_norm": 0.7976794838905334, "learning_rate": 2.9354163942535983e-08, "loss": 0.07566261291503906, "step": 6948 }, { "epoch": 0.9682993102487285, "grad_norm": 0.788666307926178, "learning_rate": 2.90997125867748e-08, "loss": 0.08048439025878906, "step": 6949 }, { "epoch": 0.9684386539399429, "grad_norm": 1.2793079614639282, "learning_rate": 2.8846365651569175e-08, "loss": 0.0899806022644043, "step": 6950 }, { "epoch": 0.9685779976311573, "grad_norm": 0.8937751650810242, "learning_rate": 2.8594123193207978e-08, "loss": 0.09292316436767578, "step": 6951 }, { "epoch": 0.9687173413223716, "grad_norm": 0.5282196998596191, "learning_rate": 2.83429852677336e-08, "loss": 0.06443214416503906, "step": 6952 }, { "epoch": 0.968856685013586, "grad_norm": 0.5749204158782959, "learning_rate": 2.809295193094308e-08, "loss": 0.07366561889648438, "step": 6953 }, { "epoch": 0.9689960287048004, "grad_norm": 0.9367205500602722, "learning_rate": 2.7844023238388084e-08, "loss": 0.08690738677978516, "step": 6954 }, { "epoch": 0.9691353723960148, "grad_norm": 1.0238479375839233, "learning_rate": 2.759619924537438e-08, "loss": 0.08582782745361328, "step": 6955 }, { "epoch": 0.9692747160872291, "grad_norm": 0.6095354557037354, "learning_rate": 2.7349480006964023e-08, "loss": 0.06223583221435547, "step": 6956 }, { "epoch": 0.9694140597784435, "grad_norm": 0.7139575481414795, "learning_rate": 2.7103865577970955e-08, "loss": 0.07978630065917969, "step": 6957 }, { "epoch": 0.9695534034696579, "grad_norm": 0.6479682326316833, "learning_rate": 2.6859356012965964e-08, "loss": 0.07505130767822266, "step": 6958 }, { "epoch": 0.9696927471608723, "grad_norm": 0.5322152972221375, "learning_rate": 2.661595136627393e-08, "loss": 0.07384014129638672, "step": 6959 }, { "epoch": 0.9698320908520867, "grad_norm": 0.5581799745559692, "learning_rate": 2.63736516919727e-08, "loss": 0.06542491912841797, "step": 6960 }, { "epoch": 0.969971434543301, "grad_norm": 0.7736369371414185, "learning_rate": 2.6132457043896442e-08, "loss": 0.06895732879638672, "step": 6961 }, { "epoch": 0.9701107782345154, "grad_norm": 0.6474936008453369, "learning_rate": 2.589236747563284e-08, "loss": 0.07237482070922852, "step": 6962 }, { "epoch": 0.9702501219257298, "grad_norm": 0.446162611246109, "learning_rate": 2.5653383040524228e-08, "loss": 0.06293678283691406, "step": 6963 }, { "epoch": 0.9703894656169442, "grad_norm": 0.6575921773910522, "learning_rate": 2.5415503791667573e-08, "loss": 0.06895256042480469, "step": 6964 }, { "epoch": 0.9705288093081585, "grad_norm": 0.9774541854858398, "learning_rate": 2.5178729781915046e-08, "loss": 0.08483219146728516, "step": 6965 }, { "epoch": 0.9706681529993729, "grad_norm": 0.7549224495887756, "learning_rate": 2.4943061063870678e-08, "loss": 0.09451770782470703, "step": 6966 }, { "epoch": 0.9708074966905873, "grad_norm": 0.6284815669059753, "learning_rate": 2.4708497689896472e-08, "loss": 0.07366418838500977, "step": 6967 }, { "epoch": 0.9709468403818017, "grad_norm": 0.7088721394538879, "learning_rate": 2.4475039712105742e-08, "loss": 0.06963825225830078, "step": 6968 }, { "epoch": 0.9710861840730161, "grad_norm": 0.5819746255874634, "learning_rate": 2.4242687182368106e-08, "loss": 0.059332847595214844, "step": 6969 }, { "epoch": 0.9712255277642304, "grad_norm": 0.6061080098152161, "learning_rate": 2.401144015230672e-08, "loss": 0.07194805145263672, "step": 6970 }, { "epoch": 0.9713648714554448, "grad_norm": 0.9951839447021484, "learning_rate": 2.3781298673299924e-08, "loss": 0.08570098876953125, "step": 6971 }, { "epoch": 0.9715042151466592, "grad_norm": 0.738382875919342, "learning_rate": 2.3552262796479042e-08, "loss": 0.07488203048706055, "step": 6972 }, { "epoch": 0.9716435588378736, "grad_norm": 0.5223232507705688, "learning_rate": 2.33243325727317e-08, "loss": 0.06644439697265625, "step": 6973 }, { "epoch": 0.9717829025290879, "grad_norm": 0.5188778638839722, "learning_rate": 2.3097508052697948e-08, "loss": 0.05883622169494629, "step": 6974 }, { "epoch": 0.9719222462203023, "grad_norm": 0.9262756705284119, "learning_rate": 2.2871789286773582e-08, "loss": 0.07623481750488281, "step": 6975 }, { "epoch": 0.9720615899115168, "grad_norm": 0.7955157160758972, "learning_rate": 2.264717632510738e-08, "loss": 0.07256269454956055, "step": 6976 }, { "epoch": 0.9722009336027312, "grad_norm": 0.6546200513839722, "learning_rate": 2.2423669217604415e-08, "loss": 0.08033466339111328, "step": 6977 }, { "epoch": 0.9723402772939456, "grad_norm": 1.0305616855621338, "learning_rate": 2.220126801392164e-08, "loss": 0.07706356048583984, "step": 6978 }, { "epoch": 0.9724796209851599, "grad_norm": 0.8728235363960266, "learning_rate": 2.1979972763471747e-08, "loss": 0.08515715599060059, "step": 6979 }, { "epoch": 0.9726189646763743, "grad_norm": 1.0091131925582886, "learning_rate": 2.1759783515422074e-08, "loss": 0.1045389175415039, "step": 6980 }, { "epoch": 0.9727583083675887, "grad_norm": 0.6281814575195312, "learning_rate": 2.1540700318693487e-08, "loss": 0.07844257354736328, "step": 6981 }, { "epoch": 0.9728976520588031, "grad_norm": 0.6373725533485413, "learning_rate": 2.132272322196094e-08, "loss": 0.07988691329956055, "step": 6982 }, { "epoch": 0.9730369957500175, "grad_norm": 1.1237390041351318, "learning_rate": 2.110585227365458e-08, "loss": 0.10843658447265625, "step": 6983 }, { "epoch": 0.9731763394412318, "grad_norm": 0.7665810585021973, "learning_rate": 2.0890087521957536e-08, "loss": 0.07684707641601562, "step": 6984 }, { "epoch": 0.9733156831324462, "grad_norm": 0.7808327674865723, "learning_rate": 2.0675429014807568e-08, "loss": 0.06803417205810547, "step": 6985 }, { "epoch": 0.9734550268236606, "grad_norm": 0.602837085723877, "learning_rate": 2.0461876799898196e-08, "loss": 0.07094478607177734, "step": 6986 }, { "epoch": 0.973594370514875, "grad_norm": 0.9852510094642639, "learning_rate": 2.024943092467424e-08, "loss": 0.0832834243774414, "step": 6987 }, { "epoch": 0.9737337142060893, "grad_norm": 0.6901596188545227, "learning_rate": 2.0038091436337392e-08, "loss": 0.07187175750732422, "step": 6988 }, { "epoch": 0.9738730578973037, "grad_norm": 1.3053021430969238, "learning_rate": 1.9827858381842312e-08, "loss": 0.09153270721435547, "step": 6989 }, { "epoch": 0.9740124015885181, "grad_norm": 0.6823211312294006, "learning_rate": 1.961873180789775e-08, "loss": 0.06938028335571289, "step": 6990 }, { "epoch": 0.9741517452797325, "grad_norm": 0.4484309256076813, "learning_rate": 1.9410711760967092e-08, "loss": 0.060181617736816406, "step": 6991 }, { "epoch": 0.9742910889709469, "grad_norm": 0.5735949277877808, "learning_rate": 1.920379828726726e-08, "loss": 0.06182384490966797, "step": 6992 }, { "epoch": 0.9744304326621612, "grad_norm": 1.1928569078445435, "learning_rate": 1.8997991432769812e-08, "loss": 0.086334228515625, "step": 6993 }, { "epoch": 0.9745697763533756, "grad_norm": 0.7439845204353333, "learning_rate": 1.8793291243200396e-08, "loss": 0.07935810089111328, "step": 6994 }, { "epoch": 0.97470912004459, "grad_norm": 0.8683624863624573, "learning_rate": 1.8589697764039295e-08, "loss": 0.07682943344116211, "step": 6995 }, { "epoch": 0.9748484637358044, "grad_norm": 0.9363491535186768, "learning_rate": 1.8387211040519216e-08, "loss": 0.08452320098876953, "step": 6996 }, { "epoch": 0.9749878074270187, "grad_norm": 0.5608766674995422, "learning_rate": 1.818583111762917e-08, "loss": 0.0625143051147461, "step": 6997 }, { "epoch": 0.9751271511182331, "grad_norm": 0.649458646774292, "learning_rate": 1.7985558040110594e-08, "loss": 0.07387351989746094, "step": 6998 }, { "epoch": 0.9752664948094475, "grad_norm": 0.937920868396759, "learning_rate": 1.778639185245956e-08, "loss": 0.08729410171508789, "step": 6999 }, { "epoch": 0.9754058385006619, "grad_norm": 0.5871910452842712, "learning_rate": 1.758833259892623e-08, "loss": 0.06028604507446289, "step": 7000 }, { "epoch": 0.9755451821918762, "grad_norm": 0.705720067024231, "learning_rate": 1.7391380323515395e-08, "loss": 0.06153416633605957, "step": 7001 }, { "epoch": 0.9756845258830906, "grad_norm": 0.9007208347320557, "learning_rate": 1.7195535069984838e-08, "loss": 0.07739591598510742, "step": 7002 }, { "epoch": 0.975823869574305, "grad_norm": 0.8337767124176025, "learning_rate": 1.700079688184697e-08, "loss": 0.09203261137008667, "step": 7003 }, { "epoch": 0.9759632132655194, "grad_norm": 0.8313201665878296, "learning_rate": 1.6807165802368297e-08, "loss": 0.08193761110305786, "step": 7004 }, { "epoch": 0.9761025569567338, "grad_norm": 0.4919494092464447, "learning_rate": 1.661464187456885e-08, "loss": 0.06921195983886719, "step": 7005 }, { "epoch": 0.9762419006479481, "grad_norm": 0.7295534610748291, "learning_rate": 1.6423225141223854e-08, "loss": 0.08719110488891602, "step": 7006 }, { "epoch": 0.9763812443391625, "grad_norm": 0.4684945344924927, "learning_rate": 1.623291564486096e-08, "loss": 0.05953407287597656, "step": 7007 }, { "epoch": 0.9765205880303769, "grad_norm": 0.7005287408828735, "learning_rate": 1.604371342776301e-08, "loss": 0.0786895751953125, "step": 7008 }, { "epoch": 0.9766599317215913, "grad_norm": 0.7521197199821472, "learning_rate": 1.585561853196582e-08, "loss": 0.06503677368164062, "step": 7009 }, { "epoch": 0.9767992754128056, "grad_norm": 0.7953388690948486, "learning_rate": 1.5668630999260968e-08, "loss": 0.07849645614624023, "step": 7010 }, { "epoch": 0.97693861910402, "grad_norm": 0.9585293531417847, "learning_rate": 1.5482750871191333e-08, "loss": 0.07557010650634766, "step": 7011 }, { "epoch": 0.9770779627952344, "grad_norm": 0.7539153695106506, "learning_rate": 1.529797818905665e-08, "loss": 0.09837198257446289, "step": 7012 }, { "epoch": 0.9772173064864488, "grad_norm": 0.5945495963096619, "learning_rate": 1.5114312993908532e-08, "loss": 0.07268333435058594, "step": 7013 }, { "epoch": 0.9773566501776632, "grad_norm": 0.6687086224555969, "learning_rate": 1.4931755326552667e-08, "loss": 0.07530403137207031, "step": 7014 }, { "epoch": 0.9774959938688775, "grad_norm": 0.7187106013298035, "learning_rate": 1.4750305227549943e-08, "loss": 0.07860565185546875, "step": 7015 }, { "epoch": 0.977635337560092, "grad_norm": 1.0062434673309326, "learning_rate": 1.4569962737214228e-08, "loss": 0.08486652374267578, "step": 7016 }, { "epoch": 0.9777746812513064, "grad_norm": 1.1412144899368286, "learning_rate": 1.4390727895613465e-08, "loss": 0.09517288208007812, "step": 7017 }, { "epoch": 0.9779140249425208, "grad_norm": 0.9606285691261292, "learning_rate": 1.4212600742569694e-08, "loss": 0.08356285095214844, "step": 7018 }, { "epoch": 0.9780533686337352, "grad_norm": 0.47216472029685974, "learning_rate": 1.4035581317658476e-08, "loss": 0.06647729873657227, "step": 7019 }, { "epoch": 0.9781927123249495, "grad_norm": 1.2576841115951538, "learning_rate": 1.3859669660209463e-08, "loss": 0.11114311218261719, "step": 7020 }, { "epoch": 0.9783320560161639, "grad_norm": 1.0230917930603027, "learning_rate": 1.368486580930639e-08, "loss": 0.08016490936279297, "step": 7021 }, { "epoch": 0.9784713997073783, "grad_norm": 0.6720202565193176, "learning_rate": 1.3511169803786527e-08, "loss": 0.07710933685302734, "step": 7022 }, { "epoch": 0.9786107433985927, "grad_norm": 0.9111685156822205, "learning_rate": 1.333858168224178e-08, "loss": 0.0773172378540039, "step": 7023 }, { "epoch": 0.978750087089807, "grad_norm": 0.5193610787391663, "learning_rate": 1.3167101483016476e-08, "loss": 0.07371997833251953, "step": 7024 }, { "epoch": 0.9788894307810214, "grad_norm": 0.7738408446311951, "learning_rate": 1.2996729244209583e-08, "loss": 0.05933427810668945, "step": 7025 }, { "epoch": 0.9790287744722358, "grad_norm": 0.6284339427947998, "learning_rate": 1.282746500367471e-08, "loss": 0.06973791122436523, "step": 7026 }, { "epoch": 0.9791681181634502, "grad_norm": 0.822594940662384, "learning_rate": 1.2659308799017889e-08, "loss": 0.1002340316772461, "step": 7027 }, { "epoch": 0.9793074618546646, "grad_norm": 0.7081766128540039, "learning_rate": 1.2492260667599232e-08, "loss": 0.07538032531738281, "step": 7028 }, { "epoch": 0.9794468055458789, "grad_norm": 0.7061092853546143, "learning_rate": 1.2326320646534051e-08, "loss": 0.08216094970703125, "step": 7029 }, { "epoch": 0.9795861492370933, "grad_norm": 1.0956547260284424, "learning_rate": 1.2161488772690077e-08, "loss": 0.08469390869140625, "step": 7030 }, { "epoch": 0.9797254929283077, "grad_norm": 0.7502949237823486, "learning_rate": 1.1997765082688573e-08, "loss": 0.08013772964477539, "step": 7031 }, { "epoch": 0.9798648366195221, "grad_norm": 0.7815611958503723, "learning_rate": 1.1835149612905438e-08, "loss": 0.08143234252929688, "step": 7032 }, { "epoch": 0.9800041803107364, "grad_norm": 0.6188247799873352, "learning_rate": 1.1673642399470663e-08, "loss": 0.0744009017944336, "step": 7033 }, { "epoch": 0.9801435240019508, "grad_norm": 0.6593865752220154, "learning_rate": 1.1513243478267211e-08, "loss": 0.0668792724609375, "step": 7034 }, { "epoch": 0.9802828676931652, "grad_norm": 0.8141356706619263, "learning_rate": 1.135395288493213e-08, "loss": 0.0885610580444336, "step": 7035 }, { "epoch": 0.9804222113843796, "grad_norm": 0.604856014251709, "learning_rate": 1.1195770654855443e-08, "loss": 0.0649728775024414, "step": 7036 }, { "epoch": 0.980561555075594, "grad_norm": 0.6193715929985046, "learning_rate": 1.1038696823182372e-08, "loss": 0.06811904907226562, "step": 7037 }, { "epoch": 0.9807008987668083, "grad_norm": 0.589553713798523, "learning_rate": 1.088273142481111e-08, "loss": 0.07267951965332031, "step": 7038 }, { "epoch": 0.9808402424580227, "grad_norm": 0.5928145051002502, "learning_rate": 1.0727874494393386e-08, "loss": 0.06374955177307129, "step": 7039 }, { "epoch": 0.9809795861492371, "grad_norm": 0.6418859958648682, "learning_rate": 1.0574126066335011e-08, "loss": 0.06899356842041016, "step": 7040 }, { "epoch": 0.9811189298404515, "grad_norm": 0.6150520443916321, "learning_rate": 1.0421486174795326e-08, "loss": 0.0752401351928711, "step": 7041 }, { "epoch": 0.9812582735316658, "grad_norm": 0.7084766626358032, "learning_rate": 1.0269954853687202e-08, "loss": 0.06659412384033203, "step": 7042 }, { "epoch": 0.9813976172228802, "grad_norm": 0.5667166709899902, "learning_rate": 1.01195321366776e-08, "loss": 0.068692147731781, "step": 7043 }, { "epoch": 0.9815369609140946, "grad_norm": 0.5870152115821838, "learning_rate": 9.970218057187009e-09, "loss": 0.06676673889160156, "step": 7044 }, { "epoch": 0.981676304605309, "grad_norm": 0.7268147468566895, "learning_rate": 9.82201264839e-09, "loss": 0.0728464126586914, "step": 7045 }, { "epoch": 0.9818156482965233, "grad_norm": 0.601842999458313, "learning_rate": 9.67491594321357e-09, "loss": 0.06573867797851562, "step": 7046 }, { "epoch": 0.9819549919877377, "grad_norm": 0.5823180675506592, "learning_rate": 9.528927974339908e-09, "loss": 0.07109546661376953, "step": 7047 }, { "epoch": 0.9820943356789521, "grad_norm": 0.5751903653144836, "learning_rate": 9.38404877420418e-09, "loss": 0.08239459991455078, "step": 7048 }, { "epoch": 0.9822336793701665, "grad_norm": 0.5486648678779602, "learning_rate": 9.240278374995637e-09, "loss": 0.06051826477050781, "step": 7049 }, { "epoch": 0.9823730230613809, "grad_norm": 1.0774755477905273, "learning_rate": 9.097616808655396e-09, "loss": 0.106475830078125, "step": 7050 }, { "epoch": 0.9825123667525952, "grad_norm": 0.9343611598014832, "learning_rate": 8.95606410688088e-09, "loss": 0.07734012603759766, "step": 7051 }, { "epoch": 0.9826517104438096, "grad_norm": 0.5385139584541321, "learning_rate": 8.815620301121375e-09, "loss": 0.06683707237243652, "step": 7052 }, { "epoch": 0.982791054135024, "grad_norm": 0.7242765426635742, "learning_rate": 8.676285422580255e-09, "loss": 0.07166147232055664, "step": 7053 }, { "epoch": 0.9829303978262384, "grad_norm": 0.7462858557701111, "learning_rate": 8.538059502214979e-09, "loss": 0.0694127082824707, "step": 7054 }, { "epoch": 0.9830697415174527, "grad_norm": 0.568528950214386, "learning_rate": 8.400942570735427e-09, "loss": 0.06828022003173828, "step": 7055 }, { "epoch": 0.9832090852086672, "grad_norm": 0.6584334373474121, "learning_rate": 8.264934658606672e-09, "loss": 0.07877826690673828, "step": 7056 }, { "epoch": 0.9833484288998816, "grad_norm": 0.5383577942848206, "learning_rate": 8.13003579604621e-09, "loss": 0.07069206237792969, "step": 7057 }, { "epoch": 0.983487772591096, "grad_norm": 0.5817012786865234, "learning_rate": 7.996246013025067e-09, "loss": 0.06399106979370117, "step": 7058 }, { "epoch": 0.9836271162823104, "grad_norm": 0.7541501522064209, "learning_rate": 7.863565339268908e-09, "loss": 0.08986568450927734, "step": 7059 }, { "epoch": 0.9837664599735247, "grad_norm": 0.6908285021781921, "learning_rate": 7.731993804256378e-09, "loss": 0.06427860260009766, "step": 7060 }, { "epoch": 0.9839058036647391, "grad_norm": 0.4889752268791199, "learning_rate": 7.60153143721909e-09, "loss": 0.06465721130371094, "step": 7061 }, { "epoch": 0.9840451473559535, "grad_norm": 0.9111093878746033, "learning_rate": 7.472178267143304e-09, "loss": 0.09563255310058594, "step": 7062 }, { "epoch": 0.9841844910471679, "grad_norm": 0.8368816375732422, "learning_rate": 7.343934322767699e-09, "loss": 0.08932304382324219, "step": 7063 }, { "epoch": 0.9843238347383823, "grad_norm": 0.7816336750984192, "learning_rate": 7.216799632586147e-09, "loss": 0.07808113098144531, "step": 7064 }, { "epoch": 0.9844631784295966, "grad_norm": 0.4652710258960724, "learning_rate": 7.0907742248443875e-09, "loss": 0.05704450607299805, "step": 7065 }, { "epoch": 0.984602522120811, "grad_norm": 0.6252948641777039, "learning_rate": 6.965858127542247e-09, "loss": 0.0717782974243164, "step": 7066 }, { "epoch": 0.9847418658120254, "grad_norm": 0.6314489245414734, "learning_rate": 6.842051368433633e-09, "loss": 0.07418417930603027, "step": 7067 }, { "epoch": 0.9848812095032398, "grad_norm": 0.6721243858337402, "learning_rate": 6.719353975025989e-09, "loss": 0.07214021682739258, "step": 7068 }, { "epoch": 0.9850205531944541, "grad_norm": 0.7139331102371216, "learning_rate": 6.5977659745786185e-09, "loss": 0.07315444946289062, "step": 7069 }, { "epoch": 0.9851598968856685, "grad_norm": 0.7386248707771301, "learning_rate": 6.477287394107134e-09, "loss": 0.08190727233886719, "step": 7070 }, { "epoch": 0.9852992405768829, "grad_norm": 0.9663850665092468, "learning_rate": 6.357918260377349e-09, "loss": 0.11746406555175781, "step": 7071 }, { "epoch": 0.9854385842680973, "grad_norm": 0.46457192301750183, "learning_rate": 6.239658599911935e-09, "loss": 0.05842113494873047, "step": 7072 }, { "epoch": 0.9855779279593117, "grad_norm": 0.7893351316452026, "learning_rate": 6.122508438984875e-09, "loss": 0.07049751281738281, "step": 7073 }, { "epoch": 0.985717271650526, "grad_norm": 0.9341877698898315, "learning_rate": 6.0064678036242385e-09, "loss": 0.09244155883789062, "step": 7074 }, { "epoch": 0.9858566153417404, "grad_norm": 0.5776981115341187, "learning_rate": 5.891536719611624e-09, "loss": 0.06969451904296875, "step": 7075 }, { "epoch": 0.9859959590329548, "grad_norm": 0.8479294180870056, "learning_rate": 5.77771521248216e-09, "loss": 0.08737564086914062, "step": 7076 }, { "epoch": 0.9861353027241692, "grad_norm": 0.7534261345863342, "learning_rate": 5.665003307524508e-09, "loss": 0.08063888549804688, "step": 7077 }, { "epoch": 0.9862746464153835, "grad_norm": 0.4107084572315216, "learning_rate": 5.5534010297803034e-09, "loss": 0.055657386779785156, "step": 7078 }, { "epoch": 0.9864139901065979, "grad_norm": 0.7576947212219238, "learning_rate": 5.4429084040452665e-09, "loss": 0.07729244232177734, "step": 7079 }, { "epoch": 0.9865533337978123, "grad_norm": 0.5414098501205444, "learning_rate": 5.333525454868094e-09, "loss": 0.07260894775390625, "step": 7080 }, { "epoch": 0.9866926774890267, "grad_norm": 0.6041224002838135, "learning_rate": 5.225252206551568e-09, "loss": 0.06664180755615234, "step": 7081 }, { "epoch": 0.986832021180241, "grad_norm": 0.8184123635292053, "learning_rate": 5.118088683151445e-09, "loss": 0.0759267807006836, "step": 7082 }, { "epoch": 0.9869713648714554, "grad_norm": 1.0665173530578613, "learning_rate": 5.01203490847646e-09, "loss": 0.08448219299316406, "step": 7083 }, { "epoch": 0.9871107085626698, "grad_norm": 0.7365670204162598, "learning_rate": 4.907090906090539e-09, "loss": 0.0832977294921875, "step": 7084 }, { "epoch": 0.9872500522538842, "grad_norm": 0.7079958915710449, "learning_rate": 4.803256699308923e-09, "loss": 0.0590970516204834, "step": 7085 }, { "epoch": 0.9873893959450986, "grad_norm": 0.7212856411933899, "learning_rate": 4.700532311200934e-09, "loss": 0.08665084838867188, "step": 7086 }, { "epoch": 0.9875287396363129, "grad_norm": 1.3077654838562012, "learning_rate": 4.598917764590538e-09, "loss": 0.08372879028320312, "step": 7087 }, { "epoch": 0.9876680833275273, "grad_norm": 0.6002435684204102, "learning_rate": 4.498413082053566e-09, "loss": 0.07203912734985352, "step": 7088 }, { "epoch": 0.9878074270187417, "grad_norm": 0.8493081331253052, "learning_rate": 4.399018285919376e-09, "loss": 0.0784139633178711, "step": 7089 }, { "epoch": 0.9879467707099561, "grad_norm": 0.7931269407272339, "learning_rate": 4.300733398272528e-09, "loss": 0.08131790161132812, "step": 7090 }, { "epoch": 0.9880861144011704, "grad_norm": 0.9139642715454102, "learning_rate": 4.203558440948885e-09, "loss": 0.09442424774169922, "step": 7091 }, { "epoch": 0.9882254580923848, "grad_norm": 0.670638918876648, "learning_rate": 4.1074934355384015e-09, "loss": 0.07040953636169434, "step": 7092 }, { "epoch": 0.9883648017835992, "grad_norm": 0.5589780211448669, "learning_rate": 4.0125384033845586e-09, "loss": 0.07326698303222656, "step": 7093 }, { "epoch": 0.9885041454748136, "grad_norm": 0.5767187476158142, "learning_rate": 3.91869336558437e-09, "loss": 0.0833425521850586, "step": 7094 }, { "epoch": 0.988643489166028, "grad_norm": 1.0773561000823975, "learning_rate": 3.8259583429883785e-09, "loss": 0.06785297393798828, "step": 7095 }, { "epoch": 0.9887828328572423, "grad_norm": 0.526212751865387, "learning_rate": 3.734333356199548e-09, "loss": 0.060213327407836914, "step": 7096 }, { "epoch": 0.9889221765484568, "grad_norm": 0.6322941780090332, "learning_rate": 3.643818425575485e-09, "loss": 0.06239032745361328, "step": 7097 }, { "epoch": 0.9890615202396712, "grad_norm": 0.9154913425445557, "learning_rate": 3.5544135712262116e-09, "loss": 0.09370994567871094, "step": 7098 }, { "epoch": 0.9892008639308856, "grad_norm": 0.7037792801856995, "learning_rate": 3.4661188130147295e-09, "loss": 0.07532024383544922, "step": 7099 }, { "epoch": 0.9893402076221, "grad_norm": 0.7764983773231506, "learning_rate": 3.378934170559789e-09, "loss": 0.08242321014404297, "step": 7100 }, { "epoch": 0.9894795513133143, "grad_norm": 0.8191845417022705, "learning_rate": 3.292859663230341e-09, "loss": 0.06479501724243164, "step": 7101 }, { "epoch": 0.9896188950045287, "grad_norm": 0.7169383764266968, "learning_rate": 3.207895310150533e-09, "loss": 0.08161163330078125, "step": 7102 }, { "epoch": 0.9897582386957431, "grad_norm": 1.19533371925354, "learning_rate": 3.1240411301980413e-09, "loss": 0.10181999206542969, "step": 7103 }, { "epoch": 0.9898975823869575, "grad_norm": 0.5793089270591736, "learning_rate": 3.0412971420029636e-09, "loss": 0.06766748428344727, "step": 7104 }, { "epoch": 0.9900369260781718, "grad_norm": 0.4552680552005768, "learning_rate": 2.959663363949483e-09, "loss": 0.07130813598632812, "step": 7105 }, { "epoch": 0.9901762697693862, "grad_norm": 0.7297782897949219, "learning_rate": 2.8791398141736484e-09, "loss": 0.0809774398803711, "step": 7106 }, { "epoch": 0.9903156134606006, "grad_norm": 0.6673335433006287, "learning_rate": 2.799726510567258e-09, "loss": 0.08554840087890625, "step": 7107 }, { "epoch": 0.990454957151815, "grad_norm": 0.5468272566795349, "learning_rate": 2.721423470773421e-09, "loss": 0.06205791234970093, "step": 7108 }, { "epoch": 0.9905943008430294, "grad_norm": 0.7088553309440613, "learning_rate": 2.644230712189888e-09, "loss": 0.05880022048950195, "step": 7109 }, { "epoch": 0.9907336445342437, "grad_norm": 0.5853708982467651, "learning_rate": 2.5681482519662736e-09, "loss": 0.0719003677368164, "step": 7110 }, { "epoch": 0.9908729882254581, "grad_norm": 0.6370747089385986, "learning_rate": 2.493176107006834e-09, "loss": 0.07042407989501953, "step": 7111 }, { "epoch": 0.9910123319166725, "grad_norm": 0.6047189831733704, "learning_rate": 2.4193142939687996e-09, "loss": 0.07322502136230469, "step": 7112 }, { "epoch": 0.9911516756078869, "grad_norm": 0.8177738785743713, "learning_rate": 2.3465628292623776e-09, "loss": 0.07346343994140625, "step": 7113 }, { "epoch": 0.9912910192991012, "grad_norm": 0.6029098033905029, "learning_rate": 2.2749217290513048e-09, "loss": 0.08169746398925781, "step": 7114 }, { "epoch": 0.9914303629903156, "grad_norm": 0.8010557889938354, "learning_rate": 2.2043910092522935e-09, "loss": 0.07804012298583984, "step": 7115 }, { "epoch": 0.99156970668153, "grad_norm": 0.894841194152832, "learning_rate": 2.134970685536697e-09, "loss": 0.10654067993164062, "step": 7116 }, { "epoch": 0.9917090503727444, "grad_norm": 1.3978101015090942, "learning_rate": 2.066660773326623e-09, "loss": 0.07802772521972656, "step": 7117 }, { "epoch": 0.9918483940639587, "grad_norm": 0.7436274886131287, "learning_rate": 1.999461287800486e-09, "loss": 0.08096837997436523, "step": 7118 }, { "epoch": 0.9919877377551731, "grad_norm": 0.6121771931648254, "learning_rate": 1.9333722438874548e-09, "loss": 0.0701742172241211, "step": 7119 }, { "epoch": 0.9921270814463875, "grad_norm": 0.6306000351905823, "learning_rate": 1.868393656271339e-09, "loss": 0.07392692565917969, "step": 7120 }, { "epoch": 0.9922664251376019, "grad_norm": 1.2576191425323486, "learning_rate": 1.8045255393889238e-09, "loss": 0.0899209976196289, "step": 7121 }, { "epoch": 0.9924057688288163, "grad_norm": 0.6257308721542358, "learning_rate": 1.7417679074299698e-09, "loss": 0.07010269165039062, "step": 7122 }, { "epoch": 0.9925451125200306, "grad_norm": 0.6152235269546509, "learning_rate": 1.680120774338323e-09, "loss": 0.061740875244140625, "step": 7123 }, { "epoch": 0.992684456211245, "grad_norm": 0.60127854347229, "learning_rate": 1.6195841538096947e-09, "loss": 0.07975339889526367, "step": 7124 }, { "epoch": 0.9928237999024594, "grad_norm": 0.5885370373725891, "learning_rate": 1.5601580592949916e-09, "loss": 0.0745706558227539, "step": 7125 }, { "epoch": 0.9929631435936738, "grad_norm": 0.5470208525657654, "learning_rate": 1.5018425039969864e-09, "loss": 0.06735706329345703, "step": 7126 }, { "epoch": 0.9931024872848881, "grad_norm": 0.8261162042617798, "learning_rate": 1.4446375008714264e-09, "loss": 0.07518291473388672, "step": 7127 }, { "epoch": 0.9932418309761025, "grad_norm": 0.6444846391677856, "learning_rate": 1.3885430626287e-09, "loss": 0.06799507141113281, "step": 7128 }, { "epoch": 0.9933811746673169, "grad_norm": 0.6590341329574585, "learning_rate": 1.3335592017316156e-09, "loss": 0.07398319244384766, "step": 7129 }, { "epoch": 0.9935205183585313, "grad_norm": 0.6091737747192383, "learning_rate": 1.2796859303959575e-09, "loss": 0.06851863861083984, "step": 7130 }, { "epoch": 0.9936598620497457, "grad_norm": 0.678688645362854, "learning_rate": 1.2269232605915948e-09, "loss": 0.0675973892211914, "step": 7131 }, { "epoch": 0.99379920574096, "grad_norm": 0.9106829762458801, "learning_rate": 1.1752712040408176e-09, "loss": 0.0827341079711914, "step": 7132 }, { "epoch": 0.9939385494321744, "grad_norm": 0.7155560255050659, "learning_rate": 1.124729772219446e-09, "loss": 0.07340431213378906, "step": 7133 }, { "epoch": 0.9940778931233888, "grad_norm": 0.8712220788002014, "learning_rate": 1.075298976356831e-09, "loss": 0.07041358947753906, "step": 7134 }, { "epoch": 0.9942172368146032, "grad_norm": 1.137858510017395, "learning_rate": 1.026978827435854e-09, "loss": 0.10284709930419922, "step": 7135 }, { "epoch": 0.9943565805058175, "grad_norm": 0.9616786241531372, "learning_rate": 9.797693361912607e-10, "loss": 0.07951545715332031, "step": 7136 }, { "epoch": 0.994495924197032, "grad_norm": 0.9214937686920166, "learning_rate": 9.33670513112439e-10, "loss": 0.07996797561645508, "step": 7137 }, { "epoch": 0.9946352678882464, "grad_norm": 0.8791815638542175, "learning_rate": 8.886823684417512e-10, "loss": 0.08676624298095703, "step": 7138 }, { "epoch": 0.9947746115794608, "grad_norm": 0.7912485599517822, "learning_rate": 8.448049121739798e-10, "loss": 0.07069873809814453, "step": 7139 }, { "epoch": 0.9949139552706752, "grad_norm": 0.88667231798172, "learning_rate": 8.020381540579936e-10, "loss": 0.08668899536132812, "step": 7140 }, { "epoch": 0.9950532989618895, "grad_norm": 0.8330209255218506, "learning_rate": 7.603821035950809e-10, "loss": 0.07281303405761719, "step": 7141 }, { "epoch": 0.9951926426531039, "grad_norm": 0.7703613042831421, "learning_rate": 7.198367700411712e-10, "loss": 0.07208728790283203, "step": 7142 }, { "epoch": 0.9953319863443183, "grad_norm": 0.645962119102478, "learning_rate": 6.80402162403504e-10, "loss": 0.072906494140625, "step": 7143 }, { "epoch": 0.9954713300355327, "grad_norm": 0.9146619439125061, "learning_rate": 6.420782894445144e-10, "loss": 0.08719539642333984, "step": 7144 }, { "epoch": 0.995610673726747, "grad_norm": 0.6116271018981934, "learning_rate": 6.048651596785027e-10, "loss": 0.060698509216308594, "step": 7145 }, { "epoch": 0.9957500174179614, "grad_norm": 0.5964954495429993, "learning_rate": 5.687627813727448e-10, "loss": 0.05957984924316406, "step": 7146 }, { "epoch": 0.9958893611091758, "grad_norm": 0.5259532928466797, "learning_rate": 5.337711625497122e-10, "loss": 0.062348365783691406, "step": 7147 }, { "epoch": 0.9960287048003902, "grad_norm": 0.7255178093910217, "learning_rate": 4.998903109826314e-10, "loss": 0.0715336799621582, "step": 7148 }, { "epoch": 0.9961680484916046, "grad_norm": 0.9182916879653931, "learning_rate": 4.671202341993697e-10, "loss": 0.09943580627441406, "step": 7149 }, { "epoch": 0.9963073921828189, "grad_norm": 1.1341094970703125, "learning_rate": 4.354609394802145e-10, "loss": 0.0966024398803711, "step": 7150 }, { "epoch": 0.9964467358740333, "grad_norm": 0.9706647992134094, "learning_rate": 4.0491243386009403e-10, "loss": 0.07975101470947266, "step": 7151 }, { "epoch": 0.9965860795652477, "grad_norm": 0.8084762096405029, "learning_rate": 3.7547472412580167e-10, "loss": 0.06848549842834473, "step": 7152 }, { "epoch": 0.9967254232564621, "grad_norm": 0.7320875525474548, "learning_rate": 3.471478168176612e-10, "loss": 0.07307052612304688, "step": 7153 }, { "epoch": 0.9968647669476765, "grad_norm": 0.5829106569290161, "learning_rate": 3.19931718229527e-10, "loss": 0.061606407165527344, "step": 7154 }, { "epoch": 0.9970041106388908, "grad_norm": 1.0059794187545776, "learning_rate": 2.9382643440767354e-10, "loss": 0.0911407470703125, "step": 7155 }, { "epoch": 0.9971434543301052, "grad_norm": 1.1160480976104736, "learning_rate": 2.6883197115190606e-10, "loss": 0.08770418167114258, "step": 7156 }, { "epoch": 0.9972827980213196, "grad_norm": 0.41653919219970703, "learning_rate": 2.4494833401667027e-10, "loss": 0.06308889389038086, "step": 7157 }, { "epoch": 0.997422141712534, "grad_norm": 0.8298065662384033, "learning_rate": 2.2217552830716693e-10, "loss": 0.07294273376464844, "step": 7158 }, { "epoch": 0.9975614854037483, "grad_norm": 0.9164866209030151, "learning_rate": 2.0051355908323743e-10, "loss": 0.089111328125, "step": 7159 }, { "epoch": 0.9977008290949627, "grad_norm": 0.7278069853782654, "learning_rate": 1.7996243115769863e-10, "loss": 0.07802867889404297, "step": 7160 }, { "epoch": 0.9978401727861771, "grad_norm": 0.6266469955444336, "learning_rate": 1.605221490968978e-10, "loss": 0.07165336608886719, "step": 7161 }, { "epoch": 0.9979795164773915, "grad_norm": 1.0439211130142212, "learning_rate": 1.421927172201576e-10, "loss": 0.09467887878417969, "step": 7162 }, { "epoch": 0.9981188601686058, "grad_norm": 0.8576319813728333, "learning_rate": 1.24974139599221e-10, "loss": 0.08415985107421875, "step": 7163 }, { "epoch": 0.9982582038598202, "grad_norm": 1.0234540700912476, "learning_rate": 1.0886642005991654e-10, "loss": 0.09653854370117188, "step": 7164 }, { "epoch": 0.9983975475510346, "grad_norm": 0.55422443151474, "learning_rate": 9.386956218104815e-11, "loss": 0.06008434295654297, "step": 7165 }, { "epoch": 0.998536891242249, "grad_norm": 0.7835899591445923, "learning_rate": 7.998356929439511e-11, "loss": 0.07430839538574219, "step": 7166 }, { "epoch": 0.9986762349334634, "grad_norm": 0.5809925198554993, "learning_rate": 6.72084444852672e-11, "loss": 0.06998300552368164, "step": 7167 }, { "epoch": 0.9988155786246777, "grad_norm": 0.4362945258617401, "learning_rate": 5.554419059250471e-11, "loss": 0.0674734115600586, "step": 7168 }, { "epoch": 0.9989549223158921, "grad_norm": 0.8483622074127197, "learning_rate": 4.499081020681306e-11, "loss": 0.10628318786621094, "step": 7169 }, { "epoch": 0.9990942660071065, "grad_norm": 0.8460618853569031, "learning_rate": 3.554830567298328e-11, "loss": 0.09524154663085938, "step": 7170 }, { "epoch": 0.9992336096983209, "grad_norm": 0.45442652702331543, "learning_rate": 2.7216679089892008e-11, "loss": 0.06380319595336914, "step": 7171 }, { "epoch": 0.9993729533895352, "grad_norm": 0.5296136140823364, "learning_rate": 1.9995932307170783e-11, "loss": 0.06620216369628906, "step": 7172 }, { "epoch": 0.9995122970807496, "grad_norm": 1.488916277885437, "learning_rate": 1.3886066930202113e-11, "loss": 0.10059356689453125, "step": 7173 }, { "epoch": 0.999651640771964, "grad_norm": 0.5675370097160339, "learning_rate": 8.88708431623364e-12, "loss": 0.06390666961669922, "step": 7174 }, { "epoch": 0.9997909844631784, "grad_norm": 0.8537285923957825, "learning_rate": 4.998985576043503e-12, "loss": 0.07412147521972656, "step": 7175 }, { "epoch": 0.9999303281543928, "grad_norm": 0.7304275035858154, "learning_rate": 2.2217715728301003e-12, "loss": 0.07982063293457031, "step": 7176 }, { "epoch": 1.0, "grad_norm": 1.0831202268600464, "learning_rate": 5.554429238774361e-13, "loss": 0.09231948852539062, "step": 7177 }, { "epoch": 1.0, "step": 7177, "total_flos": 5.326107668090192e+19, "train_loss": 0.03450867652548411, "train_runtime": 31657.6261, "train_samples_per_second": 58.032, "train_steps_per_second": 0.227 } ], "logging_steps": 1.0, "max_steps": 7177, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.326107668090192e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }